├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── prox.iml └── vcs.xml ├── LICENSE ├── README.md ├── cfg_files ├── PROX.yaml ├── PROXD.yaml ├── RGB.yaml └── SMPLifyD.yaml ├── images └── teaser.jpg ├── prox ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── projection_utils.cpython-36.pyc ├── align_RGBD.py ├── camera.py ├── cmd_parser.py ├── data_parser.py ├── dist_chamfer.py ├── fit_single_frame.py ├── fitting.py ├── main.py ├── misc_utils.py ├── optimizers │ ├── __init__.py │ ├── lbfgs_ls.py │ └── optim_factory.py ├── prior.py ├── projection_utils.py ├── renderer.py └── viz │ ├── __init__.py │ ├── viz_fitting.py │ ├── viz_mosh.py │ └── viz_raw_data.py └── requirements.txt /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Default ignored files 3 | /workspace.xml -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/prox.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | License 2 | 3 | Software Copyright License for non-commercial scientific research purposes 4 | Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use the SMPL-X/SMPLify-X/PROX model, data and software, (the "Model & Software"), including 3D meshes, blend weights, blend shapes, textures, software, scripts, and animations. By downloading and/or using the Model & Software (including downloading, cloning, installing, and any other use of this github repository), you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use the Model & Software. Any infringement of the terms of this agreement will automatically terminate your rights under this License 5 | 6 | Ownership / Licensees 7 | The Software and the associated materials has been developed at the 8 | 9 | Max Planck Institute for Intelligent Systems (hereinafter "MPI"). 10 | 11 | Any copyright or patent right is owned by and proprietary material of the 12 | 13 | Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter collectively “Max-Planck”) 14 | 15 | hereinafter the “Licensor”. 
16 | 17 | License Grant 18 | Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right: 19 | 20 | To install the Model & Software on computers owned, leased or otherwise controlled by you and/or your organization; 21 | To use the Model & Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects; 22 | Any other use, in particular any use for commercial, pornographic, military, or surveillance, purposes is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artifacts for commercial purposes. The Data & Software may not be used to create fake, libelous, misleading, or defamatory content of any kind excluding analyses in peer-reviewed scientific research. The Data & Software may not be reproduced, modified and/or made available in any form to any third party without Max-Planck’s prior written permission. 23 | 24 | The Data & Software may not be used for pornographic purposes or to generate pornographic material whether commercial or not. This license also prohibits the use of the Software to train methods/algorithms/neural networks/etc. for commercial, pornographic, military, surveillance, or defamatory use of any kind. By downloading the Data & Software, you agree not to reverse engineer it. 25 | 26 | No Distribution 27 | The Model & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive purposes only. 28 | 29 | Disclaimer of Representations and Warranties 30 | You expressly acknowledge and agree that the Model & Software results from basic research, is provided “AS IS”, may contain errors, and that any use of the Model & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE MODEL & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of the Model & Software, (ii) that the use of the Model & Software will not infringe any patents, copyrights or other intellectual property rights of a third party, and (iii) that the use of the Model & Software will not cause any damage of any kind to you or a third party. 31 | 32 | Limitation of Liability 33 | Because this Model & Software License Agreement qualifies as a donation, according to Section 521 of the German Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only. If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee for the resulting damage. 34 | Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be liable in accordance with the German Product Liability Act in the event of product liability. The foregoing applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded. 35 | Patent claims generated through the usage of the Model & Software cannot be directed towards the copyright holders. 
36 | The Model & Software is provided in the state of development the licensor defines. If modified or extended by Licensee, the Licensor makes no claims about the fitness of the Model & Software and is not responsible for any problems such modifications cause. 37 | 38 | No Maintenance Services 39 | You understand and agree that Licensor is under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Model & Software. Licensor nevertheless reserves the right to update, modify, or discontinue the Model & Software at any time. 40 | 41 | Defects of the Model & Software must be notified in writing to the Licensor with a comprehensible description of the error symptoms. The notification of the defect should enable the reproduction of the error. The Licensee is encouraged to communicate any use, results, modification or publication. 42 | 43 | Publications using the Model & Software 44 | You acknowledge that the Model & Software is a valuable scientific resource and agree to appropriately reference the following paper in any publication making use of the Model & Software. 45 | 46 | Citation: 47 | 48 | 49 | @inproceedings{PROX:2019, 50 | title = {Resolving {3D} Human Pose Ambiguities with {3D} Scene Constraints}, 51 | author = {Hassan, Mohamed and Choutas, Vasileios and Tzionas, Dimitrios and Black, Michael J.}, 52 | booktitle = {International Conference on Computer Vision}, 53 | month = oct, 54 | year = {2019}, 55 | url = {https://prox.is.tue.mpg.de}, 56 | month_numeric = {10} 57 | } 58 | Commercial licensing opportunities 59 | For commercial uses of the Software, please send email to ps-license@tue.mpg.de 60 | 61 | This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention. 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Resolving 3D Human Pose Ambiguities with 3D Scene Constraints 2 | 3 | [[Project Page](https://prox.is.tue.mpg.de/)] 4 | [[Paper](https://ps.is.tuebingen.mpg.de/uploads_file/attachment/attachment/530/ICCV_2019___PROX.pdf)] 5 | 6 | ![PROX Examples](./images/teaser.jpg) 7 | 8 | 9 | ## License 10 | 11 | Software Copyright License for **non-commercial scientific research purposes**. 12 | Please read carefully the [terms and conditions](./LICENSE) and any accompanying documentation before you download and/or 13 | use the SMPL-X/SMPLify-X/PROX model, data and software, (the "Model & Software"), including 3D meshes, blend weights, 14 | blend shapes, textures, software, scripts, and animations. By downloading and/or using the Model & Software (including 15 | downloading, cloning, installing, and any other use of this github repository), you acknowledge that you have read these 16 | terms and conditions, understand them, and agree to be bound by them. If you do not agree with these terms and conditions, 17 | you must not download and/or use the Model & Software. Any infringement of the terms of this agreement will automatically 18 | terminate your rights under this [License](./LICENSE) 19 | 20 | ## Description 21 | 22 | This repository contains the fitting code used for the experiments in [ Resolving 3D Human Pose Ambiguities with 3D Scene Constraints](https://prox.is.tue.mpg.de). 
23 | 
24 | # PROX Dataset
25 | To run the fitting code, you need to download and extract at least one of the [PROX datasets](https://prox.is.tue.mpg.de/). The webpage provides two PROX datasets:
26 | - `Quantitative PROX dataset`: Dataset of 180 static RGB-D frames with Ground Truth.
27 | The dataset captures static RGB-D frames of 1 subject in 1 scene and is described in Section 4.2 of the PROX paper.
28 | 
29 | - `Qualitative PROX dataset`: Dataset of 100K RGB-D frames with pseudo Ground Truth.
30 | The dataset captures dynamic RGB-D sequences of 20 subjects in 12 scenes and is described in Section 4.1.2 of the PROX paper.
31 | 
32 | Both datasets have a very similar structure, which is explained next. After extracting a dataset, you should have a directory with the following structure:
33 | ```bash
34 | prox_qualitative_dataset
35 | ├── body_segments
36 | ├── calibration
37 | ├── cam2world
38 | ├── fittings
39 | ├── keypoints
40 | ├── keypoints_overlay
41 | ├── recordings
42 | ├── scenes
43 | └── sdf
44 | ```
45 | The content of each folder is explained below:
46 | - `body_segments` contains the contact body parts.
47 | - `calibration` contains the calibration information of the Color and IR cameras of the Kinect-One sensor.
48 | - `cam2world` contains the camera-to-world transformation matrices to spatially align the camera to the 3D scene scans.
49 | - `fittings` contains the SMPL-X fitting parameters.
50 | - `keypoints` contains 2D keypoints in json files computed by [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose).
51 | - `keypoints_overlay` contains 2D keypoints overlaid on the RGB images as generated by [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose).
52 | - `scenes` contains the 3D scene meshes.
53 | - `sdf` contains the Signed Distance Fields of the 3D scenes.
54 | ## Recordings Documentation
55 | `recordings` contains the raw RGB-D recordings. The PROX dataset comes with 60 recordings; each recording folder name has the format `SceneName_SubjectID_SequenceID`.
56 | Each recording folder includes the following sub-folders:
57 | ```bash
58 | SceneName_SubjectID_SequenceID
59 | ├── BodyIndex
60 | ├── BodyIndexColor
61 | ├── Color
62 | ├── Depth
63 | └── Skeleton
64 | ```
65 | `BodyIndex`: Human masks computed by the Kinect-One SDK (png, 512x424 px).
66 | `BodyIndexColor`: Human masks computed by running [DeepLabV3](https://pytorch.org/hub/pytorch_vision_deeplabv3_resnet101/) on the color frames (png, 1920x1080 px).
67 | `Color`: RGB frames (jpg, 1920x1080 px).
68 | `Depth`: Depth frames (png, 512x424 px, ToF camera).
69 | `Infrared`: Infrared images (png, 512x424 px).
70 | `Skeleton`: Body skeletons captured by the Kinect-One SDK (json).
71 | 
72 | ### Visualization
73 | You can visualize the raw data by running the script:
74 | ```Shell
75 | python prox/viz/viz_raw_data.py RECORDING_DIR --show_color 1 --show_body_only 1
76 | ```
77 | ### Color and Depth alignment
78 | The color and depth frames of the Kinect are not spatially aligned and do not have the same resolution. To project one frame onto the other, you can use the following command:
79 | ```Shell
80 | python prox/align_RGBD.py RECORDING_DIR --mode MODE
81 | ```
82 | where MODE can be `depth2color` or `color2depth`.
83 | ## Quantitative PROX dataset
84 | The Quantitative PROX dataset has the same structure as explained above, in addition to one extra file, `vicon2scene.json`, which contains the transformation matrix
85 | from the Vicon coordinate system to the 3D scene coordinate system.
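As a quick illustration, the snippet below sketches how such a transformation could be applied. It assumes `vicon2scene.json` stores a single 4x4 homogeneous matrix as nested lists; verify this against your copy of the dataset before relying on it:
```Python
import json
import numpy as np

# Assumption: the json file holds one 4x4 homogeneous transformation matrix.
with open('vicon2scene.json') as f:
    vicon2scene = np.asarray(json.load(f), dtype=float).reshape(4, 4)

# Map hypothetical points given in Vicon coordinates (N x 3) into scene coordinates.
points_vicon = np.zeros((10, 3))  # placeholder points
points_h = np.hstack([points_vicon, np.ones((points_vicon.shape[0], 1))])  # homogeneous coordinates
points_scene = (vicon2scene @ points_h.T).T[:, :3]
```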
The `fitting` folder of the quantitative dataset contains `SMPL-X` fittings computed using
86 | [[MoSH++](https://amass.is.tue.mpg.de/)].
87 | You can visualize the `MoSH` results by running the following command:
88 | ```Shell
89 | python prox/viz/viz_mosh.py FITTING_DIR
90 | --base_dir ~/prox_dataset/quantitative --model_folder ~/prox_dataset/models/ --gender male
91 | ```
92 | For example:
93 | ```Shell
94 | python prox/viz/viz_mosh.py ~/prox_dataset/quantitative/fittings/mosh/vicon_03301_01/
95 | --base_dir ~/prox_dataset/quantitative --model_folder ~/prox_dataset/models/ --gender male
96 | ```
97 | 
98 | # Fitting
99 | To run the method, you first need to download and extract the [PROX dataset](https://prox.is.tue.mpg.de/) as explained in the previous section. Then run the following command to execute the code:
100 | ```Shell
101 | python prox/main.py --config cfg_files/CONF.yaml
102 | --recording_dir RECORDING_DIR
103 | --output_folder OUTPUT_FOLDER
104 | --visualize="True/False"
105 | --model_folder MODEL_FOLDER
106 | --vposer_ckpt VPOSER_FOLDER
107 | --part_segm_fn smplx_parts_segm.pkl
108 | ```
109 | where `RECORDING_DIR` is the path to one of the recordings from the PROX dataset and CONF is the fitting configuration, which can be RGB, PROX, SMPLifyD, or PROXD. For example:
110 | ```Shell
111 | python prox/main.py --config cfg_files/PROX.yaml
112 | --recording_dir ~/prox_dataset/recordings/N3OpenArea_00157_01
113 | --output_folder ~/PROX_results
114 | --vposer_ckpt ~/prox_dataset/models/vposer_v1_0/
115 | --part_segm_fn ~/prox_dataset/models/smplx_parts_segm.pkl
116 | --model_folder ~/prox_dataset/models
117 | ```
118 | This will generate several results: pkl files with the `SMPL-X` parameters, `SMPL-X` body meshes, renderings of the
119 | fitting results overlaid on the color images, and renderings of the body in the 3D scene.
120 | 
121 | You can also visualize the results in 3D by running the following script:
122 | ```Shell
123 | python prox/viz/viz_fitting.py FITTING_DIR --base_dir BASE_DIR --model_folder ~/prox_dataset/models --gender GENDER
124 | ```
125 | where FITTING_DIR is a directory that contains the `SMPL-X` pkl parameter files.
126 | ## PROXD Fittings
127 | We provide PROXD fittings for the dataset on the [website](https://prox.is.tue.mpg.de/) as well as preview videos. We provide the fittings as `.pkl` files which contain the `SMPL-X` parameters; a minimal sketch for inspecting such a file is given at the end of this README. For more details on the `SMPL-X` parameterization and formulation, check the [SMPL-X](https://github.com/vchoutas/smplx) repository.
128 | Similarly, you can visualize the results in 3D by running the following script:
129 | ```Shell
130 | python prox/viz/viz_fitting.py FITTING_DIR --base_dir BASE_DIR --model_folder MODEL_FOLDER
131 | ```
132 | You can also create meshes from the `.pkl` files and render the results using:
133 | ```Shell
134 | python prox/renderer.py FITTING_DIR --base_dir BASE_DIR --model_folder MODEL_FOLDER
135 | ```
136 | 
137 | ## Note
138 | The master branch of this repository depends on the released versions of [SMPLify-X](https://github.com/vchoutas/smplify-x) and [VPoser](https://github.com/nghorbani/human_body_prior) on GitHub. These versions differ from our internal versions, and hence the produced results might differ from what is reported in the paper.
139 | We provide another branch, `internal_vposer`, which has a reimplementation of the internal human_body_prior.
If you want to replicate the results reported in Table 1 of the paper, please check out this branch:
140 | ```Shell
141 | git checkout internal_vposer
142 | ```
143 | Then download the VPoser weights from our [website](https://prox.is.tue.mpg.de/) and use them for fitting:
144 | 
145 | ```Shell
146 | python prox/main.py --config cfg_files/CONF.yaml
147 | --recording_dir RECORDING_DIR
148 | --vposer_ckpt ~/vposerDecoderWeights.npz
149 | --output_folder OUTPUT_FOLDER
150 | --visualize="True/False"
151 | --model_folder MODEL_FOLDER
152 | --part_segm_fn smplx_parts_segm.pkl
153 | ```
154 | ## Dependencies
155 | Install requirements:
156 | ```Shell
157 | pip install -r requirements.txt
158 | ```
159 | Then follow the installation instructions for each of the following before using the
160 | fitting code.
161 | 
162 | 1. [Mesh Packages](https://github.com/MPI-IS/mesh)
163 | 2. [Chamfer Distance](https://github.com/ThibaultGROUEIX/chamfer_pytorch/tree/719b0f1ca5ba370616cb837c03ab88d9a88173ff)
164 | 3. [PyTorch Mesh self-intersection](https://github.com/vchoutas/torch-mesh-isect) for the interpenetration penalty
165 | * Download the per-triangle part segmentation: [smplx_parts_segm.pkl](https://owncloud.tuebingen.mpg.de/index.php/s/MWnr8Kso4K8T8at)
166 | 
167 | 
168 | The code has been tested with Python 3.6, CUDA 10.0, CuDNN 7.3, and PyTorch 1.0 on Ubuntu 18.04.
169 | 
170 | ## Citation
171 | 
172 | If you find this Model & Software useful in your research, we would kindly ask you to cite:
173 | 
174 | ```
175 | @inproceedings{PROX:2019,
176 | title = {Resolving {3D} Human Pose Ambiguities with {3D} Scene Constraints},
177 | author = {Hassan, Mohamed and Choutas, Vasileios and Tzionas, Dimitrios and Black, Michael J.},
178 | booktitle = {International Conference on Computer Vision},
179 | month = oct,
180 | year = {2019},
181 | url = {https://prox.is.tue.mpg.de},
182 | month_numeric = {10}
183 | }
184 | ```
185 | 
186 | ## Acknowledgments
187 | 
188 | The code is based on the [SMPLify-X](https://github.com/vchoutas/smplify-x) code. The Chamfer Distance code is taken from [3d-CODED](https://github.com/ThibaultGROUEIX/3D-CODED). We thank [Jean-Claude Passy](https://github.com/jcpassy) for managing the [Mesh Packages](https://github.com/MPI-IS/mesh) and porting them to Python 3.
189 | 
190 | ## Contact
191 | For questions, please contact [prox@tue.mpg.de](mailto:prox@tue.mpg.de).
192 | 
193 | For commercial licensing (and all related questions for business applications), please contact [ps-licensing@tue.mpg.de](mailto:ps-licensing@tue.mpg.de).
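## Inspecting a fitting `.pkl` file

As referenced in the PROXD Fittings section above, each fitting is stored as a `.pkl` file holding `SMPL-X` parameters. The minimal sketch below only loads such a file and lists its contents; the path is a placeholder, the file is assumed to deserialize to a dictionary, and the exact keys may vary, so inspect the printed output rather than assuming specific names:
```Python
import pickle

# Placeholder path: point this at any .pkl file inside a fittings folder.
pkl_path = 'fittings/SOME_RECORDING/results/SOME_FRAME/000.pkl'

with open(pkl_path, 'rb') as f:
    params = pickle.load(f)  # assumed to be a dict of SMPL-X parameters

# Print each stored parameter name and its shape (or type, for non-array entries).
for key, value in params.items():
    print(key, getattr(value, 'shape', type(value)))
```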
194 | -------------------------------------------------------------------------------- /cfg_files/PROX.yaml: -------------------------------------------------------------------------------- 1 | output_folder: '~/PROX' 2 | model_folder: 'models' 3 | vposer_ckpt: "models/vposer_v1_0/" 4 | part_segm_fn: "models/smplx_parts_segm.pkl" 5 | 6 | focal_length_x: 1060.5317 7 | focal_length_y: 1060.3856 8 | camera_center_x: 951.2999 9 | camera_center_y: 536.7704 10 | flip: True 11 | camera_mode: 'fixed' 12 | ################### 13 | contact: True 14 | contact_angle: 45 15 | rho_contact: 5e-2 16 | contact_loss_weights: 17 | - 0.0 18 | - 0.0 19 | - 0.0 20 | - 0.0 21 | - 0.0 22 | - 0.0 23 | - 1e4 24 | load_scene: True 25 | ################### 26 | sdf_penetration: True 27 | sdf_penetration_weights: 28 | - 0.0 29 | - 0.0 30 | - 0.0 31 | - 0.0 32 | - 0.0 33 | - 1e2 34 | - 1.0 35 | 36 | trans_opt_stages: 37 | - 3 38 | - 4 39 | - 5 40 | - 6 41 | 42 | shape_weights: 43 | - 1.0e2 44 | - 5.0e1 45 | - 1.0e1 46 | - 0.5e1 47 | - 0.5e1 48 | - 0.5e1 49 | - 0.5e1 50 | 51 | data_weights: 52 | - 1 53 | - 1 54 | - 1 55 | - 1 56 | - 1 57 | - 1 58 | - 1 59 | 60 | ################### 61 | dataset: 'openpose' 62 | joints_to_ign: 63 | - 1 64 | - 9 65 | - 12 66 | prior_folder: 'priors' 67 | result_folder: 'results' 68 | gender: "male" 69 | model_type: 'smplx' 70 | # Flag for using the GPU 71 | use_cuda: True 72 | float_dtype: "float32" 73 | use_joints_conf: True 74 | use_pca: True 75 | use_hands: True 76 | use_face: True 77 | flat_hand_mean: False 78 | # The number of mixture components for the pose prior 79 | num_gaussians: 8 80 | body_prior_type: 'l2' 81 | left_hand_prior_type: l2 82 | right_hand_prior_type: l2 83 | jaw_prior_type: 'l2' 84 | num_pca_comps: 12 85 | # The constant for the robust loss function 86 | rho: 100 87 | interpenetration: True 88 | df_cone_height: 0.0001 89 | # Upper arms and Spine 2 90 | # Neck and jaw 91 | ign_part_pairs: ["9,16", "9,17", "6,16", "6,17", "1,2", "12,22"] 92 | penalize_outside: True 93 | max_collisions: 128 94 | # LBFGS with line search 95 | optim_type: 'lbfgsls' 96 | ftol: 1e-9 97 | gtol: 1e-9 98 | lr: 1.0 99 | # Max number of optimization iterations 100 | maxiters: 30 101 | interactive: True 102 | visualize: False 103 | # Relative change in loss value 104 | body_pose_prior_weights: 105 | - 4.04e2 106 | - 4.04e2 107 | - 57.4e0 108 | - 4.78e0 109 | - 4.78e0 110 | - 4.78e0 111 | - 4.78e0 112 | coll_loss_weights: 113 | - 0.0 114 | - 0.0 115 | - 0.0 116 | - 0.01 117 | - 0.1 118 | - 0.0 119 | - 0.0 120 | expr_weights: 121 | - 1.0e2 122 | - 5.0e1 123 | - 1.0e1 124 | - 0.5e1 125 | - 0.5e1 126 | - 0.5e1 127 | - 0.5e1 128 | hand_pose_prior_weights: 129 | - 4.04e2 130 | - 4.04e2 131 | - 57.4 132 | - 4.78 133 | - 4.78 134 | - 4.78 135 | - 4.78 136 | jaw_pose_prior_weights: 137 | # First phase 138 | - 4.04e03,4.04e04,4.04e04 139 | # Second phase 140 | - 4.04e03,4.04e04,4.04e04 141 | # Third phase 142 | - 574,5740,5740 143 | # Fourth phase 144 | - 47.8,478,478 145 | # Fifth phase 146 | - 47.8,478,478 147 | # Sixth phase 148 | - 47.8,478,478 149 | # Seventh phase 150 | - 47.8,478,478 151 | hand_joints_weights: 152 | - 0.0 153 | - 0.0 154 | - 0.0 155 | - 0.1 156 | - 2.0 157 | - 2.0 158 | - 2.0 159 | face_joints_weights: 160 | - 0.0 161 | - 0.0 162 | - 0.0 163 | - 0.0 164 | - 2.0 165 | - 2.0 166 | - 2.0 167 | 168 | use_vposer: True 169 | -------------------------------------------------------------------------------- /cfg_files/PROXD.yaml: 
-------------------------------------------------------------------------------- 1 | focal_length_x: 1060.53 2 | focal_length_y: 1060.38 3 | camera_center_x: 951.30 4 | camera_center_y: 536.77 5 | flip: True 6 | camera_mode: 'fixed' 7 | output_folder: '/tmp/PROXD' 8 | ################### 9 | contact: True 10 | contact_angle: 45 11 | rho_contact: 5e-2 12 | contact_loss_weights: 13 | - 0.0 14 | - 0.0 15 | - 0.0 16 | - 0.0 17 | - 0.0 18 | - 0.0 19 | - 1e4 20 | load_scene: True 21 | ################### 22 | sdf_penetration: True 23 | sdf_penetration_weights: 24 | - 0.0 25 | - 0.0 26 | - 0.0 27 | - 0.0 28 | - 0.0 29 | - 1e2 30 | - 1.0 31 | 32 | trans_opt_stages: 33 | - 2 34 | - 3 35 | - 4 36 | - 5 37 | - 6 38 | mask_on_color: True 39 | # Depth 40 | init_mode: 'scan' 41 | m2s: True 42 | rho_m2s: 5e-1 43 | m2s_weights: 44 | - 1.0 45 | - 1.0 46 | - 1.0 47 | - 1.0 48 | - 1.0 49 | - 1.0 50 | - 1.0 51 | 52 | s2m: True 53 | rho_s2m: 2e-1 54 | s2m_weights: 55 | - 1.0 56 | - 1.0 57 | - 1e2 58 | - 5e2 59 | - 5e2 60 | - 5e2 61 | - 5e2 62 | 63 | read_depth: True 64 | read_mask: True 65 | 66 | shape_weights: 67 | - 1.0e2 68 | - 5.0e1 69 | - 1.0e1 70 | - 0.5e1 71 | - 0.5e1 72 | - 0.5e1 73 | - 0.5e1 74 | data_weights: 75 | - 1 76 | - 1 77 | - 1 78 | - 1 79 | - 1 80 | - 1 81 | - 1 82 | 83 | ################### 84 | dataset: 'openpose' 85 | joints_to_ign: 86 | - 1 87 | - 9 88 | - 12 89 | prior_folder: 'priors' 90 | model_folder: 'models' 91 | result_folder: 'results' 92 | gender: "male" 93 | model_type: 'smplx' 94 | # Flag for using the GPU 95 | use_cuda: True 96 | float_dtype: "float32" 97 | use_joints_conf: True 98 | use_pca: True 99 | use_hands: True 100 | use_face: True 101 | flat_hand_mean: False 102 | # The number of mixture components for the pose prior 103 | num_gaussians: 8 104 | body_prior_type: 'l2' 105 | left_hand_prior_type: l2 106 | right_hand_prior_type: l2 107 | jaw_prior_type: 'l2' 108 | num_pca_comps: 12 109 | # The constant for the robust loss function 110 | rho: 100 111 | interpenetration: True 112 | df_cone_height: 0.0001 113 | # Upper arms and Spine 2 114 | # Neck and jaw 115 | ign_part_pairs: ["9,16", "9,17", "6,16", "6,17", "1,2", "12,22"] 116 | penalize_outside: True 117 | max_collisions: 128 118 | # LBFGS with line search 119 | optim_type: 'lbfgsls' 120 | ftol: 1e-9 121 | gtol: 1e-9 122 | lr: 1.0 123 | # Max number of optimization iterations 124 | maxiters: 30 125 | interactive: True 126 | visualize: False 127 | # Relative change in loss value 128 | body_pose_prior_weights: 129 | - 4.04e2 130 | - 4.04e2 131 | - 57.4e0 132 | - 4.78e0 133 | - 4.78e0 134 | - 4.78e0 135 | - 4.78e0 136 | coll_loss_weights: 137 | - 0.0 138 | - 0.0 139 | - 0.0 140 | - 0.01 141 | - 0.01 142 | - 0.01 143 | - 0.01 144 | expr_weights: 145 | - 1.0e2 146 | - 5.0e1 147 | - 1.0e1 148 | - 0.5e1 149 | - 0.5e1 150 | - 0.5e1 151 | - 0.5e1 152 | hand_pose_prior_weights: 153 | - 4.04e2 154 | - 4.04e2 155 | - 57.4 156 | - 4.78 157 | - 4.78 158 | - 4.78 159 | - 4.78 160 | jaw_pose_prior_weights: 161 | # First phase 162 | - 4.04e03,4.04e04,4.04e04 163 | # Second phase 164 | - 4.04e03,4.04e04,4.04e04 165 | # Third phase 166 | - 574,5740,5740 167 | # Fourth phase 168 | - 47.8,478,478 169 | # Fifth phase 170 | - 47.8,478,478 171 | # Sixth phase 172 | - 47.8,478,478 173 | # Seventh phase 174 | - 47.8,478,478 175 | hand_joints_weights: 176 | - 0.0 177 | - 0.0 178 | - 0.0 179 | - 0.1 180 | - 2.0 181 | - 2.0 182 | - 2.0 183 | face_joints_weights: 184 | - 0.0 185 | - 0.0 186 | - 0.0 187 | - 0.0 188 | - 2.0 189 | - 2.0 190 | - 2.0 191 
| 192 | use_vposer: True 193 | vposer_ckpt: "models/vposer_v1_0/" 194 | part_segm_fn: "models/smplx_parts_segm.pkl" 195 | -------------------------------------------------------------------------------- /cfg_files/RGB.yaml: -------------------------------------------------------------------------------- 1 | output_folder: '~/RGB' 2 | model_folder: 'models' 3 | vposer_ckpt: "models/vposer_v1_0/" 4 | part_segm_fn: "models/smplx_parts_segm.pkl" 5 | 6 | focal_length_x: 1060.5317 7 | focal_length_y: 1060.3856 8 | camera_center_x: 951.2999 9 | camera_center_y: 536.7704 10 | flip: True 11 | camera_mode: 'fixed' 12 | ################### 13 | shape_weights: 14 | - 1.0e2 15 | - 5.0e1 16 | - 1.0e1 17 | - 0.5e1 18 | - 0.5e1 19 | 20 | data_weights: 21 | - 1 22 | - 1 23 | - 1 24 | - 1 25 | - 1 26 | 27 | ################### 28 | dataset: 'openpose' 29 | joints_to_ign: 30 | - 1 31 | - 9 32 | - 12 33 | prior_folder: 'priors' 34 | result_folder: 'results' 35 | gender: "male" 36 | model_type: 'smplx' 37 | # Flag for using the GPU 38 | use_cuda: True 39 | float_dtype: "float32" 40 | use_joints_conf: True 41 | use_pca: True 42 | use_hands: True 43 | use_face: True 44 | flat_hand_mean: False 45 | # The number of mixture components for the pose prior 46 | num_gaussians: 8 47 | body_prior_type: 'l2' 48 | left_hand_prior_type: l2 49 | right_hand_prior_type: l2 50 | jaw_prior_type: 'l2' 51 | num_pca_comps: 12 52 | # The constant for the robust loss function 53 | rho: 100 54 | interpenetration: True 55 | df_cone_height: 0.0001 56 | # Upper arms and Spine 2 57 | # Neck and jaw 58 | ign_part_pairs: ["9,16", "9,17", "6,16", "6,17", "1,2", "12,22"] 59 | penalize_outside: True 60 | max_collisions: 128 61 | # LBFGS with line search 62 | optim_type: 'lbfgsls' 63 | ftol: 1e-9 64 | gtol: 1e-9 65 | lr: 1.0 66 | # Max number of optimization iterations 67 | maxiters: 30 68 | interactive: True 69 | visualize: False 70 | # Relative change in loss value 71 | body_pose_prior_weights: 72 | - 4.04e2 73 | - 4.04e2 74 | - 57.4e0 75 | - 4.78e0 76 | - 4.78e0 77 | coll_loss_weights: 78 | - 0.0 79 | - 0.0 80 | - 0.0 81 | - 0.01 82 | - 1.0 83 | expr_weights: 84 | - 1.0e2 85 | - 5.0e1 86 | - 1.0e1 87 | - 0.5e1 88 | - 0.5e1 89 | hand_pose_prior_weights: 90 | - 4.04e2 91 | - 4.04e2 92 | - 57.4 93 | - 4.78 94 | - 4.78 95 | jaw_pose_prior_weights: 96 | # First phase 97 | - 4.04e03,4.04e04,4.04e04 98 | # Second phase 99 | - 4.04e03,4.04e04,4.04e04 100 | # Third phase 101 | - 574,5740,5740 102 | # Fourth phase 103 | - 47.8,478,478 104 | # Fifth phase 105 | - 47.8,478,478 106 | hand_joints_weights: 107 | - 0.0 108 | - 0.0 109 | - 0.0 110 | - 0.1 111 | - 2.0 112 | face_joints_weights: 113 | - 0.0 114 | - 0.0 115 | - 0.0 116 | - 0.0 117 | - 2.0 118 | use_vposer: True 119 | -------------------------------------------------------------------------------- /cfg_files/SMPLifyD.yaml: -------------------------------------------------------------------------------- 1 | output_folder: '~/SMPLifyD' 2 | model_folder: 'models' 3 | vposer_ckpt: "models/vposer_v1_0/" 4 | part_segm_fn: "models/smplx_parts_segm.pkl" 5 | 6 | focal_length_x: 1060.5317 7 | focal_length_y: 1060.3856 8 | camera_center_x: 951.2999 9 | camera_center_y: 536.7704 10 | flip: True 11 | camera_mode: 'fixed' 12 | 13 | read_depth: True 14 | read_mask: True 15 | mask_on_color: True 16 | # Depth 17 | init_mode: 'scan' 18 | m2s: True 19 | rho_m2s: 5e-1 20 | m2s_weights: 21 | - 1.0 22 | - 1.0 23 | - 1.0 24 | - 1.0 25 | - 1.0 26 | 27 | s2m: True 28 | rho_s2m: 2e-1 29 | s2m_weights: 30 | - 1.0 31 | - 
1.0 32 | - 1e2 33 | - 5e2 34 | - 5e2 35 | 36 | trans_opt_stages: 37 | - 2 38 | - 3 39 | - 4 40 | 41 | shape_weights: 42 | - 1.0e2 43 | - 5.0e1 44 | - 1.0e1 45 | - 0.5e1 46 | - 0.5e1 47 | 48 | data_weights: 49 | - 1 50 | - 1 51 | - 1 52 | - 1 53 | - 1 54 | 55 | ################### 56 | dataset: 'openpose' 57 | joints_to_ign: 58 | - 1 59 | - 9 60 | - 12 61 | prior_folder: 'priors' 62 | result_folder: 'results' 63 | gender: "male" 64 | model_type: 'smplx' 65 | # Flag for using the GPU 66 | use_cuda: True 67 | float_dtype: "float32" 68 | use_joints_conf: True 69 | use_pca: True 70 | use_hands: True 71 | use_face: True 72 | flat_hand_mean: False 73 | # The number of mixture components for the pose prior 74 | num_gaussians: 8 75 | body_prior_type: 'l2' 76 | left_hand_prior_type: l2 77 | right_hand_prior_type: l2 78 | jaw_prior_type: 'l2' 79 | num_pca_comps: 12 80 | # The constant for the robust loss function 81 | rho: 100 82 | interpenetration: True 83 | df_cone_height: 0.0001 84 | # Upper arms and Spine 2 85 | # Neck and jaw 86 | ign_part_pairs: ["9,16", "9,17", "6,16", "6,17", "1,2", "12,22"] 87 | penalize_outside: True 88 | max_collisions: 128 89 | # LBFGS with line search 90 | optim_type: 'lbfgsls' 91 | ftol: 1e-9 92 | gtol: 1e-9 93 | lr: 1.0 94 | # Max number of optimization iterations 95 | maxiters: 30 96 | interactive: True 97 | visualize: False 98 | # Relative change in loss value 99 | body_pose_prior_weights: 100 | - 4.04e2 101 | - 4.04e2 102 | - 57.4e0 103 | - 4.78e0 104 | - 4.78e0 105 | coll_loss_weights: 106 | - 0.0 107 | - 0.0 108 | - 0.0 109 | - 0.01 110 | - 1.0 111 | expr_weights: 112 | - 1.0e2 113 | - 5.0e1 114 | - 1.0e1 115 | - 0.5e1 116 | - 0.5e1 117 | hand_pose_prior_weights: 118 | - 4.04e2 119 | - 4.04e2 120 | - 57.4 121 | - 4.78 122 | - 4.78 123 | jaw_pose_prior_weights: 124 | # First phase 125 | - 4.04e03,4.04e04,4.04e04 126 | # Second phase 127 | - 4.04e03,4.04e04,4.04e04 128 | # Third phase 129 | - 574,5740,5740 130 | # Fourth phase 131 | - 47.8,478,478 132 | # Fifth phase 133 | - 47.8,478,478 134 | 135 | 136 | hand_joints_weights: 137 | - 0.0 138 | - 0.0 139 | - 0.0 140 | - 0.1 141 | - 2.0 142 | face_joints_weights: 143 | - 0.0 144 | - 0.0 145 | - 0.0 146 | - 0.0 147 | - 2.0 148 | 149 | use_vposer: True 150 | -------------------------------------------------------------------------------- /images/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohamedhassanmus/prox/4bc254acb9d03e1a8fda0be76d762b02d17ace25/images/teaser.jpg -------------------------------------------------------------------------------- /prox/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | -------------------------------------------------------------------------------- /prox/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohamedhassanmus/prox/4bc254acb9d03e1a8fda0be76d762b02d17ace25/prox/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /prox/__pycache__/projection_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mohamedhassanmus/prox/4bc254acb9d03e1a8fda0be76d762b02d17ace25/prox/__pycache__/projection_utils.cpython-36.pyc -------------------------------------------------------------------------------- /prox/align_RGBD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | import os 19 | import os.path as osp 20 | import cv2 21 | import argparse 22 | from prox.projection_utils import Projection 23 | 24 | def main(args): 25 | recording_name = osp.basename(args.recording_dir) 26 | color_dir = os.path.join(args.recording_dir, 'Color') 27 | depth_dir = os.path.join(args.recording_dir,'Depth') 28 | scene_name = recording_name.split("_")[0] 29 | base_dir = os.path.abspath(osp.join(args.recording_dir, os.pardir, os.pardir)) 30 | calib_dir = osp.join(base_dir, 'calibration') 31 | 32 | projection = Projection(calib_dir) 33 | 34 | if args.mode == 'color2depth': 35 | color_aligned_dir = osp.join(args.recording_dir, 'Color_aligned') 36 | if not osp.exists(color_aligned_dir): 37 | os.mkdir(color_aligned_dir) 38 | else: 39 | depth_aligned_dir = osp.join(args.recording_dir, 'Depth_aligned') 40 | if not osp.exists(depth_aligned_dir): 41 | os.mkdir(depth_aligned_dir) 42 | 43 | for img_name in sorted(os.listdir(color_dir)): 44 | img_name = osp.splitext(img_name)[0] 45 | print('aligning frame {}'.format(img_name)) 46 | 47 | color_img = cv2.imread(os.path.join(color_dir, img_name + '.jpg')) 48 | 49 | depth_img = cv2.imread(os.path.join(depth_dir, img_name + '.png'), -1).astype(float) 50 | depth_raw = depth_img.copy() 51 | depth_img /= 8.0 52 | depth_img /= 1000.0 53 | 54 | color_img = cv2.flip(color_img, 1) 55 | depth_img = cv2.flip(depth_img, 1) 56 | depth_raw = cv2.flip(depth_raw, 1) 57 | 58 | if args.mode == 'color2depth': 59 | color_aligned = projection.align_color2depth(depth_img, color_img) 60 | cv2.imwrite(osp.join(color_aligned_dir, img_name + '.jpg'), color_aligned) 61 | 62 | else: 63 | depth_aligned = projection.align_depth2color(depth_img, depth_raw) 64 | cv2.imwrite(osp.join(depth_aligned_dir, img_name + '.png'), depth_aligned) 65 | 
66 | if __name__ == '__main__': 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument('recording_dir', type=str, default=os.getcwd(), 69 | help='path to recording') 70 | parser.add_argument('--mode', default='color2depth', type=str, 71 | choices=['color2depth', 'depth2color'], 72 | help='') 73 | 74 | 75 | args = parser.parse_args() 76 | main(args) 77 | -------------------------------------------------------------------------------- /prox/camera.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | from collections import namedtuple 23 | 24 | import torch 25 | import torch.nn as nn 26 | 27 | from smplx.lbs import transform_mat 28 | 29 | 30 | PerspParams = namedtuple('ModelOutput', 31 | ['rotation', 'translation', 'center', 32 | 'focal_length']) 33 | 34 | 35 | def create_camera(camera_type='persp', **kwargs): 36 | if camera_type.lower() == 'persp': 37 | return PerspectiveCamera(**kwargs) 38 | else: 39 | raise ValueError('Uknown camera type: {}'.format(camera_type)) 40 | 41 | 42 | class PerspectiveCamera(nn.Module): 43 | 44 | FOCAL_LENGTH = 5000 45 | 46 | def __init__(self, rotation=None, translation=None, 47 | focal_length_x=None, focal_length_y=None, 48 | batch_size=1, 49 | center=None, dtype=torch.float32, **kwargs): 50 | super(PerspectiveCamera, self).__init__() 51 | self.batch_size = batch_size 52 | self.dtype = dtype 53 | # Make a buffer so that PyTorch does not complain when creating 54 | # the camera matrix 55 | self.register_buffer('zero', 56 | torch.zeros([batch_size], dtype=dtype)) 57 | 58 | if focal_length_x is None or type(focal_length_x) == float: 59 | focal_length_x = torch.full( 60 | [batch_size], 61 | self.FOCAL_LENGTH if focal_length_x is None else 62 | focal_length_x, 63 | dtype=dtype) 64 | 65 | if focal_length_y is None or type(focal_length_y) == float: 66 | focal_length_y = torch.full( 67 | [batch_size], 68 | self.FOCAL_LENGTH if focal_length_y is None else 69 | focal_length_y, 70 | dtype=dtype) 71 | 72 | self.register_buffer('focal_length_x', focal_length_x) 73 | self.register_buffer('focal_length_y', focal_length_y) 74 | 75 | if center is None: 76 | center = torch.zeros([batch_size, 2], dtype=dtype) 77 | self.register_buffer('center', center) 78 | 79 | if rotation is None: 80 | rotation = torch.eye( 81 | 3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1) 82 | 83 | rotation = nn.Parameter(rotation, requires_grad=True) 84 | self.register_parameter('rotation', rotation) 85 | 86 | if translation is None: 87 | translation = torch.zeros([batch_size, 3], dtype=dtype) 88 | 89 | translation = 
nn.Parameter(translation, 90 | requires_grad=True) 91 | self.register_parameter('translation', translation) 92 | 93 | def forward(self, points): 94 | device = points.device 95 | 96 | with torch.no_grad(): 97 | camera_mat = torch.zeros([self.batch_size, 2, 2], 98 | dtype=self.dtype, device=points.device) 99 | camera_mat[:, 0, 0] = self.focal_length_x 100 | camera_mat[:, 1, 1] = self.focal_length_y 101 | 102 | camera_transform = transform_mat(self.rotation, 103 | self.translation.unsqueeze(dim=-1)) 104 | homog_coord = torch.ones(list(points.shape)[:-1] + [1], 105 | dtype=points.dtype, 106 | device=device) 107 | # Convert the points to homogeneous coordinates 108 | points_h = torch.cat([points, homog_coord], dim=-1) 109 | 110 | projected_points = torch.einsum('bki,bji->bjk', 111 | [camera_transform, points_h]) 112 | 113 | img_points = torch.div(projected_points[:, :, :2], 114 | projected_points[:, :, 2].unsqueeze(dim=-1)) 115 | img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \ 116 | + self.center.unsqueeze(dim=1) 117 | return img_points 118 | -------------------------------------------------------------------------------- /prox/cmd_parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | import sys 23 | import os 24 | 25 | import configargparse 26 | 27 | 28 | def parse_config(argv=None): 29 | arg_formatter = configargparse.ArgumentDefaultsHelpFormatter 30 | 31 | cfg_parser = configargparse.YAMLConfigFileParser 32 | description = 'PyTorch implementation of SMPLifyX' 33 | parser = configargparse.ArgParser(formatter_class=arg_formatter, 34 | config_file_parser_class=cfg_parser, 35 | description=description, 36 | prog='SMPLifyX') 37 | 38 | parser.add_argument('--recording_dir', 39 | default=os.getcwd(), 40 | help='The directory that contains the data.') 41 | parser.add_argument('--max_persons', type=int, default=3, 42 | help='The maximum number of persons to process') 43 | parser.add_argument('-c', '--config', 44 | required=True, is_config_file=True, 45 | help='config file path') 46 | parser.add_argument('--loss_type', default='smplify', type=str, 47 | help='The type of loss to use') 48 | parser.add_argument('--interactive', 49 | type=lambda arg: arg.lower() == 'true', 50 | default=False, 51 | help='Print info messages during the process') 52 | parser.add_argument('--save_meshes', 53 | type=lambda arg: arg.lower() == 'true', 54 | default=True, 55 | help='Save final output meshes') 56 | parser.add_argument('--visualize', 57 | type=lambda arg: arg.lower() == 'true', 58 | default=False, 59 | help='Display plots while running the optimization') 60 | parser.add_argument('--degrees', type=float, default=[0, 90, 180, 270], 61 | help='Degrees of rotation for rendering the final' + 62 | ' result') 63 | parser.add_argument('--use_cuda', 64 | type=lambda arg: arg.lower() == 'true', 65 | default=True, 66 | help='Use CUDA for the computations') 67 | parser.add_argument('--dataset', default='hands_cmu_gt', type=str, 68 | help='The name of the dataset that will be used') 69 | parser.add_argument('--joints_to_ign', default=-1, type=int, 70 | nargs='*', 71 | help='Indices of joints to be ignored') 72 | parser.add_argument('--output_folder', 73 | default='output', 74 | type=str, 75 | help='The folder where the output is stored') 76 | parser.add_argument('--img_folder', type=str, default='Color', 77 | help='The folder where the images are stored') 78 | parser.add_argument('--summary_folder', type=str, default='summaries', 79 | help='Where to store the TensorBoard summaries') 80 | parser.add_argument('--result_folder', type=str, default='results', 81 | help='The folder with the pkls of the output' + 82 | ' parameters') 83 | parser.add_argument('--mesh_folder', type=str, default='meshes', 84 | help='The folder where the output meshes are stored') 85 | parser.add_argument('--gender_lbl_type', default='none', 86 | choices=['none', 'gt', 'pd'], type=str, 87 | help='The type of gender label to use') 88 | parser.add_argument('--gender', type=str, 89 | default='neutral', 90 | choices=['neutral', 'male', 'female'], 91 | help='Use gender neutral or gender specific SMPL' + 92 | 'model') 93 | parser.add_argument('--float_dtype', type=str, default='float32', 94 | help='The types of floats used') 95 | parser.add_argument('--model_type', default='smpl', type=str, 96 | choices=['smpl', 'smplh', 'smplx'], 97 | help='The type of the model that we will fit to the' + 98 | ' data.') 99 | parser.add_argument('--camera_type', type=str, default='persp', 100 | choices=['persp'], 101 | help='The type of camera used') 102 | 
parser.add_argument('--optim_jaw', default=True, 103 | type=lambda x: x.lower() in ['true', '1'], 104 | help='Optimize over the jaw pose') 105 | parser.add_argument('--optim_hands', default=True, 106 | type=lambda x: x.lower() in ['true', '1'], 107 | help='Optimize over the hand pose') 108 | parser.add_argument('--optim_expression', default=True, 109 | type=lambda x: x.lower() in ['true', '1'], 110 | help='Optimize over the expression') 111 | parser.add_argument('--optim_shape', default=True, 112 | type=lambda x: x.lower() in ['true', '1'], 113 | help='Optimize over the shape space') 114 | 115 | parser.add_argument('--model_folder', 116 | default='models', 117 | type=str, 118 | help='The directory where the models are stored.') 119 | parser.add_argument('--use_joints_conf', default=True, 120 | type=lambda x: x.lower() in ['true', '1'], 121 | help='Use the confidence scores for the optimization') 122 | parser.add_argument('--batch_size', type=int, default=1, 123 | help='The size of the batch') 124 | parser.add_argument('--num_gaussians', 125 | default=8, 126 | type=int, 127 | help='The number of gaussian for the Pose Mixture' + 128 | ' Prior.') 129 | parser.add_argument('--use_pca', default=True, 130 | type=lambda x: x.lower() in ['true', '1'], 131 | help='Use the low dimensional PCA space for the hands') 132 | parser.add_argument('--num_pca_comps', default=6, type=int, 133 | help='The number of PCA components for the hand.') 134 | parser.add_argument('--flat_hand_mean', default=False, 135 | type=lambda arg: arg.lower() in ['true', '1'], 136 | help='Use the flat hand as the mean pose') 137 | parser.add_argument('--body_prior_type', default='mog', type=str, 138 | help='The type of prior that will be used to' + 139 | ' regularize the optimization. Can be a Mixture of' + 140 | ' Gaussians (mog)') 141 | parser.add_argument('--left_hand_prior_type', default='mog', type=str, 142 | choices=['mog', 'l2', 'None'], 143 | help='The type of prior that will be used to' + 144 | ' regularize the optimization of the pose of the' + 145 | ' left hand. Can be a Mixture of' + 146 | ' Gaussians (mog)') 147 | parser.add_argument('--right_hand_prior_type', default='mog', type=str, 148 | choices=['mog', 'l2', 'None'], 149 | help='The type of prior that will be used to' + 150 | ' regularize the optimization of the pose of the' + 151 | ' right hand. Can be a Mixture of' + 152 | ' Gaussians (mog)') 153 | parser.add_argument('--jaw_prior_type', default='l2', type=str, 154 | choices=['l2', 'None'], 155 | help='The type of prior that will be used to' + 156 | ' regularize the optimization of the pose of the' + 157 | ' jaw.') 158 | parser.add_argument('--use_vposer', default=False, 159 | type=lambda arg: arg.lower() in ['true', '1'], 160 | help='Use the VAE pose embedding') 161 | parser.add_argument('--vposer_ckpt', type=str, default='', 162 | help='The path to the V-Poser checkpoint') 163 | # Left/Right shoulder and hips 164 | parser.add_argument('--init_joints_idxs', nargs='*', type=int, 165 | default=[9, 12, 2, 5], 166 | help='Which joints to use for initializing the camera') 167 | parser.add_argument('--body_tri_idxs', default='5.12,2.9', 168 | type=lambda x: [list(map(int, pair.split('.'))) 169 | for pair in x.split(',')], 170 | help='The indices of the joints used to estimate' + 171 | ' the initial depth of the camera. 
The format' + 172 | ' should be vIdx1.vIdx2,vIdx3.vIdx4') 173 | 174 | parser.add_argument('--prior_folder', type=str, default='prior', 175 | help='The folder where the prior is stored') 176 | parser.add_argument('--rho', 177 | default=100, 178 | type=float, 179 | help='Value of constant of robust loss') 180 | parser.add_argument('--interpenetration', 181 | default=False, 182 | type=lambda x: x.lower() in ['true', '1'], 183 | help='Whether to use the interpenetration term') 184 | parser.add_argument('--penalize_outside', 185 | default=False, 186 | type=lambda x: x.lower() in ['true', '1'], 187 | help='Penalize outside') 188 | parser.add_argument('--data_weights', nargs='*', 189 | default=[1, ] * 5, type=float, 190 | help='The weight of the data term') 191 | parser.add_argument('--body_pose_prior_weights', 192 | default=[4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78], 193 | nargs='*', 194 | type=float, 195 | help='The weights of the body pose regularizer') 196 | parser.add_argument('--shape_weights', 197 | default=[1e2, 5 * 1e1, 1e1, .5 * 1e1], 198 | type=float, nargs='*', 199 | help='The weights of the Shape regularizer') 200 | parser.add_argument('--expr_weights', 201 | default=[1e2, 5 * 1e1, 1e1, .5 * 1e1], 202 | type=float, nargs='*', 203 | help='The weights of the Expressions regularizer') 204 | parser.add_argument('--face_joints_weights', 205 | default=[0.0, 0.0, 0.0, 2.0], type=float, 206 | nargs='*', 207 | help='The weights for the facial keypoints' + 208 | ' for each stage of the optimization') 209 | parser.add_argument('--hand_joints_weights', 210 | default=[0.0, 0.0, 0.0, 2.0], 211 | type=float, nargs='*', 212 | help='The weights for the 2D joint error of the hands') 213 | parser.add_argument('--jaw_pose_prior_weights', 214 | nargs='*', 215 | help='The weights of the pose regularizer of the' + 216 | ' hands') 217 | parser.add_argument('--hand_pose_prior_weights', 218 | default=[1e2, 5 * 1e1, 1e1, .5 * 1e1], 219 | type=float, nargs='*', 220 | help='The weights of the pose regularizer of the' + 221 | ' hands') 222 | parser.add_argument('--coll_loss_weights', 223 | default=[0.0, 0.0, 0.0, 2.0], type=float, 224 | nargs='*', 225 | help='The weight for the collision term') 226 | 227 | parser.add_argument('--depth_loss_weight', default=1e2, type=float, 228 | help='The weight for the regularizer for the' + 229 | ' z coordinate of the camera translation') 230 | parser.add_argument('--df_cone_height', default=0.5, type=float, 231 | help='The default value for the height of the cone' + 232 | ' that is used to calculate the penetration distance' + 233 | ' field') 234 | parser.add_argument('--max_collisions', default=8, type=int, 235 | help='The maximum number of bounding box collisions') 236 | parser.add_argument('--point2plane', default=False, 237 | type=lambda arg: arg.lower() in ['true', '1'], 238 | help='Use point to plane distance') 239 | parser.add_argument('--part_segm_fn', default='', type=str, 240 | help='The file with the part segmentation for the' + 241 | ' faces of the model') 242 | parser.add_argument('--ign_part_pairs', default=None, 243 | nargs='*', type=str, 244 | help='Pairs of parts whose collisions will be ignored') 245 | parser.add_argument('--use_hands', default=False, 246 | type=lambda x: x.lower() in ['true', '1'], 247 | help='Use the hand keypoints in the SMPL' + 248 | 'optimization process') 249 | parser.add_argument('--use_face', default=False, 250 | type=lambda x: x.lower() in ['true', '1'], 251 | help='Use the facial keypoints in the optimization' + 252 | ' process') 253 | 
parser.add_argument('--use_face_contour', default=False, 254 | type=lambda x: x.lower() in ['true', '1'], 255 | help='Use the dynamic contours of the face') 256 | parser.add_argument('--side_view_thsh', 257 | default=25, 258 | type=float, 259 | help='This is thresholding value that determines' + 260 | ' whether the human is captured in a side view.' + 261 | 'If the pixel distance between the shoulders is less' + 262 | ' than this value, two initializations of SMPL fits' + 263 | ' are tried.') 264 | parser.add_argument('--optim_type', type=str, default='adam', 265 | help='The optimizer used') 266 | parser.add_argument('--lr', type=float, default=1e-6, 267 | help='The learning rate for the algorithm') 268 | parser.add_argument('--gtol', type=float, default=1e-8, 269 | help='The tolerance threshold for the gradient') 270 | parser.add_argument('--ftol', type=float, default=2e-9, 271 | help='The tolerance threshold for the function') 272 | parser.add_argument('--maxiters', type=int, default=100, 273 | help='The maximum iterations for the optimization') 274 | ####################################################################### 275 | ### PROX 276 | parser.add_argument('--frame_ids', 277 | default=None, type=int, 278 | nargs='*', 279 | help='') 280 | parser.add_argument('--start', type=int, default=0, 281 | help='id of the starting frame') 282 | parser.add_argument('--step', type=int, default=1, 283 | help='step') 284 | 285 | parser.add_argument('--flip', default=False, 286 | type=lambda arg: arg.lower() in ['true', '1'], 287 | help='flip image and keypoints') 288 | parser.add_argument('--camera_mode', type=str, default='moving', 289 | choices=['moving', 'fixed'], 290 | help='The mode of camera used') 291 | parser.add_argument('--focal_length_x', 292 | default=5000, 293 | type=float, 294 | help='Value of focal length.') 295 | parser.add_argument('--focal_length_y', 296 | default=5000, 297 | type=float, 298 | help='Value of focal length.') 299 | parser.add_argument('--camera_center_x', 300 | default=None, 301 | type=float, 302 | help='Value of camera center x.') 303 | parser.add_argument('--camera_center_y', 304 | default=None, 305 | type=float, 306 | help='Value of camera center y.') 307 | parser.add_argument('--render_results', 308 | type=lambda arg: arg.lower() == 'true', 309 | default=True, 310 | help='render final results') 311 | parser.add_argument('--trans_opt_stages', 312 | default=[3,4], type=int, 313 | nargs='*', 314 | help='stages where translation will be optimized') 315 | parser.add_argument('--viz_mode', default='o3d', type=str, 316 | choices=['mv', 'o3d'], 317 | help='') 318 | ## Depth fitting 319 | parser.add_argument('--s2m_weights', default=[0.0, 0.0, 0.0, 0.0, 0.0], nargs='*', type=float, 320 | help='') 321 | parser.add_argument('--s2m', 322 | type=lambda arg: arg.lower() in ['true', '1'], 323 | default=False, 324 | help='Whether to save the meshes') 325 | parser.add_argument('--m2s', 326 | type=lambda arg: arg.lower() in ['true', '1'], 327 | default=False, 328 | help='Whether to save the meshes') 329 | parser.add_argument('--m2s_weights', default=[0.0, 0.0, 0.0, 0.0, 0.0], nargs='*', type=float, 330 | help='') 331 | parser.add_argument('--rho_s2m', 332 | default=1, 333 | type=float, 334 | help='Value of constant of robust loss') 335 | parser.add_argument('--rho_m2s', 336 | default=1, 337 | type=float, 338 | help='Value of constant of robust loss') 339 | parser.add_argument('--read_depth', default=False, 340 | type=lambda arg: arg.lower() in ['true', '1'], 341 | help='Read 
depth frames') 342 | parser.add_argument('--read_mask', default=False, 343 | type=lambda arg: arg.lower() in ['true', '1'], 344 | help='Read masks') 345 | parser.add_argument('--mask_folder', type=str, default='BodyIndex', 346 | help='The folder where the masks are stored') 347 | parser.add_argument('--mask_on_color', default=False, 348 | type=lambda arg: arg.lower() in ['true', '1'], 349 | help='Whether to read the body mask defined on the color image') 350 | parser.add_argument('--init_mode', default=None, type=str, 351 | choices=[None, 'scan', 'both'], 352 | help='How to initialize the global translation: similar triangles, mean of the scan, or the average of both') 353 | ################################ 354 | # sdf penetration 355 | parser.add_argument('--sdf_penetration', default=False, 356 | type=lambda arg: arg.lower() in ['true', '1'], 357 | help='Whether to use the scene SDF penetration term') 358 | parser.add_argument('--sdf_penetration_weights', default=[0.0, 0.0, 0.0, 0.0, 0.0], nargs='*', type=float, 359 | help='The weights of the scene SDF penetration term') 360 | ## contact 361 | parser.add_argument('--contact', 362 | type=lambda arg: arg.lower() in ['true', '1'], 363 | default=False, 364 | help='Whether to use the body-scene contact term') 365 | parser.add_argument('--rho_contact', 366 | default=1, 367 | type=float, 368 | help='Value of constant of robust loss') 369 | parser.add_argument('--contact_angle', 370 | default=45, 371 | type=float, 372 | help='Maximum angle (in degrees) between body and scene normals for a contact to be considered valid') 373 | parser.add_argument('--contact_loss_weights', 374 | default=[0.0, 0.0, 0.0, 0.0, 0.0], type=float, 375 | nargs='*', 376 | help='The weight for the contact term') 377 | parser.add_argument('--contact_body_parts', 378 | default=['L_Leg', 'R_Leg', 'L_Hand', 'R_Hand', 'gluteus', 'back', 'thighs'], type=str, 379 | nargs='*', 380 | help='Body parts considered for the contact term') 381 | parser.add_argument('--load_scene', type=lambda arg: arg.lower() in ['true', '1'], 382 | default=False, help='Whether to load the scene mesh') 383 | 384 | 385 | 386 | args = parser.parse_args() 387 | args_dict = vars(args) 388 | return args_dict 389 | -------------------------------------------------------------------------------- /prox/data_parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
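The block above closes prox/cmd_parser.py: the parsed namespace is converted with vars() into a plain dictionary, which the rest of the code consumes as keyword arguments. A minimal sketch of how it is typically driven, assuming the enclosing function is named parse_config and that main.py forwards the resulting dict to the fitting code; both names are assumptions, since neither definition appears in this excerpt:

import sys
from cmd_parser import parse_config  # assumed name of the enclosing parser function

# Hypothetical command line; the flags themselves are defined above.
sys.argv = ['main.py',
            '--use_hands', 'True',
            '--s2m', 'True',
            '--s2m_weights', '0.0', '0.0', '0.0', '500.0', '500.0']
args = parse_config()          # returns vars(args), i.e. a plain dict
print(args['s2m'], args['s2m_weights'])
# Downstream, main.py is expected to splat this dict into calls such as
# fit_single_frame(..., **args), which is why every option resurfaces there
# as a keyword argument with the same name.
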
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | import os 23 | import os.path as osp 24 | 25 | import json 26 | 27 | from collections import namedtuple 28 | 29 | import cv2 30 | import numpy as np 31 | 32 | import torch 33 | from torch.utils.data import Dataset 34 | 35 | 36 | from misc_utils import smpl_to_openpose 37 | from projection_utils import Projection 38 | 39 | 40 | Keypoints = namedtuple('Keypoints', 41 | ['keypoints', 'gender_gt', 'gender_pd']) 42 | 43 | Keypoints.__new__.__defaults__ = (None,) * len(Keypoints._fields) 44 | 45 | 46 | def create_dataset(dataset='openpose', data_folder='data', **kwargs): 47 | if dataset.lower() == 'openpose': 48 | return OpenPose(data_folder, **kwargs) 49 | else: 50 | raise ValueError('Unknown dataset: {}'.format(dataset)) 51 | 52 | 53 | def read_keypoints(keypoint_fn, use_hands=True, use_face=True, 54 | use_face_contour=False): 55 | with open(keypoint_fn) as keypoint_file: 56 | data = json.load(keypoint_file) 57 | 58 | keypoints = [] 59 | 60 | gender_pd = [] 61 | gender_gt = [] 62 | for idx, person_data in enumerate(data['people']): 63 | body_keypoints = np.array(person_data['pose_keypoints_2d'], 64 | dtype=np.float32) 65 | body_keypoints = body_keypoints.reshape([-1, 3]) 66 | if use_hands: 67 | left_hand_keyp = np.array( 68 | person_data['hand_left_keypoints_2d'], 69 | dtype=np.float32).reshape([-1, 3]) 70 | right_hand_keyp = np.array( 71 | person_data['hand_right_keypoints_2d'], 72 | dtype=np.float32).reshape([-1, 3]) 73 | 74 | body_keypoints = np.concatenate( 75 | [body_keypoints, left_hand_keyp, right_hand_keyp], axis=0) 76 | if use_face: 77 | # TODO: Make parameters, 17 is the offset for the eye brows, 78 | # etc. 
51 is the total number of FLAME compatible landmarks 79 | face_keypoints = np.array( 80 | person_data['face_keypoints_2d'], 81 | dtype=np.float32).reshape([-1, 3])[17: 17 + 51, :] 82 | 83 | contour_keyps = np.array( 84 | [], dtype=body_keypoints.dtype).reshape(0, 3) 85 | if use_face_contour: 86 | contour_keyps = np.array( 87 | person_data['face_keypoints_2d'], 88 | dtype=np.float32).reshape([-1, 3])[:17, :] 89 | 90 | body_keypoints = np.concatenate( 91 | [body_keypoints, face_keypoints, contour_keyps], axis=0) 92 | 93 | if 'gender_pd' in person_data: 94 | gender_pd.append(person_data['gender_pd']) 95 | if 'gender_gt' in person_data: 96 | gender_gt.append(person_data['gender_gt']) 97 | 98 | keypoints.append(body_keypoints) 99 | 100 | return Keypoints(keypoints=keypoints, gender_pd=gender_pd, 101 | gender_gt=gender_gt) 102 | 103 | 104 | class OpenPose(Dataset): 105 | 106 | NUM_BODY_JOINTS = 25 107 | NUM_HAND_JOINTS = 20 108 | 109 | def __init__(self, data_folder, img_folder='images', 110 | keyp_folder='keypoints', 111 | calib_dir='', 112 | use_hands=False, 113 | use_face=False, 114 | dtype=torch.float32, 115 | model_type='smplx', 116 | joints_to_ign=None, 117 | use_face_contour=False, 118 | openpose_format='coco25', 119 | depth_folder='Depth', 120 | mask_folder='BodyIndex', 121 | mask_color_folder='BodyIndexColor', 122 | read_depth=False, 123 | read_mask=False, 124 | mask_on_color=False, 125 | depth_scale=1e-3, 126 | flip=False, 127 | start=0, 128 | step=1, 129 | scale_factor=1, 130 | frame_ids=None, 131 | init_mode='sk', 132 | **kwargs): 133 | super(OpenPose, self).__init__() 134 | 135 | self.use_hands = use_hands 136 | self.use_face = use_face 137 | self.model_type = model_type 138 | self.dtype = dtype 139 | self.joints_to_ign = joints_to_ign 140 | self.use_face_contour = use_face_contour 141 | 142 | self.openpose_format = openpose_format 143 | 144 | self.num_joints = (self.NUM_BODY_JOINTS + 145 | 2 * self.NUM_HAND_JOINTS * use_hands) 146 | self.img_folder = osp.join(data_folder, img_folder) 147 | self.keyp_folder = osp.join(keyp_folder) 148 | self.depth_folder = os.path.join(data_folder, depth_folder) 149 | self.mask_folder = os.path.join(data_folder, mask_folder) 150 | self.mask_color_folder = os.path.join(data_folder, mask_color_folder) 151 | 152 | self.img_paths = [osp.join(self.img_folder, img_fn) 153 | for img_fn in os.listdir(self.img_folder) 154 | if img_fn.endswith('.png') or 155 | img_fn.endswith('.jpg') and 156 | not img_fn.startswith('.')] 157 | self.img_paths = sorted(self.img_paths) 158 | if frame_ids is None: 159 | self.img_paths = self.img_paths[start::step] 160 | else: 161 | self.img_paths = [self.img_paths[id -1] for id in frame_ids] 162 | 163 | self.cnt = 0 164 | self.depth_scale = depth_scale 165 | self.flip = flip 166 | self.read_depth = read_depth 167 | self.read_mask = read_mask 168 | self.scale_factor = scale_factor 169 | self.init_mode = init_mode 170 | self.mask_on_color = mask_on_color 171 | self.projection = Projection(calib_dir) 172 | 173 | def get_model2data(self): 174 | return smpl_to_openpose(self.model_type, use_hands=self.use_hands, 175 | use_face=self.use_face, 176 | use_face_contour=self.use_face_contour, 177 | openpose_format=self.openpose_format) 178 | 179 | def get_left_shoulder(self): 180 | return 2 181 | 182 | def get_right_shoulder(self): 183 | return 5 184 | 185 | def get_joint_weights(self): 186 | # The weights for the joint terms in the optimization 187 | optim_weights = np.ones(self.num_joints + 2 * self.use_hands + 188 | self.use_face * 51 + 
189 | 17 * self.use_face_contour, 190 | dtype=np.float32) 191 | 192 | # Neck, Left and right hip 193 | # These joints are ignored because SMPL has no neck joint and the 194 | # annotation of the hips is ambiguous. 195 | if self.joints_to_ign is not None and -1 not in self.joints_to_ign: 196 | optim_weights[self.joints_to_ign] = 0. 197 | return torch.tensor(optim_weights, dtype=self.dtype) 198 | 199 | def __len__(self): 200 | return len(self.img_paths) 201 | 202 | def __getitem__(self, idx): 203 | img_path = self.img_paths[idx] 204 | return self.read_item(img_path) 205 | 206 | def read_item(self, img_path): 207 | img = cv2.imread(img_path).astype(np.float32)[:, :, ::-1] / 255.0 208 | if self.flip: 209 | img = cv2.flip(img, 1) 210 | img_fn = osp.split(img_path)[1] 211 | img_fn, _ = osp.splitext(osp.split(img_path)[1]) 212 | 213 | keypoint_fn = osp.join(self.keyp_folder, 214 | img_fn + '_keypoints.json') 215 | keyp_tuple = read_keypoints(keypoint_fn, use_hands=self.use_hands, 216 | use_face=self.use_face, 217 | use_face_contour=self.use_face_contour) 218 | 219 | if len(keyp_tuple.keypoints) < 1: 220 | return {} 221 | keypoints = np.stack(keyp_tuple.keypoints) 222 | 223 | depth_im = None 224 | if self.read_depth: 225 | depth_im = cv2.imread(os.path.join(self.depth_folder, img_fn + '.png'), flags=-1).astype(float) 226 | depth_im = depth_im / 8. 227 | depth_im = depth_im * self.depth_scale 228 | if self.flip: 229 | depth_im = cv2.flip(depth_im, 1) 230 | 231 | mask = None 232 | if self.read_mask: 233 | if self.mask_on_color: 234 | mask = cv2.imread(os.path.join(self.mask_color_folder, img_fn + '.png'), cv2.IMREAD_GRAYSCALE) 235 | else: 236 | mask = cv2.imread(os.path.join(self.mask_folder, img_fn + '.png'), cv2.IMREAD_GRAYSCALE) 237 | mask = cv2.threshold(mask, 254, 255, cv2.THRESH_BINARY)[1] 238 | if self.flip: 239 | mask = cv2.flip(mask, 1) 240 | 241 | scan_dict = None 242 | init_trans = None 243 | if depth_im is not None and mask is not None: 244 | scan_dict = self.projection.create_scan(mask, depth_im, mask_on_color=self.mask_on_color) 245 | init_trans = np.mean(scan_dict.get('points'), axis=0) 246 | 247 | output_dict = {'fn': img_fn, 248 | 'img_path': img_path, 249 | 'keypoints': keypoints, 250 | 'img': img, 251 | 'init_trans': init_trans, 252 | 'depth_im': depth_im, 253 | 'mask': mask, 254 | 'scan_dict':scan_dict} 255 | if keyp_tuple.gender_gt is not None: 256 | if len(keyp_tuple.gender_gt) > 0: 257 | output_dict['gender_gt'] = keyp_tuple.gender_gt 258 | if keyp_tuple.gender_pd is not None: 259 | if len(keyp_tuple.gender_pd) > 0: 260 | output_dict['gender_pd'] = keyp_tuple.gender_pd 261 | return output_dict 262 | 263 | def __iter__(self): 264 | return self 265 | 266 | def __next__(self): 267 | return self.next() 268 | 269 | def next(self): 270 | if self.cnt >= len(self.img_paths): 271 | raise StopIteration 272 | 273 | img_path = self.img_paths[self.cnt] 274 | self.cnt += 1 275 | 276 | return self.read_item(img_path) 277 | -------------------------------------------------------------------------------- /prox/dist_chamfer.py: -------------------------------------------------------------------------------- 1 | # Taking from https://github.com/ThibaultGROUEIX/chamfer_pytorch/blob/719b0f1ca5ba370616cb837c03ab88d9a88173ff/dist_chamfer_idx.py 2 | from torch import nn 3 | from torch.autograd import Function 4 | import torch 5 | import chamfer 6 | 7 | 8 | # Chamfer's distance module @thibaultgroueix 9 | # GPU tensors only 10 | class chamferFunction(Function): 11 | @staticmethod 12 | def forward(ctx, 
xyz1, xyz2): 13 | batchsize, n, _ = xyz1.size() 14 | _, m, _ = xyz2.size() 15 | 16 | dist1 = torch.zeros(batchsize, n) 17 | dist2 = torch.zeros(batchsize, m) 18 | 19 | idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) 20 | idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) 21 | 22 | dist1 = dist1.cuda() 23 | dist2 = dist2.cuda() 24 | idx1 = idx1.cuda() 25 | idx2 = idx2.cuda() 26 | 27 | chamfer.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 28 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 29 | return dist1, dist2, idx1, idx2 30 | 31 | @staticmethod 32 | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2): 33 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 34 | graddist1 = graddist1.contiguous() 35 | graddist2 = graddist2.contiguous() 36 | 37 | gradxyz1 = torch.zeros(xyz1.size()) 38 | gradxyz2 = torch.zeros(xyz2.size()) 39 | 40 | gradxyz1 = gradxyz1.cuda() 41 | gradxyz2 = gradxyz2.cuda() 42 | chamfer.backward( 43 | xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2 44 | ) 45 | return gradxyz1, gradxyz2 46 | 47 | 48 | class chamferDist(nn.Module): 49 | def __init__(self): 50 | super(chamferDist, self).__init__() 51 | 52 | def forward(self, input1, input2): 53 | return chamferFunction.apply(input1, input2) 54 | -------------------------------------------------------------------------------- /prox/fit_single_frame.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
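dist_chamfer.py above wraps the compiled chamfer CUDA extension: for every point in each of the two clouds it returns the squared distance to, and the index of, the closest point in the other cloud (the fitting code later takes the square root of these distances). A minimal usage sketch, assuming the extension has been built and a CUDA device is available; the point counts are only examples:

import torch
from dist_chamfer import chamferDist  # needs the compiled 'chamfer' CUDA extension

chamfer_dist = chamferDist()
scan = torch.rand(1, 2048, 3).cuda()    # e.g. points back-projected from a depth frame
body = torch.rand(1, 10475, 3).cuda()   # e.g. SMPL-X vertices (SMPL-X has 10475)

dist1, dist2, idx1, idx2 = chamfer_dist(scan, body)
# dist1: (1, 2048)  squared distance of each scan point to its nearest body vertex
# dist2: (1, 10475) squared distance of each body vertex to its nearest scan point
# idx1, idx2: indices of those nearest neighbours in the opposite cloud
print(dist1.shape, dist2.shape, idx1.dtype)
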
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | 23 | import time 24 | try: 25 | import cPickle as pickle 26 | except ImportError: 27 | import pickle 28 | 29 | import sys 30 | import os 31 | import os.path as osp 32 | 33 | import numpy as np 34 | import torch 35 | 36 | from tqdm import tqdm 37 | 38 | from collections import defaultdict 39 | 40 | import cv2 41 | import PIL.Image as pil_img 42 | import json 43 | from optimizers import optim_factory 44 | 45 | import fitting 46 | from human_body_prior.tools.model_loader import load_vposer 47 | from psbody.mesh import Mesh 48 | import scipy.sparse as sparse 49 | 50 | 51 | def fit_single_frame(img, 52 | keypoints, 53 | init_trans, 54 | scan, 55 | scene_name, 56 | body_model, 57 | camera, 58 | joint_weights, 59 | body_pose_prior, 60 | jaw_prior, 61 | left_hand_prior, 62 | right_hand_prior, 63 | shape_prior, 64 | expr_prior, 65 | angle_prior, 66 | result_fn='out.pkl', 67 | mesh_fn='out.obj', 68 | body_scene_rendering_fn='body_scene.png', 69 | out_img_fn='overlay.png', 70 | loss_type='smplify', 71 | use_cuda=True, 72 | init_joints_idxs=(9, 12, 2, 5), 73 | use_face=True, 74 | use_hands=True, 75 | data_weights=None, 76 | body_pose_prior_weights=None, 77 | hand_pose_prior_weights=None, 78 | jaw_pose_prior_weights=None, 79 | shape_weights=None, 80 | expr_weights=None, 81 | hand_joints_weights=None, 82 | face_joints_weights=None, 83 | depth_loss_weight=1e2, 84 | interpenetration=True, 85 | coll_loss_weights=None, 86 | df_cone_height=0.5, 87 | penalize_outside=True, 88 | max_collisions=8, 89 | point2plane=False, 90 | part_segm_fn='', 91 | focal_length_x=5000., 92 | focal_length_y=5000., 93 | side_view_thsh=25., 94 | rho=100, 95 | vposer_latent_dim=32, 96 | vposer_ckpt='', 97 | use_joints_conf=False, 98 | interactive=True, 99 | visualize=False, 100 | save_meshes=True, 101 | degrees=None, 102 | batch_size=1, 103 | dtype=torch.float32, 104 | ign_part_pairs=None, 105 | left_shoulder_idx=2, 106 | right_shoulder_idx=5, 107 | #################### 108 | ### PROX 109 | render_results=True, 110 | camera_mode='moving', 111 | ## Depth 112 | s2m=False, 113 | s2m_weights=None, 114 | m2s=False, 115 | m2s_weights=None, 116 | rho_s2m=1, 117 | rho_m2s=1, 118 | init_mode=None, 119 | trans_opt_stages=None, 120 | viz_mode='mv', 121 | #penetration 122 | sdf_penetration=False, 123 | sdf_penetration_weights=0.0, 124 | sdf_dir=None, 125 | cam2world_dir=None, 126 | #contact 127 | contact=False, 128 | rho_contact=1.0, 129 | contact_loss_weights=None, 130 | contact_angle=15, 131 | contact_body_parts=None, 132 | body_segments_dir=None, 133 | load_scene=False, 134 | scene_dir=None, 135 | **kwargs): 136 | assert batch_size == 1, 'PyTorch L-BFGS only supports batch_size == 1' 137 | body_model.reset_params() 138 | body_model.transl.requires_grad = True 139 | 140 | device = torch.device('cuda') if use_cuda else torch.device('cpu') 141 | 142 | if visualize: 143 | pil_img.fromarray((img * 255).astype(np.uint8)).show() 144 | 145 | if degrees is None: 146 | degrees = [0, 90, 180, 270] 147 | 148 | if data_weights is None: 149 | data_weights = [1, ] * 5 150 | 151 | if body_pose_prior_weights is None: 152 | body_pose_prior_weights = [4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78] 153 | 154 | msg = ( 155 | 'Number of Body pose prior weights {}'.format( 156 | len(body_pose_prior_weights)) + 157 | ' does not match the number of data term weights {}'.format( 158 | 
len(data_weights))) 159 | assert (len(data_weights) == 160 | len(body_pose_prior_weights)), msg 161 | 162 | if use_hands: 163 | if hand_pose_prior_weights is None: 164 | hand_pose_prior_weights = [1e2, 5 * 1e1, 1e1, .5 * 1e1] 165 | msg = ('Number of Body pose prior weights does not match the' + 166 | ' number of hand pose prior weights') 167 | assert (len(hand_pose_prior_weights) == 168 | len(body_pose_prior_weights)), msg 169 | if hand_joints_weights is None: 170 | hand_joints_weights = [0.0, 0.0, 0.0, 1.0] 171 | msg = ('Number of Body pose prior weights does not match the' + 172 | ' number of hand joint distance weights') 173 | assert (len(hand_joints_weights) == 174 | len(body_pose_prior_weights)), msg 175 | 176 | if shape_weights is None: 177 | shape_weights = [1e2, 5 * 1e1, 1e1, .5 * 1e1] 178 | msg = ('Number of Body pose prior weights = {} does not match the' + 179 | ' number of Shape prior weights = {}') 180 | assert (len(shape_weights) == 181 | len(body_pose_prior_weights)), msg.format( 182 | len(shape_weights), 183 | len(body_pose_prior_weights)) 184 | 185 | if use_face: 186 | if jaw_pose_prior_weights is None: 187 | jaw_pose_prior_weights = [[x] * 3 for x in shape_weights] 188 | else: 189 | jaw_pose_prior_weights = map(lambda x: map(float, x.split(',')), 190 | jaw_pose_prior_weights) 191 | jaw_pose_prior_weights = [list(w) for w in jaw_pose_prior_weights] 192 | msg = ('Number of Body pose prior weights does not match the' + 193 | ' number of jaw pose prior weights') 194 | assert (len(jaw_pose_prior_weights) == 195 | len(body_pose_prior_weights)), msg 196 | 197 | if expr_weights is None: 198 | expr_weights = [1e2, 5 * 1e1, 1e1, .5 * 1e1] 199 | msg = ('Number of Body pose prior weights = {} does not match the' + 200 | ' number of Expression prior weights = {}') 201 | assert (len(expr_weights) == 202 | len(body_pose_prior_weights)), msg.format( 203 | len(body_pose_prior_weights), 204 | len(expr_weights)) 205 | 206 | if face_joints_weights is None: 207 | face_joints_weights = [0.0, 0.0, 0.0, 1.0] 208 | msg = ('Number of Body pose prior weights does not match the' + 209 | ' number of face joint distance weights') 210 | assert (len(face_joints_weights) == 211 | len(body_pose_prior_weights)), msg 212 | 213 | if coll_loss_weights is None: 214 | coll_loss_weights = [0.0] * len(body_pose_prior_weights) 215 | msg = ('Number of Body pose prior weights does not match the' + 216 | ' number of collision loss weights') 217 | assert (len(coll_loss_weights) == 218 | len(body_pose_prior_weights)), msg 219 | 220 | use_vposer = kwargs.get('use_vposer', True) 221 | vposer, pose_embedding = [None, ] * 2 222 | if use_vposer: 223 | pose_embedding = torch.zeros([batch_size, 32], 224 | dtype=dtype, device=device, 225 | requires_grad=True) 226 | 227 | vposer_ckpt = osp.expandvars(vposer_ckpt) 228 | vposer, _ = load_vposer(vposer_ckpt, vp_model='snapshot') 229 | vposer = vposer.to(device=device) 230 | vposer.eval() 231 | 232 | if use_vposer: 233 | body_mean_pose = torch.zeros([batch_size, vposer_latent_dim], 234 | dtype=dtype) 235 | else: 236 | body_mean_pose = body_pose_prior.get_mean().detach().cpu() 237 | 238 | keypoint_data = torch.tensor(keypoints, dtype=dtype) 239 | gt_joints = keypoint_data[:, :, :2] 240 | if use_joints_conf: 241 | joints_conf = keypoint_data[:, :, 2].reshape(1, -1) 242 | 243 | # Transfer the data to the correct device 244 | gt_joints = gt_joints.to(device=device, dtype=dtype) 245 | if use_joints_conf: 246 | joints_conf = joints_conf.to(device=device, dtype=dtype) 247 | 248 | 
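The keypoint tensor sliced just above follows OpenPose's per-joint layout of (x, y, confidence): the first two channels become the 2D targets gt_joints and the third becomes joints_conf, which later scales the per-joint weights in the re-projection term. A small sketch of that split with made-up values for three joints:

import torch

# One detected person, three joints, OpenPose-style (x, y, confidence) triplets (toy values).
keypoints = torch.tensor([[[320.0, 180.0, 0.92],
                           [355.0, 240.0, 0.71],
                           [  0.0,   0.0, 0.00]]])  # undetected joint -> zero confidence

gt_joints = keypoints[:, :, :2]                  # (1, 3, 2) pixel coordinates
joints_conf = keypoints[:, :, 2].reshape(1, -1)  # (1, 3) per-joint confidences
print(gt_joints.shape, joints_conf)
# An undetected joint carries zero confidence, so it contributes nothing once the
# loss multiplies each joint's residual by its confidence-scaled weight.
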
scan_tensor = None 249 | if scan is not None: 250 | scan_tensor = torch.tensor(scan.get('points'), device=device, dtype=dtype).unsqueeze(0) 251 | 252 | # load pre-computed signed distance field 253 | sdf = None 254 | sdf_normals = None 255 | grid_min = None 256 | grid_max = None 257 | voxel_size = None 258 | if sdf_penetration: 259 | with open(osp.join(sdf_dir, scene_name + '.json'), 'r') as f: 260 | sdf_data = json.load(f) 261 | grid_min = torch.tensor(np.array(sdf_data['min']), dtype=dtype, device=device) 262 | grid_max = torch.tensor(np.array(sdf_data['max']), dtype=dtype, device=device) 263 | grid_dim = sdf_data['dim'] 264 | voxel_size = (grid_max - grid_min) / grid_dim 265 | sdf = np.load(osp.join(sdf_dir, scene_name + '_sdf.npy')).reshape(grid_dim, grid_dim, grid_dim) 266 | sdf = torch.tensor(sdf, dtype=dtype, device=device) 267 | if osp.exists(osp.join(sdf_dir, scene_name + '_normals.npy')): 268 | sdf_normals = np.load(osp.join(sdf_dir, scene_name + '_normals.npy')).reshape(grid_dim, grid_dim, grid_dim, 3) 269 | sdf_normals = torch.tensor(sdf_normals, dtype=dtype, device=device) 270 | else: 271 | print("Normals not found...") 272 | 273 | 274 | with open(os.path.join(cam2world_dir, scene_name + '.json'), 'r') as f: 275 | cam2world = np.array(json.load(f)) 276 | R = torch.tensor(cam2world[:3, :3].reshape(3, 3), dtype=dtype, device=device) 277 | t = torch.tensor(cam2world[:3, 3].reshape(1, 3), dtype=dtype, device=device) 278 | 279 | # Create the search tree 280 | search_tree = None 281 | pen_distance = None 282 | filter_faces = None 283 | if interpenetration: 284 | from mesh_intersection.bvh_search_tree import BVH 285 | import mesh_intersection.loss as collisions_loss 286 | from mesh_intersection.filter_faces import FilterFaces 287 | 288 | assert use_cuda, 'Interpenetration term can only be used with CUDA' 289 | assert torch.cuda.is_available(), \ 290 | 'No CUDA Device! 
Interpenetration term can only be used' + \ 291 | ' with CUDA' 292 | 293 | search_tree = BVH(max_collisions=max_collisions) 294 | 295 | pen_distance = \ 296 | collisions_loss.DistanceFieldPenetrationLoss( 297 | sigma=df_cone_height, point2plane=point2plane, 298 | vectorized=True, penalize_outside=penalize_outside) 299 | 300 | if part_segm_fn: 301 | # Read the part segmentation 302 | part_segm_fn = os.path.expandvars(part_segm_fn) 303 | with open(part_segm_fn, 'rb') as faces_parents_file: 304 | face_segm_data = pickle.load(faces_parents_file, 305 | encoding='latin1') 306 | faces_segm = face_segm_data['segm'] 307 | faces_parents = face_segm_data['parents'] 308 | # Create the module used to filter invalid collision pairs 309 | filter_faces = FilterFaces( 310 | faces_segm=faces_segm, faces_parents=faces_parents, 311 | ign_part_pairs=ign_part_pairs).to(device=device) 312 | 313 | # load vertix ids of contact parts 314 | contact_verts_ids = ftov = None 315 | if contact: 316 | contact_verts_ids = [] 317 | for part in contact_body_parts: 318 | with open(os.path.join(body_segments_dir, part + '.json'), 'r') as f: 319 | data = json.load(f) 320 | contact_verts_ids.append(list(set(data["verts_ind"]))) 321 | contact_verts_ids = np.concatenate(contact_verts_ids) 322 | 323 | vertices = body_model(return_verts=True, body_pose= torch.zeros((batch_size, 63), dtype=dtype, device=device)).vertices 324 | vertices_np = vertices.detach().cpu().numpy().squeeze() 325 | body_faces_np = body_model.faces_tensor.detach().cpu().numpy().reshape(-1, 3) 326 | m = Mesh(v=vertices_np, f=body_faces_np) 327 | ftov = m.faces_by_vertex(as_sparse_matrix=True) 328 | 329 | ftov = sparse.coo_matrix(ftov) 330 | indices = torch.LongTensor(np.vstack((ftov.row, ftov.col))).to(device) 331 | values = torch.FloatTensor(ftov.data).to(device) 332 | shape = ftov.shape 333 | ftov = torch.sparse.FloatTensor(indices, values, torch.Size(shape)) 334 | 335 | # Read the scene scan if any 336 | scene_v = scene_vn = scene_f = None 337 | if scene_name is not None: 338 | if load_scene: 339 | scene = Mesh(filename=os.path.join(scene_dir, scene_name + '.ply')) 340 | 341 | scene.vn = scene.estimate_vertex_normals() 342 | 343 | scene_v = torch.tensor(scene.v[np.newaxis, :], 344 | dtype=dtype, 345 | device=device).contiguous() 346 | scene_vn = torch.tensor(scene.vn[np.newaxis, :], 347 | dtype=dtype, 348 | device=device) 349 | scene_f = torch.tensor(scene.f.astype(int)[np.newaxis, :], 350 | dtype=torch.long, 351 | device=device) 352 | 353 | # Weights used for the pose prior and the shape prior 354 | opt_weights_dict = {'data_weight': data_weights, 355 | 'body_pose_weight': body_pose_prior_weights, 356 | 'shape_weight': shape_weights} 357 | if use_face: 358 | opt_weights_dict['face_weight'] = face_joints_weights 359 | opt_weights_dict['expr_prior_weight'] = expr_weights 360 | opt_weights_dict['jaw_prior_weight'] = jaw_pose_prior_weights 361 | if use_hands: 362 | opt_weights_dict['hand_weight'] = hand_joints_weights 363 | opt_weights_dict['hand_prior_weight'] = hand_pose_prior_weights 364 | if interpenetration: 365 | opt_weights_dict['coll_loss_weight'] = coll_loss_weights 366 | if s2m: 367 | opt_weights_dict['s2m_weight'] = s2m_weights 368 | if m2s: 369 | opt_weights_dict['m2s_weight'] = m2s_weights 370 | if sdf_penetration: 371 | opt_weights_dict['sdf_penetration_weight'] = sdf_penetration_weights 372 | if contact: 373 | opt_weights_dict['contact_loss_weight'] = contact_loss_weights 374 | 375 | keys = opt_weights_dict.keys() 376 | opt_weights = 
[dict(zip(keys, vals)) for vals in 377 | zip(*(opt_weights_dict[k] for k in keys 378 | if opt_weights_dict[k] is not None))] 379 | for weight_list in opt_weights: 380 | for key in weight_list: 381 | weight_list[key] = torch.tensor(weight_list[key], 382 | device=device, 383 | dtype=dtype) 384 | 385 | # load indices of the head of smpl-x model 386 | with open( osp.join(body_segments_dir, 'body_mask.json'), 'r') as fp: 387 | head_indx = np.array(json.load(fp)) 388 | N = body_model.get_num_verts() 389 | body_indx = np.setdiff1d(np.arange(N), head_indx) 390 | head_mask = np.in1d(np.arange(N), head_indx) 391 | body_mask = np.in1d(np.arange(N), body_indx) 392 | 393 | # The indices of the joints used for the initialization of the camera 394 | init_joints_idxs = torch.tensor(init_joints_idxs, device=device) 395 | 396 | edge_indices = kwargs.get('body_tri_idxs') 397 | 398 | # which initialization mode to choose: similar traingles, mean of the scan or the average of both 399 | if init_mode == 'scan': 400 | init_t = init_trans 401 | elif init_mode == 'both': 402 | init_t = (init_trans.to(device) + fitting.guess_init(body_model, gt_joints, edge_indices, 403 | use_vposer=use_vposer, vposer=vposer, 404 | pose_embedding=pose_embedding, 405 | model_type=kwargs.get('model_type', 'smpl'), 406 | focal_length=focal_length_x, dtype=dtype) ) /2.0 407 | 408 | else: 409 | init_t = fitting.guess_init(body_model, gt_joints, edge_indices, 410 | use_vposer=use_vposer, vposer=vposer, 411 | pose_embedding=pose_embedding, 412 | model_type=kwargs.get('model_type', 'smpl'), 413 | focal_length=focal_length_x, dtype=dtype) 414 | 415 | camera_loss = fitting.create_loss('camera_init', 416 | trans_estimation=init_t, 417 | init_joints_idxs=init_joints_idxs, 418 | depth_loss_weight=depth_loss_weight, 419 | camera_mode=camera_mode, 420 | dtype=dtype).to(device=device) 421 | camera_loss.trans_estimation[:] = init_t 422 | 423 | loss = fitting.create_loss(loss_type=loss_type, 424 | joint_weights=joint_weights, 425 | rho=rho, 426 | use_joints_conf=use_joints_conf, 427 | use_face=use_face, use_hands=use_hands, 428 | vposer=vposer, 429 | pose_embedding=pose_embedding, 430 | body_pose_prior=body_pose_prior, 431 | shape_prior=shape_prior, 432 | angle_prior=angle_prior, 433 | expr_prior=expr_prior, 434 | left_hand_prior=left_hand_prior, 435 | right_hand_prior=right_hand_prior, 436 | jaw_prior=jaw_prior, 437 | interpenetration=interpenetration, 438 | pen_distance=pen_distance, 439 | search_tree=search_tree, 440 | tri_filtering_module=filter_faces, 441 | s2m=s2m, 442 | m2s=m2s, 443 | rho_s2m=rho_s2m, 444 | rho_m2s=rho_m2s, 445 | head_mask=head_mask, 446 | body_mask=body_mask, 447 | sdf_penetration=sdf_penetration, 448 | voxel_size=voxel_size, 449 | grid_min=grid_min, 450 | grid_max=grid_max, 451 | sdf=sdf, 452 | sdf_normals=sdf_normals, 453 | R=R, 454 | t=t, 455 | contact=contact, 456 | contact_verts_ids=contact_verts_ids, 457 | rho_contact=rho_contact, 458 | contact_angle=contact_angle, 459 | dtype=dtype, 460 | **kwargs) 461 | loss = loss.to(device=device) 462 | 463 | with fitting.FittingMonitor( 464 | batch_size=batch_size, visualize=visualize, viz_mode=viz_mode, **kwargs) as monitor: 465 | 466 | img = torch.tensor(img, dtype=dtype) 467 | 468 | H, W, _ = img.shape 469 | 470 | # Reset the parameters to estimate the initial translation of the 471 | # body model 472 | if camera_mode == 'moving': 473 | body_model.reset_params(body_pose=body_mean_pose) 474 | # Update the value of the translation of the camera as well as 475 | # the image 
center. 476 | with torch.no_grad(): 477 | camera.translation[:] = init_t.view_as(camera.translation) 478 | camera.center[:] = torch.tensor([W, H], dtype=dtype) * 0.5 479 | 480 | # Re-enable gradient calculation for the camera translation 481 | camera.translation.requires_grad = True 482 | 483 | camera_opt_params = [camera.translation, body_model.global_orient] 484 | 485 | elif camera_mode == 'fixed': 486 | body_model.reset_params(body_pose=body_mean_pose, transl=init_t) 487 | camera_opt_params = [body_model.transl, body_model.global_orient] 488 | 489 | # If the distance between the 2D shoulders is smaller than a 490 | # predefined threshold then try 2 fits, the initial one and a 180 491 | # degree rotation 492 | shoulder_dist = torch.dist(gt_joints[:, left_shoulder_idx], 493 | gt_joints[:, right_shoulder_idx]) 494 | try_both_orient = shoulder_dist.item() < side_view_thsh 495 | 496 | 497 | 498 | camera_optimizer, camera_create_graph = optim_factory.create_optimizer( 499 | camera_opt_params, 500 | **kwargs) 501 | 502 | # The closure passed to the optimizer 503 | fit_camera = monitor.create_fitting_closure( 504 | camera_optimizer, body_model, camera, gt_joints, 505 | camera_loss, create_graph=camera_create_graph, 506 | use_vposer=use_vposer, vposer=vposer, 507 | pose_embedding=pose_embedding, 508 | scan_tensor=scan_tensor, 509 | return_full_pose=False, return_verts=False) 510 | 511 | # Step 1: Optimize over the torso joints the camera translation 512 | # Initialize the computational graph by feeding the initial translation 513 | # of the camera and the initial pose of the body model. 514 | camera_init_start = time.time() 515 | cam_init_loss_val = monitor.run_fitting(camera_optimizer, 516 | fit_camera, 517 | camera_opt_params, body_model, 518 | use_vposer=use_vposer, 519 | pose_embedding=pose_embedding, 520 | vposer=vposer) 521 | 522 | if interactive: 523 | if use_cuda and torch.cuda.is_available(): 524 | torch.cuda.synchronize() 525 | tqdm.write('Camera initialization done after {:.4f}'.format( 526 | time.time() - camera_init_start)) 527 | tqdm.write('Camera initialization final loss {:.4f}'.format( 528 | cam_init_loss_val)) 529 | 530 | # If the 2D detections/positions of the shoulder joints are too 531 | # close the rotate the body by 180 degrees and also fit to that 532 | # orientation 533 | if try_both_orient: 534 | body_orient = body_model.global_orient.detach().cpu().numpy() 535 | flipped_orient = cv2.Rodrigues(body_orient)[0].dot( 536 | cv2.Rodrigues(np.array([0., np.pi, 0]))[0]) 537 | flipped_orient = cv2.Rodrigues(flipped_orient)[0].ravel() 538 | 539 | flipped_orient = torch.tensor(flipped_orient, 540 | dtype=dtype, 541 | device=device).unsqueeze(dim=0) 542 | orientations = [body_orient, flipped_orient] 543 | else: 544 | orientations = [body_model.global_orient.detach().cpu().numpy()] 545 | 546 | # store here the final error for both orientations, 547 | # and pick the orientation resulting in the lowest error 548 | results = [] 549 | body_transl = body_model.transl.clone().detach() 550 | # Step 2: Optimize the full model 551 | final_loss_val = 0 552 | for or_idx, orient in enumerate(tqdm(orientations, desc='Orientation')): 553 | opt_start = time.time() 554 | 555 | new_params = defaultdict(transl=body_transl, 556 | global_orient=orient, 557 | body_pose=body_mean_pose) 558 | body_model.reset_params(**new_params) 559 | if use_vposer: 560 | with torch.no_grad(): 561 | pose_embedding.fill_(0) 562 | 563 | for opt_idx, curr_weights in enumerate(tqdm(opt_weights, desc='Stage')): 564 | if 
opt_idx not in trans_opt_stages: 565 | body_model.transl.requires_grad = False 566 | else: 567 | body_model.transl.requires_grad = True 568 | body_params = list(body_model.parameters()) 569 | 570 | final_params = list( 571 | filter(lambda x: x.requires_grad, body_params)) 572 | 573 | if use_vposer: 574 | final_params.append(pose_embedding) 575 | 576 | body_optimizer, body_create_graph = optim_factory.create_optimizer( 577 | final_params, 578 | **kwargs) 579 | body_optimizer.zero_grad() 580 | 581 | curr_weights['bending_prior_weight'] = ( 582 | 3.17 * curr_weights['body_pose_weight']) 583 | if use_hands: 584 | joint_weights[:, 25:76] = curr_weights['hand_weight'] 585 | if use_face: 586 | joint_weights[:, 76:] = curr_weights['face_weight'] 587 | loss.reset_loss_weights(curr_weights) 588 | 589 | closure = monitor.create_fitting_closure( 590 | body_optimizer, body_model, 591 | camera=camera, gt_joints=gt_joints, 592 | joints_conf=joints_conf, 593 | joint_weights=joint_weights, 594 | loss=loss, create_graph=body_create_graph, 595 | use_vposer=use_vposer, vposer=vposer, 596 | pose_embedding=pose_embedding, 597 | scan_tensor=scan_tensor, 598 | scene_v=scene_v, scene_vn=scene_vn, scene_f=scene_f,ftov=ftov, 599 | return_verts=True, return_full_pose=True) 600 | 601 | if interactive: 602 | if use_cuda and torch.cuda.is_available(): 603 | torch.cuda.synchronize() 604 | stage_start = time.time() 605 | final_loss_val = monitor.run_fitting( 606 | body_optimizer, 607 | closure, final_params, 608 | body_model, 609 | pose_embedding=pose_embedding, vposer=vposer, 610 | use_vposer=use_vposer) 611 | 612 | if interactive: 613 | if use_cuda and torch.cuda.is_available(): 614 | torch.cuda.synchronize() 615 | elapsed = time.time() - stage_start 616 | if interactive: 617 | tqdm.write('Stage {:03d} done after {:.4f} seconds'.format( 618 | opt_idx, elapsed)) 619 | 620 | if interactive: 621 | if use_cuda and torch.cuda.is_available(): 622 | torch.cuda.synchronize() 623 | elapsed = time.time() - opt_start 624 | tqdm.write( 625 | 'Body fitting Orientation {} done after {:.4f} seconds'.format( 626 | or_idx, elapsed)) 627 | tqdm.write('Body final loss val = {:.5f}'.format( 628 | final_loss_val)) 629 | 630 | # Get the result of the fitting process 631 | # Store in it the errors list in order to compare multiple 632 | # orientations, if they exist 633 | result = {'camera_' + str(key): val.detach().cpu().numpy() 634 | for key, val in camera.named_parameters()} 635 | result.update({key: val.detach().cpu().numpy() 636 | for key, val in body_model.named_parameters()}) 637 | if use_vposer: 638 | result['pose_embedding'] = pose_embedding.detach().cpu().numpy() 639 | body_pose = vposer.decode( 640 | pose_embedding, 641 | output_type='aa').view(1, -1) if use_vposer else None 642 | result['body_pose'] = body_pose.detach().cpu().numpy() 643 | 644 | results.append({'loss': final_loss_val, 645 | 'result': result}) 646 | 647 | with open(result_fn, 'wb') as result_file: 648 | if len(results) > 1: 649 | min_idx = (0 if results[0]['loss'] < results[1]['loss'] 650 | else 1) 651 | else: 652 | min_idx = 0 653 | pickle.dump(results[min_idx]['result'], result_file, protocol=2) 654 | 655 | 656 | if save_meshes or visualize: 657 | body_pose = vposer.decode( 658 | pose_embedding, 659 | output_type='aa').view(1, -1) if use_vposer else None 660 | 661 | model_type = kwargs.get('model_type', 'smpl') 662 | append_wrists = model_type == 'smpl' and use_vposer 663 | if append_wrists: 664 | wrist_pose = torch.zeros([body_pose.shape[0], 6], 665 | 
dtype=body_pose.dtype, 666 | device=body_pose.device) 667 | body_pose = torch.cat([body_pose, wrist_pose], dim=1) 668 | 669 | model_output = body_model(return_verts=True, body_pose=body_pose) 670 | vertices = model_output.vertices.detach().cpu().numpy().squeeze() 671 | 672 | import trimesh 673 | 674 | out_mesh = trimesh.Trimesh(vertices, body_model.faces, process=False) 675 | out_mesh.export(mesh_fn) 676 | 677 | if render_results: 678 | import pyrender 679 | 680 | # common 681 | H, W = 1080, 1920 682 | camera_center = np.array([951.30, 536.77]) 683 | camera_pose = np.eye(4) 684 | camera_pose = np.array([1.0, -1.0, -1.0, 1.0]).reshape(-1, 1) * camera_pose 685 | camera = pyrender.camera.IntrinsicsCamera( 686 | fx=1060.53, fy=1060.38, 687 | cx=camera_center[0], cy=camera_center[1]) 688 | light = pyrender.DirectionalLight(color=np.ones(3), intensity=2.0) 689 | 690 | material = pyrender.MetallicRoughnessMaterial( 691 | metallicFactor=0.0, 692 | alphaMode='OPAQUE', 693 | baseColorFactor=(1.0, 1.0, 0.9, 1.0)) 694 | body_mesh = pyrender.Mesh.from_trimesh( 695 | out_mesh, material=material) 696 | 697 | ## rendering body 698 | img = img.detach().cpu().numpy() 699 | H, W, _ = img.shape 700 | 701 | scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0], 702 | ambient_light=(0.3, 0.3, 0.3)) 703 | scene.add(camera, pose=camera_pose) 704 | scene.add(light, pose=camera_pose) 705 | # for node in light_nodes: 706 | # scene.add_node(node) 707 | 708 | scene.add(body_mesh, 'mesh') 709 | 710 | r = pyrender.OffscreenRenderer(viewport_width=W, 711 | viewport_height=H, 712 | point_size=1.0) 713 | color, _ = r.render(scene, flags=pyrender.RenderFlags.RGBA) 714 | color = color.astype(np.float32) / 255.0 715 | 716 | valid_mask = (color[:, :, -1] > 0)[:, :, np.newaxis] 717 | input_img = img 718 | output_img = (color[:, :, :-1] * valid_mask + 719 | (1 - valid_mask) * input_img) 720 | 721 | img = pil_img.fromarray((output_img * 255).astype(np.uint8)) 722 | img.save(out_img_fn) 723 | 724 | ##redering body+scene 725 | body_mesh = pyrender.Mesh.from_trimesh( 726 | out_mesh, material=material) 727 | static_scene = trimesh.load(osp.join(scene_dir, scene_name + '.ply')) 728 | trans = np.linalg.inv(cam2world) 729 | static_scene.apply_transform(trans) 730 | 731 | static_scene_mesh = pyrender.Mesh.from_trimesh( 732 | static_scene) 733 | 734 | scene = pyrender.Scene() 735 | scene.add(camera, pose=camera_pose) 736 | scene.add(light, pose=camera_pose) 737 | 738 | scene.add(static_scene_mesh, 'mesh') 739 | scene.add(body_mesh, 'mesh') 740 | 741 | r = pyrender.OffscreenRenderer(viewport_width=W, 742 | viewport_height=H) 743 | color, _ = r.render(scene) 744 | color = color.astype(np.float32) / 255.0 745 | img = pil_img.fromarray((color * 255).astype(np.uint8)) 746 | img.save(body_scene_rendering_fn) 747 | -------------------------------------------------------------------------------- /prox/fitting.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 
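In the rendering block that closes fit_single_frame.py above, the line camera_pose = np.array([1.0, -1.0, -1.0, 1.0]).reshape(-1, 1) * camera_pose flips the sign of the y and z rows of the identity pose, i.e. it converts the OpenCV-style camera frame used during fitting (x right, y down, z forward) into pyrender's OpenGL convention (y up, z towards the viewer). A short sketch of what that multiplication actually produces:

import numpy as np

camera_pose = np.eye(4)
# Broadcasting a (4, 1) column of signs scales each row of the 4x4 pose.
camera_pose = np.array([1.0, -1.0, -1.0, 1.0]).reshape(-1, 1) * camera_pose
print(camera_pose)
# [[ 1.  0.  0.  0.]
#  [ 0. -1.  0.  0.]
#  [ 0.  0. -1.  0.]
#  [ 0.  0.  0.  1.]]
# i.e. diag(1, -1, -1, 1), the usual OpenCV -> OpenGL camera-frame flip.
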
10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | import open3d as o3d 23 | import sys 24 | import numpy as np 25 | 26 | import torch 27 | import torch.nn as nn 28 | import torch.nn.functional as F 29 | 30 | from psbody.mesh.visibility import visibility_compute 31 | from psbody.mesh import Mesh 32 | 33 | import misc_utils as utils 34 | import dist_chamfer as ext 35 | distChamfer = ext.chamferDist() 36 | 37 | @torch.no_grad() 38 | def guess_init(model, 39 | joints_2d, 40 | edge_idxs, 41 | focal_length=5000, 42 | pose_embedding=None, 43 | vposer=None, 44 | use_vposer=True, 45 | dtype=torch.float32, 46 | model_type='smpl', 47 | **kwargs): 48 | ''' Initializes the camera translation vector 49 | 50 | Parameters 51 | ---------- 52 | model: nn.Module 53 | The PyTorch module of the body 54 | joints_2d: torch.tensor 1xJx2 55 | The 2D tensor of the joints 56 | edge_idxs: list of lists 57 | A list of pairs, each of which represents a limb used to estimate 58 | the camera translation 59 | focal_length: float, optional (default = 5000) 60 | The focal length of the camera 61 | pose_embedding: torch.tensor 1x32 62 | The tensor that contains the embedding of V-Poser that is used to 63 | generate the pose of the model 64 | dtype: torch.dtype, optional (torch.float32) 65 | The floating point type used 66 | vposer: nn.Module, optional (None) 67 | The PyTorch module that implements the V-Poser decoder 68 | Returns 69 | ------- 70 | init_t: torch.tensor 1x3, dtype = torch.float32 71 | The vector with the estimated camera location 72 | 73 | ''' 74 | 75 | body_pose = vposer.decode( 76 | pose_embedding, output_type='aa').view(1, -1) if use_vposer else None 77 | if use_vposer and model_type == 'smpl': 78 | wrist_pose = torch.zeros([body_pose.shape[0], 6], 79 | dtype=body_pose.dtype, 80 | device=body_pose.device) 81 | body_pose = torch.cat([body_pose, wrist_pose], dim=1) 82 | 83 | output = model(body_pose=body_pose, return_verts=False, 84 | return_full_pose=False) 85 | joints_3d = output.joints 86 | joints_2d = joints_2d.to(device=joints_3d.device) 87 | 88 | diff3d = [] 89 | diff2d = [] 90 | for edge in edge_idxs: 91 | diff3d.append(joints_3d[:, edge[0]] - joints_3d[:, edge[1]]) 92 | diff2d.append(joints_2d[:, edge[0]] - joints_2d[:, edge[1]]) 93 | 94 | diff3d = torch.stack(diff3d, dim=1) 95 | diff2d = torch.stack(diff2d, dim=1) 96 | 97 | length_2d = diff2d.pow(2).sum(dim=-1).sqrt() 98 | length_3d = diff3d.pow(2).sum(dim=-1).sqrt() 99 | 100 | height2d = length_2d.mean(dim=1) 101 | height3d = length_3d.mean(dim=1) 102 | 103 | est_d = focal_length * (height3d / height2d) 104 | 105 | # just set the z value 106 | batch_size = joints_3d.shape[0] 107 | x_coord = torch.zeros([batch_size], device=joints_3d.device, 108 | dtype=dtype) 109 | y_coord = x_coord.clone() 110 | init_t = torch.stack([x_coord, y_coord, est_d], dim=1) 111 | return init_t 112 | 113 | 114 | class FittingMonitor(object): 115 | def __init__(self, summary_steps=1, visualize=False, 116 | maxiters=100, ftol=2e-09, gtol=1e-05, 117 | body_color=(1.0, 1.0, 0.9, 1.0), 118 | model_type='smpl', 119 | viz_mode='mv', 120 | **kwargs): 121 | super(FittingMonitor, 
self).__init__() 122 | 123 | self.maxiters = maxiters 124 | self.ftol = ftol 125 | self.gtol = gtol 126 | 127 | self.summary_steps = summary_steps 128 | self.body_color = body_color 129 | self.model_type = model_type 130 | 131 | self.visualize = visualize 132 | self.viz_mode = viz_mode 133 | 134 | def __enter__(self): 135 | self.steps = 0 136 | if self.visualize: 137 | if self.viz_mode == 'o3d': 138 | self.vis_o3d = o3d.Visualizer() 139 | self.vis_o3d.create_window() 140 | self.body_o3d = o3d.TriangleMesh() 141 | self.scan = o3d.PointCloud() 142 | else: 143 | self.mv = MeshViewer(body_color=self.body_color) 144 | return self 145 | 146 | def __exit__(self, exception_type, exception_value, traceback): 147 | if self.visualize: 148 | if self.viz_mode == 'o3d': 149 | self.vis_o3d.close() 150 | else: 151 | self.mv.close_viewer() 152 | 153 | def set_colors(self, vertex_color): 154 | batch_size = self.colors.shape[0] 155 | 156 | self.colors = np.tile( 157 | np.array(vertex_color).reshape(1, 3), 158 | [batch_size, 1]) 159 | 160 | def run_fitting(self, optimizer, closure, params, body_model, 161 | use_vposer=True, pose_embedding=None, vposer=None, 162 | **kwargs): 163 | ''' Helper function for running an optimization process 164 | Parameters 165 | ---------- 166 | optimizer: torch.optim.Optimizer 167 | The PyTorch optimizer object 168 | closure: function 169 | The function used to calculate the gradients 170 | params: list 171 | List containing the parameters that will be optimized 172 | body_model: nn.Module 173 | The body model PyTorch module 174 | use_vposer: bool 175 | Flag on whether to use VPoser (default=True). 176 | pose_embedding: torch.tensor, BxN 177 | The tensor that contains the latent pose variable. 178 | vposer: nn.Module 179 | The VPoser module 180 | Returns 181 | ------- 182 | loss: float 183 | The final loss value 184 | ''' 185 | append_wrists = self.model_type == 'smpl' and use_vposer 186 | prev_loss = None 187 | for n in range(self.maxiters): 188 | loss = optimizer.step(closure) 189 | 190 | if torch.isnan(loss).sum() > 0: 191 | print('NaN loss value, stopping!') 192 | break 193 | 194 | if torch.isinf(loss).sum() > 0: 195 | print('Infinite loss value, stopping!') 196 | break 197 | 198 | if n > 0 and prev_loss is not None and self.ftol > 0: 199 | loss_rel_change = utils.rel_change(prev_loss, loss.item()) 200 | 201 | if loss_rel_change <= self.ftol: 202 | break 203 | 204 | if all([torch.abs(var.grad.view(-1).max()).item() < self.gtol 205 | for var in params if var.grad is not None]): 206 | break 207 | prev_loss = loss.item() 208 | 209 | return prev_loss 210 | 211 | def create_fitting_closure(self, 212 | optimizer, body_model, camera=None, 213 | gt_joints=None, loss=None, 214 | joints_conf=None, 215 | joint_weights=None, 216 | return_verts=True, return_full_pose=False, 217 | use_vposer=False, vposer=None, 218 | pose_embedding=None, 219 | scan_tensor=None, 220 | create_graph=False, 221 | **kwargs): 222 | faces_tensor = body_model.faces_tensor.view(-1) 223 | append_wrists = self.model_type == 'smpl' and use_vposer 224 | 225 | def fitting_func(backward=True): 226 | if backward: 227 | optimizer.zero_grad() 228 | 229 | body_pose = vposer.decode( 230 | pose_embedding, output_type='aa').view( 231 | 1, -1) if use_vposer else None 232 | 233 | if append_wrists: 234 | wrist_pose = torch.zeros([body_pose.shape[0], 6], 235 | dtype=body_pose.dtype, 236 | device=body_pose.device) 237 | body_pose = torch.cat([body_pose, wrist_pose], dim=1) 238 | 239 | body_model_output = 
body_model(return_verts=return_verts, 240 | body_pose=body_pose, 241 | return_full_pose=return_full_pose) 242 | total_loss = loss(body_model_output, camera=camera, 243 | gt_joints=gt_joints, 244 | body_model=body_model, 245 | body_model_faces=faces_tensor, 246 | joints_conf=joints_conf, 247 | joint_weights=joint_weights, 248 | pose_embedding=pose_embedding, 249 | use_vposer=use_vposer, 250 | scan_tensor=scan_tensor, 251 | visualize=self.visualize, 252 | **kwargs) 253 | 254 | if backward: 255 | total_loss.backward(create_graph=create_graph) 256 | 257 | 258 | if self.visualize: 259 | model_output = body_model(return_verts=True, 260 | body_pose=body_pose) 261 | vertices = model_output.vertices.detach().cpu().numpy() 262 | 263 | if self.steps == 0 and self.viz_mode == 'o3d': 264 | 265 | self.body_o3d.vertices = o3d.Vector3dVector(vertices.squeeze()) 266 | self.body_o3d.triangles = o3d.Vector3iVector(body_model.faces) 267 | self.body_o3d.vertex_normals = o3d.Vector3dVector([]) 268 | self.body_o3d.triangle_normals = o3d.Vector3dVector([]) 269 | self.body_o3d.compute_vertex_normals() 270 | self.vis_o3d.add_geometry(self.body_o3d) 271 | 272 | if scan_tensor is not None: 273 | self.scan.points = o3d.Vector3dVector(scan_tensor.detach().cpu().numpy().squeeze()) 274 | N = np.asarray(self.scan.points).shape[0] 275 | self.scan.colors = o3d.Vector3dVector(np.tile([1.00, 0.75, 0.80], [N, 1])) 276 | self.vis_o3d.add_geometry(self.scan) 277 | 278 | self.vis_o3d.update_geometry() 279 | self.vis_o3d.poll_events() 280 | self.vis_o3d.update_renderer() 281 | elif self.steps % self.summary_steps == 0: 282 | if self.viz_mode == 'o3d': 283 | self.body_o3d.vertices = o3d.Vector3dVector(vertices.squeeze()) 284 | self.body_o3d.triangles = o3d.Vector3iVector(body_model.faces) 285 | self.body_o3d.vertex_normals = o3d.Vector3dVector([]) 286 | self.body_o3d.triangle_normals = o3d.Vector3dVector([]) 287 | self.body_o3d.compute_vertex_normals() 288 | 289 | self.vis_o3d.update_geometry() 290 | self.vis_o3d.poll_events() 291 | self.vis_o3d.update_renderer() 292 | else: 293 | self.mv.update_mesh(vertices.squeeze(), 294 | body_model.faces) 295 | self.steps += 1 296 | 297 | return total_loss 298 | 299 | return fitting_func 300 | 301 | 302 | def create_loss(loss_type='smplify', **kwargs): 303 | if loss_type == 'smplify': 304 | return SMPLifyLoss(**kwargs) 305 | elif loss_type == 'camera_init': 306 | return SMPLifyCameraInitLoss(**kwargs) 307 | else: 308 | raise ValueError('Unknown loss type: {}'.format(loss_type)) 309 | 310 | 311 | class SMPLifyLoss(nn.Module): 312 | 313 | def __init__(self, search_tree=None, 314 | pen_distance=None, tri_filtering_module=None, 315 | rho=100, 316 | body_pose_prior=None, 317 | shape_prior=None, 318 | expr_prior=None, 319 | angle_prior=None, 320 | jaw_prior=None, 321 | use_joints_conf=True, 322 | use_face=True, use_hands=True, 323 | left_hand_prior=None, right_hand_prior=None, 324 | interpenetration=True, dtype=torch.float32, 325 | data_weight=1.0, 326 | body_pose_weight=0.0, 327 | shape_weight=0.0, 328 | bending_prior_weight=0.0, 329 | hand_prior_weight=0.0, 330 | expr_prior_weight=0.0, jaw_prior_weight=0.0, 331 | coll_loss_weight=0.0, 332 | s2m=False, 333 | m2s=False, 334 | rho_s2m=1, 335 | rho_m2s=1, 336 | s2m_weight=0.0, 337 | m2s_weight=0.0, 338 | head_mask=None, 339 | body_mask=None, 340 | sdf_penetration=False, 341 | voxel_size=None, 342 | grid_min=None, 343 | grid_max=None, 344 | sdf=None, 345 | sdf_normals=None, 346 | sdf_penetration_weight=0.0, 347 | R=None, 348 | t=None, 349 | 
contact=False, 350 | contact_loss_weight=0.0, 351 | contact_verts_ids=None, 352 | rho_contact=0.0, 353 | contact_angle=0.0, 354 | **kwargs): 355 | 356 | super(SMPLifyLoss, self).__init__() 357 | 358 | self.use_joints_conf = use_joints_conf 359 | self.angle_prior = angle_prior 360 | 361 | self.robustifier = utils.GMoF(rho=rho) 362 | self.rho = rho 363 | 364 | self.s2m = s2m 365 | self.m2s = m2s 366 | self.s2m_robustifier = utils.GMoF(rho=rho_s2m) 367 | self.m2s_robustifier = utils.GMoF(rho=rho_m2s) 368 | 369 | self.body_pose_prior = body_pose_prior 370 | 371 | self.shape_prior = shape_prior 372 | 373 | self.body_mask = body_mask 374 | self.head_mask = head_mask 375 | 376 | self.R = R 377 | self.t = t 378 | 379 | self.interpenetration = interpenetration 380 | if self.interpenetration: 381 | self.search_tree = search_tree 382 | self.tri_filtering_module = tri_filtering_module 383 | self.pen_distance = pen_distance 384 | 385 | 386 | self.use_hands = use_hands 387 | if self.use_hands: 388 | self.left_hand_prior = left_hand_prior 389 | self.right_hand_prior = right_hand_prior 390 | 391 | self.use_face = use_face 392 | if self.use_face: 393 | self.expr_prior = expr_prior 394 | self.jaw_prior = jaw_prior 395 | 396 | self.register_buffer('data_weight', 397 | torch.tensor(data_weight, dtype=dtype)) 398 | self.register_buffer('body_pose_weight', 399 | torch.tensor(body_pose_weight, dtype=dtype)) 400 | self.register_buffer('shape_weight', 401 | torch.tensor(shape_weight, dtype=dtype)) 402 | self.register_buffer('bending_prior_weight', 403 | torch.tensor(bending_prior_weight, dtype=dtype)) 404 | if self.use_hands: 405 | self.register_buffer('hand_prior_weight', 406 | torch.tensor(hand_prior_weight, dtype=dtype)) 407 | if self.use_face: 408 | self.register_buffer('expr_prior_weight', 409 | torch.tensor(expr_prior_weight, dtype=dtype)) 410 | self.register_buffer('jaw_prior_weight', 411 | torch.tensor(jaw_prior_weight, dtype=dtype)) 412 | if self.interpenetration: 413 | self.register_buffer('coll_loss_weight', 414 | torch.tensor(coll_loss_weight, dtype=dtype)) 415 | 416 | self.register_buffer('s2m_weight', 417 | torch.tensor(s2m_weight, dtype=dtype)) 418 | self.register_buffer('m2s_weight', 419 | torch.tensor(m2s_weight, dtype=dtype)) 420 | 421 | self.sdf_penetration = sdf_penetration 422 | if self.sdf_penetration: 423 | self.sdf = sdf 424 | self.sdf_normals = sdf_normals 425 | self.voxel_size = voxel_size 426 | self.grid_min = grid_min 427 | self.grid_max = grid_max 428 | self.register_buffer('sdf_penetration_weight', 429 | torch.tensor(sdf_penetration_weight, dtype=dtype)) 430 | self.contact = contact 431 | if self.contact: 432 | self.contact_verts_ids = contact_verts_ids 433 | self.rho_contact = rho_contact 434 | self.contact_angle = contact_angle 435 | self.register_buffer('contact_loss_weight', 436 | torch.tensor(contact_loss_weight, dtype=dtype)) 437 | self.contact_robustifier = utils.GMoF_unscaled(rho=self.rho_contact) 438 | 439 | def reset_loss_weights(self, loss_weight_dict): 440 | for key in loss_weight_dict: 441 | if hasattr(self, key): 442 | weight_tensor = getattr(self, key) 443 | if 'torch.Tensor' in str(type(loss_weight_dict[key])): 444 | weight_tensor = loss_weight_dict[key].clone().detach() 445 | else: 446 | weight_tensor = torch.tensor(loss_weight_dict[key], 447 | dtype=weight_tensor.dtype, 448 | device=weight_tensor.device) 449 | setattr(self, key, weight_tensor) 450 | 451 | def forward(self, body_model_output, camera, gt_joints, joints_conf, 452 | body_model_faces, joint_weights, 453 | 
use_vposer=False, pose_embedding=None, 454 | scan_tensor=None, visualize=False, 455 | scene_v=None, scene_vn=None, scene_f=None,ftov=None, 456 | **kwargs): 457 | projected_joints = camera(body_model_output.joints) 458 | # Calculate the weights for each joints 459 | weights = (joint_weights * joints_conf 460 | if self.use_joints_conf else 461 | joint_weights).unsqueeze(dim=-1) 462 | 463 | # Calculate the distance of the projected joints from 464 | # the ground truth 2D detections 465 | joint_diff = self.robustifier(gt_joints - projected_joints) 466 | joint_loss = (torch.sum(weights ** 2 * joint_diff) * 467 | self.data_weight ** 2) 468 | 469 | # Calculate the loss from the Pose prior 470 | if use_vposer: 471 | pprior_loss = (pose_embedding.pow(2).sum() * 472 | self.body_pose_weight ** 2) 473 | else: 474 | pprior_loss = torch.sum(self.body_pose_prior( 475 | body_model_output.body_pose, 476 | body_model_output.betas)) * self.body_pose_weight ** 2 477 | 478 | shape_loss = torch.sum(self.shape_prior( 479 | body_model_output.betas)) * self.shape_weight ** 2 480 | # Calculate the prior over the joint rotations. This a heuristic used 481 | # to prevent extreme rotation of the elbows and knees 482 | body_pose = body_model_output.full_pose[:, 3:66] 483 | angle_prior_loss = torch.sum( 484 | self.angle_prior(body_pose)) * self.bending_prior_weight ** 2 485 | 486 | # Apply the prior on the pose space of the hand 487 | left_hand_prior_loss, right_hand_prior_loss = 0.0, 0.0 488 | if self.use_hands and self.left_hand_prior is not None: 489 | left_hand_prior_loss = torch.sum( 490 | self.left_hand_prior( 491 | body_model_output.left_hand_pose)) * \ 492 | self.hand_prior_weight ** 2 493 | 494 | if self.use_hands and self.right_hand_prior is not None: 495 | right_hand_prior_loss = torch.sum( 496 | self.right_hand_prior( 497 | body_model_output.right_hand_pose)) * \ 498 | self.hand_prior_weight ** 2 499 | 500 | expression_loss = 0.0 501 | jaw_prior_loss = 0.0 502 | if self.use_face: 503 | expression_loss = torch.sum(self.expr_prior( 504 | body_model_output.expression)) * \ 505 | self.expr_prior_weight ** 2 506 | 507 | if hasattr(self, 'jaw_prior'): 508 | jaw_prior_loss = torch.sum( 509 | self.jaw_prior( 510 | body_model_output.jaw_pose.mul( 511 | self.jaw_prior_weight))) 512 | 513 | pen_loss = 0.0 514 | # Calculate the loss due to interpenetration 515 | if (self.interpenetration and self.coll_loss_weight.item() > 0): 516 | batch_size = projected_joints.shape[0] 517 | triangles = torch.index_select( 518 | body_model_output.vertices, 1, 519 | body_model_faces).view(batch_size, -1, 3, 3) 520 | 521 | with torch.no_grad(): 522 | collision_idxs = self.search_tree(triangles) 523 | 524 | # Remove unwanted collisions 525 | if self.tri_filtering_module is not None: 526 | collision_idxs = self.tri_filtering_module(collision_idxs) 527 | 528 | if collision_idxs.ge(0).sum().item() > 0: 529 | pen_loss = torch.sum( 530 | self.coll_loss_weight * 531 | self.pen_distance(triangles, collision_idxs)) 532 | 533 | s2m_dist = 0.0 534 | m2s_dist = 0.0 535 | # calculate the scan2mesh and mesh2scan loss from the sparse point cloud 536 | if (self.s2m or self.m2s) and ( 537 | self.s2m_weight > 0 or self.m2s_weight > 0) and scan_tensor is not None: 538 | vertices_np = body_model_output.vertices.detach().cpu().numpy().squeeze() 539 | body_faces_np = body_model_faces.detach().cpu().numpy().reshape(-1, 3) 540 | m = Mesh(v=vertices_np, f=body_faces_np) 541 | 542 | (vis, n_dot) = visibility_compute(v=m.v, f=m.f, cams=np.array([[0.0, 0.0, 0.0]])) 
543 | vis = vis.squeeze() 544 | 545 | if self.s2m and self.s2m_weight > 0 and vis.sum() > 0: 546 | s2m_dist, _, _, _ = distChamfer(scan_tensor, 547 | body_model_output.vertices[:, np.where(vis > 0)[0], :]) 548 | s2m_dist = self.s2m_robustifier(s2m_dist.sqrt()) 549 | s2m_dist = self.s2m_weight * s2m_dist.sum() 550 | if self.m2s and self.m2s_weight > 0 and vis.sum() > 0: 551 | _, m2s_dist, _, _ = distChamfer(scan_tensor, 552 | body_model_output.vertices[:, np.where(np.logical_and(vis > 0, self.body_mask))[0], :]) 553 | 554 | m2s_dist = self.m2s_robustifier(m2s_dist.sqrt()) 555 | m2s_dist = self.m2s_weight * m2s_dist.sum() 556 | 557 | # Transform vertices to world coordinates 558 | if self.R is not None and self.t is not None: 559 | vertices = body_model_output.vertices 560 | nv = vertices.shape[1] 561 | vertices.squeeze_() 562 | vertices = self.R.mm(vertices.t()).t() + self.t.repeat([nv, 1]) 563 | vertices.unsqueeze_(0) 564 | 565 | # Compute scene penetration using signed distance field (SDF) 566 | sdf_penetration_loss = 0.0 567 | if self.sdf_penetration and self.sdf_penetration_weight > 0: 568 | grid_dim = self.sdf.shape[0] 569 | sdf_ids = torch.round( 570 | (vertices.squeeze() - self.grid_min) / self.voxel_size).to(dtype=torch.long) 571 | sdf_ids.clamp_(min=0, max=grid_dim-1) 572 | 573 | norm_vertices = (vertices - self.grid_min) / (self.grid_max - self.grid_min) * 2 - 1 574 | body_sdf = F.grid_sample(self.sdf.view(1, 1, grid_dim, grid_dim, grid_dim), 575 | norm_vertices[:, :, [2, 1, 0]].view(1, nv, 1, 1, 3), 576 | padding_mode='border') 577 | sdf_normals = self.sdf_normals[sdf_ids[:,0], sdf_ids[:,1], sdf_ids[:,2]] 578 | # if there are no penetrating vertices then set sdf_penetration_loss = 0 579 | if body_sdf.lt(0).sum().item() < 1: 580 | sdf_penetration_loss = torch.tensor(0.0, dtype=joint_loss.dtype, device=joint_loss.device) 581 | else: 582 | if sdf_normals is None: 583 | sdf_penetration_loss = self.sdf_penetration_weight * (body_sdf[body_sdf < 0].unsqueeze(dim=-1).abs()).pow(2).sum(dim=-1).sqrt().sum() 584 | else: 585 | sdf_penetration_loss = self.sdf_penetration_weight * (body_sdf[body_sdf < 0].unsqueeze(dim=-1).abs() * sdf_normals[body_sdf.view(-1) < 0, :]).pow(2).sum(dim=-1).sqrt().sum() 586 | 587 | # Compute the contact loss 588 | contact_loss = 0.0 589 | if self.contact and self.contact_loss_weight >0: 590 | # select contact vertices 591 | contact_body_vertices = vertices[:, self.contact_verts_ids, :] 592 | contact_dist, _, idx1, _ = distChamfer( 593 | contact_body_vertices.contiguous(), scene_v) 594 | 595 | body_triangles = torch.index_select( 596 | vertices, 1, 597 | body_model_faces).view(1, -1, 3, 3) 598 | # Calculate the edges of the triangles 599 | # Size: BxFx3 600 | edge0 = body_triangles[:, :, 1] - body_triangles[:, :, 0] 601 | edge1 = body_triangles[:, :, 2] - body_triangles[:, :, 0] 602 | # Compute the cross product of the edges to find the normal vector of 603 | # the triangle 604 | body_normals = torch.cross(edge0, edge1, dim=2) 605 | # Normalize the result to get a unit vector 606 | body_normals = body_normals / \ 607 | torch.norm(body_normals, 2, dim=2, keepdim=True) 608 | # compute the vertex normals 609 | body_v_normals = torch.mm(ftov, body_normals.squeeze()) 610 | body_v_normals = body_v_normals / \ 611 | torch.norm(body_v_normals, 2, dim=1, keepdim=True) 612 | 613 | # vertix normals of contact vertices 614 | contact_body_verts_normals = body_v_normals[self.contact_verts_ids, :] 615 | # scene normals of the closest points on the scene surface to the contact 
vertices 616 | contact_scene_normals = scene_vn[:, idx1.squeeze().to( 617 | dtype=torch.long), :].squeeze() 618 | 619 | # compute the angle between contact_verts normals and scene normals 620 | angles = torch.asin( 621 | torch.norm(torch.cross(contact_body_verts_normals, contact_scene_normals), 2, dim=1, keepdim=True)) *180 / np.pi 622 | 623 | # consider only the vertices which their normals match 624 | valid_contact_mask = (angles.le(self.contact_angle) + angles.ge(180 - self.contact_angle)).ge(1) 625 | valid_contact_ids = valid_contact_mask.squeeze().nonzero().squeeze() 626 | 627 | contact_dist = self.contact_robustifier(contact_dist[:, valid_contact_ids].sqrt()) 628 | contact_loss = self.contact_loss_weight * contact_dist.mean() 629 | 630 | total_loss = (joint_loss + pprior_loss + shape_loss + 631 | angle_prior_loss + pen_loss + 632 | jaw_prior_loss + expression_loss + 633 | left_hand_prior_loss + right_hand_prior_loss + m2s_dist + s2m_dist 634 | + sdf_penetration_loss + contact_loss) 635 | if visualize: 636 | print('total:{:.2f}, joint_loss:{:0.2f}, s2m:{:0.2f}, m2s:{:0.2f}, penetration:{:0.2f}, contact:{:0.2f}'. 637 | format(total_loss.item(), joint_loss.item() ,torch.tensor(s2m_dist).item(), 638 | torch.tensor(m2s_dist).item() ,torch.tensor(sdf_penetration_loss).item(), torch.tensor(contact_loss).item())) 639 | return total_loss 640 | 641 | 642 | class SMPLifyCameraInitLoss(nn.Module): 643 | 644 | def __init__(self, init_joints_idxs, trans_estimation=None, 645 | reduction='sum', 646 | data_weight=1.0, 647 | depth_loss_weight=1e2, 648 | camera_mode='moving', 649 | dtype=torch.float32, 650 | **kwargs): 651 | super(SMPLifyCameraInitLoss, self).__init__() 652 | self.dtype = dtype 653 | self.camera_mode = camera_mode 654 | 655 | if trans_estimation is not None: 656 | self.register_buffer( 657 | 'trans_estimation', 658 | utils.to_tensor(trans_estimation, dtype=dtype)) 659 | else: 660 | self.trans_estimation = trans_estimation 661 | 662 | self.register_buffer('data_weight', 663 | torch.tensor(data_weight, dtype=dtype)) 664 | self.register_buffer( 665 | 'init_joints_idxs', 666 | utils.to_tensor(init_joints_idxs, dtype=torch.long)) 667 | self.register_buffer('depth_loss_weight', 668 | torch.tensor(depth_loss_weight, dtype=dtype)) 669 | 670 | def reset_loss_weights(self, loss_weight_dict): 671 | for key in loss_weight_dict: 672 | if hasattr(self, key): 673 | weight_tensor = getattr(self, key) 674 | weight_tensor = torch.tensor(loss_weight_dict[key], 675 | dtype=weight_tensor.dtype, 676 | device=weight_tensor.device) 677 | setattr(self, key, weight_tensor) 678 | 679 | def forward(self, body_model_output, camera, gt_joints, body_model, 680 | **kwargs): 681 | 682 | projected_joints = camera(body_model_output.joints) 683 | 684 | joint_error = torch.pow( 685 | torch.index_select(gt_joints, 1, self.init_joints_idxs) - 686 | torch.index_select(projected_joints, 1, self.init_joints_idxs), 687 | 2) 688 | joint_loss = torch.sum(joint_error) * self.data_weight ** 2 689 | 690 | depth_loss = 0.0 691 | if (self.depth_loss_weight.item() > 0 and self.trans_estimation is not 692 | None): 693 | if self.camera_mode == 'moving': 694 | depth_loss = self.depth_loss_weight ** 2 * torch.sum(( 695 | camera.translation[:, 696 | 2] - self.trans_estimation[:, 2]).pow(2)) 697 | elif self.camera_mode == 'fixed': 698 | depth_loss = self.depth_loss_weight ** 2 * torch.sum(( 699 | body_model.transl[:, 2] - self.trans_estimation[:, 2]).pow(2)) 700 | 701 | 702 | 703 | return joint_loss + depth_loss 704 | 
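# A toy sketch of the SDF-based scene-penetration term used in SMPLifyLoss
# above: body vertices are looked up in a precomputed signed distance field of
# the static scene via trilinear interpolation, and vertices with a negative
# distance (inside scene geometry) are penalised. Grid size, grid extents and
# the random vertices below are illustrative stand-ins.
import torch
import torch.nn.functional as F

grid_dim = 8
sdf = torch.rand(grid_dim, grid_dim, grid_dim) * 2 - 1    # stand-in for the scene SDF
grid_min = torch.tensor([-1.0, -1.0, -1.0])
grid_max = torch.tensor([1.0, 1.0, 1.0])

vertices = torch.rand(1, 10475, 3) * 2 - 1                # SMPL-X has 10475 vertices
nv = vertices.shape[1]

# normalise vertex coordinates to [-1, 1] and sample the SDF; the [2, 1, 0]
# index flip mirrors the coordinate ordering used by the loss above
norm_vertices = (vertices - grid_min) / (grid_max - grid_min) * 2 - 1
body_sdf = F.grid_sample(sdf.view(1, 1, grid_dim, grid_dim, grid_dim),
                         norm_vertices[:, :, [2, 1, 0]].view(1, nv, 1, 1, 3),
                         padding_mode='border')

# only vertices inside the scene surface (sdf < 0) contribute
sdf_penetration = body_sdf[body_sdf < 0].abs().sum()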
-------------------------------------------------------------------------------- /prox/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | import sys 23 | import os 24 | 25 | import os.path as osp 26 | 27 | import time 28 | import yaml 29 | import open3d as o3d 30 | import torch 31 | 32 | import smplx 33 | 34 | 35 | from misc_utils import JointMapper 36 | from cmd_parser import parse_config 37 | from data_parser import create_dataset 38 | from fit_single_frame import fit_single_frame 39 | 40 | from camera import create_camera 41 | from prior import create_prior 42 | 43 | torch.backends.cudnn.enabled = False 44 | 45 | def main(**args): 46 | data_folder = args.get('recording_dir') 47 | recording_name = osp.basename(args.get('recording_dir')) 48 | scene_name = recording_name.split("_")[0] 49 | base_dir = os.path.abspath(osp.join(args.get('recording_dir'), os.pardir, os.pardir)) 50 | keyp_dir = osp.join(base_dir, 'keypoints') 51 | keyp_folder = osp.join(keyp_dir, recording_name) 52 | cam2world_dir = osp.join(base_dir, 'cam2world') 53 | scene_dir = osp.join(base_dir, 'scenes') 54 | calib_dir = osp.join(base_dir, 'calibration') 55 | sdf_dir = osp.join(base_dir, 'sdf') 56 | body_segments_dir = osp.join(base_dir, 'body_segments') 57 | 58 | 59 | output_folder = args.get('output_folder') 60 | output_folder = osp.expandvars(output_folder) 61 | output_folder = osp.join(output_folder, recording_name) 62 | if not osp.exists(output_folder): 63 | os.makedirs(output_folder) 64 | 65 | # Store the arguments for the current experiment 66 | conf_fn = osp.join(output_folder, 'conf.yaml') 67 | with open(conf_fn, 'w') as conf_file: 68 | yaml.dump(args, conf_file) 69 | #remove 'output_folder' from args list 70 | args.pop('output_folder') 71 | 72 | result_folder = args.pop('result_folder', 'results') 73 | result_folder = osp.join(output_folder, result_folder) 74 | if not osp.exists(result_folder): 75 | os.makedirs(result_folder) 76 | 77 | mesh_folder = args.pop('mesh_folder', 'meshes') 78 | mesh_folder = osp.join(output_folder, mesh_folder) 79 | if not osp.exists(mesh_folder): 80 | os.makedirs(mesh_folder) 81 | 82 | out_img_folder = osp.join(output_folder, 'images') 83 | if not osp.exists(out_img_folder): 84 | os.makedirs(out_img_folder) 85 | 86 | body_scene_rendering_dir = os.path.join(output_folder, 'renderings') 87 | if not osp.exists(body_scene_rendering_dir): 88 | os.mkdir(body_scene_rendering_dir) 89 | 90 | float_dtype = args['float_dtype'] 91 | if float_dtype == 'float64': 92 | dtype = torch.float64 93 | elif float_dtype == 'float32': 94 | 
dtype = torch.float64 95 | else: 96 | print('Unknown float type {}, exiting!'.format(float_dtype)) 97 | sys.exit(-1) 98 | 99 | use_cuda = args.get('use_cuda', True) 100 | if use_cuda and not torch.cuda.is_available(): 101 | print('CUDA is not available, exiting!') 102 | sys.exit(-1) 103 | 104 | img_folder = args.pop('img_folder', 'Color') 105 | dataset_obj = create_dataset(img_folder=img_folder,data_folder=data_folder, keyp_folder=keyp_folder, calib_dir=calib_dir,**args) 106 | 107 | start = time.time() 108 | 109 | input_gender = args.pop('gender', 'neutral') 110 | gender_lbl_type = args.pop('gender_lbl_type', 'none') 111 | max_persons = args.pop('max_persons', -1) 112 | 113 | float_dtype = args.get('float_dtype', 'float32') 114 | if float_dtype == 'float64': 115 | dtype = torch.float64 116 | elif float_dtype == 'float32': 117 | dtype = torch.float32 118 | else: 119 | raise ValueError('Unknown float type {}, exiting!'.format(float_dtype)) 120 | 121 | joint_mapper = JointMapper(dataset_obj.get_model2data()) 122 | 123 | model_params = dict(model_path=args.get('model_folder'), 124 | joint_mapper=joint_mapper, 125 | create_global_orient=True, 126 | create_body_pose=not args.get('use_vposer'), 127 | create_betas=True, 128 | create_left_hand_pose=True, 129 | create_right_hand_pose=True, 130 | create_expression=True, 131 | create_jaw_pose=True, 132 | create_leye_pose=True, 133 | create_reye_pose=True, 134 | create_transl=True, 135 | dtype=dtype, 136 | **args) 137 | 138 | male_model = smplx.create(gender='male', **model_params) 139 | # SMPL-H has no gender-neutral model 140 | if args.get('model_type') != 'smplh': 141 | neutral_model = smplx.create(gender='neutral', **model_params) 142 | female_model = smplx.create(gender='female', **model_params) 143 | 144 | # Create the camera object 145 | camera_center = None \ 146 | if args.get('camera_center_x') is None or args.get('camera_center_y') is None \ 147 | else torch.tensor([args.get('camera_center_x'), args.get('camera_center_y')], dtype=dtype).view(-1, 2) 148 | camera = create_camera(focal_length_x=args.get('focal_length_x'), 149 | focal_length_y=args.get('focal_length_y'), 150 | center= camera_center, 151 | batch_size=args.get('batch_size'), 152 | dtype=dtype) 153 | 154 | if hasattr(camera, 'rotation'): 155 | camera.rotation.requires_grad = False 156 | 157 | use_hands = args.get('use_hands', True) 158 | use_face = args.get('use_face', True) 159 | 160 | body_pose_prior = create_prior( 161 | prior_type=args.get('body_prior_type'), 162 | dtype=dtype, 163 | **args) 164 | 165 | jaw_prior, expr_prior = None, None 166 | if use_face: 167 | jaw_prior = create_prior( 168 | prior_type=args.get('jaw_prior_type'), 169 | dtype=dtype, 170 | **args) 171 | expr_prior = create_prior( 172 | prior_type=args.get('expr_prior_type', 'l2'), 173 | dtype=dtype, **args) 174 | 175 | left_hand_prior, right_hand_prior = None, None 176 | if use_hands: 177 | lhand_args = args.copy() 178 | lhand_args['num_gaussians'] = args.get('num_pca_comps') 179 | left_hand_prior = create_prior( 180 | prior_type=args.get('left_hand_prior_type'), 181 | dtype=dtype, 182 | use_left_hand=True, 183 | **lhand_args) 184 | 185 | rhand_args = args.copy() 186 | rhand_args['num_gaussians'] = args.get('num_pca_comps') 187 | right_hand_prior = create_prior( 188 | prior_type=args.get('right_hand_prior_type'), 189 | dtype=dtype, 190 | use_right_hand=True, 191 | **rhand_args) 192 | 193 | shape_prior = create_prior( 194 | prior_type=args.get('shape_prior_type', 'l2'), 195 | dtype=dtype, **args) 196 | 197 | 
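# All of the priors above are built by the same factory defined in
# prox/prior.py (included later in this listing): 'gmm' loads a pre-trained
# pose mixture from prior_folder/gmm_{num_gaussians:02d}.pkl, 'l2' is a plain
# squared penalty, 'angle' penalises unnatural elbow/knee bending, and 'none'
# disables the term. A minimal sketch with illustrative arguments:
#
#     from prior import create_prior
#
#     l2_prior = create_prior(prior_type='l2', dtype=torch.float32)
#     bend_prior = create_prior(prior_type='angle', dtype=torch.float32)
#     l2_prior(torch.zeros(1, 10))    # -> tensor(0.) for an all-zero input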
angle_prior = create_prior(prior_type='angle', dtype=dtype) 198 | 199 | if use_cuda and torch.cuda.is_available(): 200 | device = torch.device('cuda') 201 | 202 | camera = camera.to(device=device) 203 | female_model = female_model.to(device=device) 204 | male_model = male_model.to(device=device) 205 | if args.get('model_type') != 'smplh': 206 | neutral_model = neutral_model.to(device=device) 207 | body_pose_prior = body_pose_prior.to(device=device) 208 | angle_prior = angle_prior.to(device=device) 209 | shape_prior = shape_prior.to(device=device) 210 | if use_face: 211 | expr_prior = expr_prior.to(device=device) 212 | jaw_prior = jaw_prior.to(device=device) 213 | if use_hands: 214 | left_hand_prior = left_hand_prior.to(device=device) 215 | right_hand_prior = right_hand_prior.to(device=device) 216 | else: 217 | device = torch.device('cpu') 218 | 219 | # A weight for every joint of the model 220 | joint_weights = dataset_obj.get_joint_weights().to(device=device, 221 | dtype=dtype) 222 | # Add a fake batch dimension for broadcasting 223 | joint_weights.unsqueeze_(dim=0) 224 | 225 | for idx, data in enumerate(dataset_obj): 226 | 227 | img = data['img'] 228 | fn = data['fn'] 229 | keypoints = data['keypoints'] 230 | depth_im = data['depth_im'] 231 | mask = data['mask'] 232 | init_trans = None if data['init_trans'] is None else torch.tensor(data['init_trans'], dtype=dtype).view(-1,3) 233 | scan = data['scan_dict'] 234 | print('Processing: {}'.format(data['img_path'])) 235 | 236 | curr_result_folder = osp.join(result_folder, fn) 237 | if not osp.exists(curr_result_folder): 238 | os.makedirs(curr_result_folder) 239 | curr_mesh_folder = osp.join(mesh_folder, fn) 240 | if not osp.exists(curr_mesh_folder): 241 | os.makedirs(curr_mesh_folder) 242 | #TODO: SMPLifyD and PROX won't work for multiple persons 243 | for person_id in range(keypoints.shape[0]): 244 | if person_id >= max_persons and max_persons > 0: 245 | continue 246 | 247 | curr_result_fn = osp.join(curr_result_folder, 248 | '{:03d}.pkl'.format(person_id)) 249 | curr_mesh_fn = osp.join(curr_mesh_folder, 250 | '{:03d}.ply'.format(person_id)) 251 | curr_body_scene_rendering_fn = osp.join(body_scene_rendering_dir, fn + '.png') 252 | 253 | curr_img_folder = osp.join(output_folder, 'images', fn, 254 | '{:03d}'.format(person_id)) 255 | if not osp.exists(curr_img_folder): 256 | os.makedirs(curr_img_folder) 257 | 258 | if gender_lbl_type != 'none': 259 | if gender_lbl_type == 'pd' and 'gender_pd' in data: 260 | gender = data['gender_pd'][person_id] 261 | if gender_lbl_type == 'gt' and 'gender_gt' in data: 262 | gender = data['gender_gt'][person_id] 263 | else: 264 | gender = input_gender 265 | 266 | if gender == 'neutral': 267 | body_model = neutral_model 268 | elif gender == 'female': 269 | body_model = female_model 270 | elif gender == 'male': 271 | body_model = male_model 272 | 273 | out_img_fn = osp.join(curr_img_folder, 'output.png') 274 | 275 | fit_single_frame(img, keypoints[[person_id]], init_trans, scan, 276 | cam2world_dir=cam2world_dir, 277 | scene_dir=scene_dir, 278 | sdf_dir=sdf_dir, 279 | body_segments_dir=body_segments_dir, 280 | scene_name=scene_name, 281 | body_model=body_model, 282 | camera=camera, 283 | joint_weights=joint_weights, 284 | dtype=dtype, 285 | output_folder=output_folder, 286 | result_folder=curr_result_folder, 287 | out_img_fn=out_img_fn, 288 | result_fn=curr_result_fn, 289 | mesh_fn=curr_mesh_fn, 290 | body_scene_rendering_fn=curr_body_scene_rendering_fn, 291 | shape_prior=shape_prior, 292 | expr_prior=expr_prior, 
293 | body_pose_prior=body_pose_prior, 294 | left_hand_prior=left_hand_prior, 295 | right_hand_prior=right_hand_prior, 296 | jaw_prior=jaw_prior, 297 | angle_prior=angle_prior, 298 | **args) 299 | 300 | elapsed = time.time() - start 301 | time_msg = time.strftime('%H hours, %M minutes, %S seconds', 302 | time.gmtime(elapsed)) 303 | print('Processing the data took: {}'.format(time_msg)) 304 | 305 | 306 | if __name__ == "__main__": 307 | args = parse_config() 308 | main(**args) 309 | -------------------------------------------------------------------------------- /prox/misc_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | 23 | import numpy as np 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | import pyrender 29 | 30 | def to_tensor(tensor, dtype=torch.float32): 31 | if torch.Tensor == type(tensor): 32 | return tensor.clone().detach() 33 | else: 34 | return torch.tensor(tensor, dtype) 35 | 36 | 37 | def rel_change(prev_val, curr_val): 38 | return (prev_val - curr_val) / max([np.abs(prev_val), np.abs(curr_val), 1]) 39 | 40 | 41 | def max_grad_change(grad_arr): 42 | return grad_arr.abs().max() 43 | 44 | 45 | class JointMapper(nn.Module): 46 | def __init__(self, joint_maps=None): 47 | super(JointMapper, self).__init__() 48 | if joint_maps is None: 49 | self.joint_maps = joint_maps 50 | else: 51 | self.register_buffer('joint_maps', 52 | torch.tensor(joint_maps, dtype=torch.long)) 53 | 54 | def forward(self, joints, **kwargs): 55 | if self.joint_maps is None: 56 | return joints 57 | else: 58 | return torch.index_select(joints, 1, self.joint_maps) 59 | 60 | 61 | class GMoF(nn.Module): 62 | def __init__(self, rho=1): 63 | super(GMoF, self).__init__() 64 | self.rho = rho 65 | 66 | def extra_repr(self): 67 | return 'rho = {}'.format(self.rho) 68 | 69 | def forward(self, residual): 70 | squared_res = residual ** 2 71 | dist = torch.div(squared_res, squared_res + self.rho ** 2) 72 | return self.rho ** 2 * dist 73 | 74 | class GMoF_unscaled(nn.Module): 75 | def __init__(self, rho=1): 76 | super(GMoF_unscaled, self).__init__() 77 | self.rho = rho 78 | 79 | def extra_repr(self): 80 | return 'rho = {}'.format(self.rho) 81 | 82 | def forward(self, residual): 83 | squared_res = residual ** 2 84 | dist = torch.div(squared_res, squared_res + self.rho ** 2) 85 | return dist 86 | 87 | def smpl_to_openpose(model_type='smplx', use_hands=True, use_face=True, 88 | use_face_contour=False, openpose_format='coco25'): 89 | ''' Returns the indices of the permutation that maps OpenPose to SMPL 90 | 91 | Parameters 92 | ---------- 93 | 
model_type: str, optional 94 | The type of SMPL-like model that is used. The default mapping 95 | returned is for the SMPLX model 96 | use_hands: bool, optional 97 | Flag for adding to the returned permutation the mapping for the 98 | hand keypoints. Defaults to True 99 | use_face: bool, optional 100 | Flag for adding to the returned permutation the mapping for the 101 | face keypoints. Defaults to True 102 | use_face_contour: bool, optional 103 | Flag for appending the facial contour keypoints. Defaults to False 104 | openpose_format: bool, optional 105 | The output format of OpenPose. For now only COCO-25 and COCO-19 is 106 | supported. Defaults to 'coco25' 107 | 108 | ''' 109 | if openpose_format.lower() == 'coco25': 110 | if model_type == 'smpl': 111 | return np.array([24, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4, 112 | 7, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], 113 | dtype=np.int32) 114 | elif model_type == 'smplh': 115 | body_mapping = np.array([52, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 116 | 8, 1, 4, 7, 53, 54, 55, 56, 57, 58, 59, 117 | 60, 61, 62], dtype=np.int32) 118 | mapping = [body_mapping] 119 | if use_hands: 120 | lhand_mapping = np.array([20, 34, 35, 36, 63, 22, 23, 24, 64, 121 | 25, 26, 27, 65, 31, 32, 33, 66, 28, 122 | 29, 30, 67], dtype=np.int32) 123 | rhand_mapping = np.array([21, 49, 50, 51, 68, 37, 38, 39, 69, 124 | 40, 41, 42, 70, 46, 47, 48, 71, 43, 125 | 44, 45, 72], dtype=np.int32) 126 | mapping += [lhand_mapping, rhand_mapping] 127 | return np.concatenate(mapping) 128 | # SMPLX 129 | elif model_type == 'smplx': 130 | body_mapping = np.array([55, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 131 | 8, 1, 4, 7, 56, 57, 58, 59, 60, 61, 62, 132 | 63, 64, 65], dtype=np.int32) 133 | mapping = [body_mapping] 134 | if use_hands: 135 | lhand_mapping = np.array([20, 37, 38, 39, 66, 25, 26, 27, 136 | 67, 28, 29, 30, 68, 34, 35, 36, 69, 137 | 31, 32, 33, 70], dtype=np.int32) 138 | rhand_mapping = np.array([21, 52, 53, 54, 71, 40, 41, 42, 72, 139 | 43, 44, 45, 73, 49, 50, 51, 74, 46, 140 | 47, 48, 75], dtype=np.int32) 141 | 142 | mapping += [lhand_mapping, rhand_mapping] 143 | if use_face: 144 | # end_idx = 127 + 17 * use_face_contour 145 | face_mapping = np.arange(76, 127 + 17 * use_face_contour, 146 | dtype=np.int32) 147 | mapping += [face_mapping] 148 | 149 | return np.concatenate(mapping) 150 | else: 151 | raise ValueError('Unknown model type: {}'.format(model_type)) 152 | elif openpose_format == 'coco19': 153 | if model_type == 'smpl': 154 | return np.array([24, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 155 | 1, 4, 7, 25, 26, 27, 28], 156 | dtype=np.int32) 157 | elif model_type == 'smplh': 158 | body_mapping = np.array([52, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 159 | 8, 1, 4, 7, 53, 54, 55, 56], 160 | dtype=np.int32) 161 | mapping = [body_mapping] 162 | if use_hands: 163 | lhand_mapping = np.array([20, 34, 35, 36, 57, 22, 23, 24, 58, 164 | 25, 26, 27, 59, 31, 32, 33, 60, 28, 165 | 29, 30, 61], dtype=np.int32) 166 | rhand_mapping = np.array([21, 49, 50, 51, 62, 37, 38, 39, 63, 167 | 40, 41, 42, 64, 46, 47, 48, 65, 43, 168 | 44, 45, 66], dtype=np.int32) 169 | mapping += [lhand_mapping, rhand_mapping] 170 | return np.concatenate(mapping) 171 | # SMPLX 172 | elif model_type == 'smplx': 173 | body_mapping = np.array([55, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 174 | 8, 1, 4, 7, 56, 57, 58, 59], 175 | dtype=np.int32) 176 | mapping = [body_mapping] 177 | if use_hands: 178 | lhand_mapping = np.array([20, 37, 38, 39, 60, 25, 26, 27, 179 | 61, 28, 29, 30, 62, 34, 35, 36, 63, 180 | 31, 32, 33, 
64], dtype=np.int32) 181 | rhand_mapping = np.array([21, 52, 53, 54, 65, 40, 41, 42, 66, 182 | 43, 44, 45, 67, 49, 50, 51, 68, 46, 183 | 47, 48, 69], dtype=np.int32) 184 | 185 | mapping += [lhand_mapping, rhand_mapping] 186 | if use_face: 187 | face_mapping = np.arange(70, 70 + 51 + 188 | 17 * use_face_contour, 189 | dtype=np.int32) 190 | mapping += [face_mapping] 191 | 192 | return np.concatenate(mapping) 193 | else: 194 | raise ValueError('Unknown model type: {}'.format(model_type)) 195 | else: 196 | raise ValueError('Unknown joint format: {}'.format(openpose_format)) 197 | -------------------------------------------------------------------------------- /prox/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | -------------------------------------------------------------------------------- /prox/optimizers/lbfgs_ls.py: -------------------------------------------------------------------------------- 1 | # PyTorch implementation of L-BFGS with Strong Wolfe line search 2 | # Will be removed once https://github.com/pytorch/pytorch/pull/8824 3 | # is merged 4 | 5 | import torch 6 | from functools import reduce 7 | 8 | from torch.optim import Optimizer 9 | 10 | 11 | def _cubic_interpolate(x1, f1, g1, x2, f2, g2, bounds=None): 12 | # ported from https://github.com/torch/optim/blob/master/polyinterp.lua 13 | # Compute bounds of interpolation area 14 | if bounds is not None: 15 | xmin_bound, xmax_bound = bounds 16 | else: 17 | xmin_bound, xmax_bound = (x1, x2) if x1 <= x2 else (x2, x1) 18 | 19 | # Code for most common case: cubic interpolation of 2 points 20 | # w/ function and derivative values for both 21 | # Solution in this case (where x2 is the farthest point): 22 | # d1 = g1 + g2 - 3*(f1-f2)/(x1-x2); 23 | # d2 = sqrt(d1^2 - g1*g2); 24 | # min_pos = x2 - (x2 - x1)*((g2 + d2 - d1)/(g2 - g1 + 2*d2)); 25 | # t_new = min(max(min_pos,xmin_bound),xmax_bound); 26 | d1 = g1 + g2 - 3 * (f1 - f2) / (x1 - x2) 27 | d2_square = d1 ** 2 - g1 * g2 28 | if d2_square >= 0: 29 | d2 = d2_square.sqrt() 30 | if x1 <= x2: 31 | min_pos = x2 - (x2 - x1) * ((g2 + d2 - d1) / (g2 - g1 + 2 * d2)) 32 | else: 33 | min_pos = x1 - (x1 - x2) * ((g1 + d2 - d1) / (g1 - g2 + 2 * d2)) 34 | return min(max(min_pos, xmin_bound), xmax_bound) 35 | else: 36 | return (xmin_bound + xmax_bound) / 2. 
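# Quick sanity check of _cubic_interpolate above: for f(x) = (x - 2)^2 the
# cubic fit through (x1, f1, f'(x1)) = (0, 4, -4) and (x2, f2, f'(x2)) = (3, 1, 2)
# recovers the true minimiser x = 2 exactly. The gradients are passed as
# tensors because the helper calls .sqrt() on an expression built from them.
#
#     t_min = _cubic_interpolate(0.0, 4.0, torch.tensor(-4.0),
#                                3.0, 1.0, torch.tensor(2.0))
#     # t_min == tensor(2.)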
37 | 38 | 39 | def _strong_Wolfe(obj_func, x, t, d, f, g, gtd, c1=1e-4, c2=0.9, tolerance_change=1e-9, 40 | max_iter=20, 41 | max_ls=25): 42 | # ported from https://github.com/torch/optim/blob/master/lswolfe.lua 43 | d_norm = d.abs().max() 44 | g = g.clone() 45 | # evaluate objective and gradient using initial step 46 | f_new, g_new = obj_func(x, t, d) 47 | ls_func_evals = 1 48 | gtd_new = g_new.dot(d) 49 | 50 | # bracket an interval containing a point satisfying the Wolfe criteria 51 | t_prev, f_prev, g_prev, gtd_prev = 0, f, g, gtd 52 | done = False 53 | ls_iter = 0 54 | while ls_iter < max_ls: 55 | # check conditions 56 | if f_new > (f + c1 * t * gtd) or (ls_iter > 1 and f_new >= f_prev): 57 | bracket = [t_prev, t] 58 | bracket_f = [f_prev, f_new] 59 | bracket_g = [g_prev, g_new.clone()] 60 | bracket_gtd = [gtd_prev, gtd_new] 61 | break 62 | 63 | if abs(gtd_new) <= -c2 * gtd: 64 | bracket = [t] 65 | bracket_f = [f_new] 66 | bracket_g = [g_new] 67 | bracket_gtd = [gtd_new] 68 | done = True 69 | break 70 | 71 | if gtd_new >= 0: 72 | bracket = [t_prev, t] 73 | bracket_f = [f_prev, f_new] 74 | bracket_g = [g_prev, g_new.clone()] 75 | bracket_gtd = [gtd_prev, gtd_new] 76 | break 77 | 78 | # interpolate 79 | min_step = t + 0.01 * (t - t_prev) 80 | max_step = t * 10 81 | tmp = t 82 | t = _cubic_interpolate(t_prev, f_prev, gtd_prev, t, f_new, gtd_new, 83 | bounds=(min_step, max_step)) 84 | 85 | # next step 86 | t_prev = tmp 87 | f_prev = f_new 88 | g_prev = g_new.clone() 89 | gtd_prev = gtd_new 90 | f_new, g_new = obj_func(x, t, d) 91 | ls_func_evals += 1 92 | gtd_new = g_new.dot(d) 93 | ls_iter += 1 94 | 95 | # reached max number of iterations? 96 | if ls_iter == max_ls: 97 | bracket = [0, t] 98 | bracket_f = [f, f_new] 99 | bracket_g = [g, g_new] 100 | bracket_gtd = [gtd, gtd_new] 101 | 102 | # zoom phase: we now have a point satisfying the criteria, or 103 | # a bracket around it. 
We refine the bracket until we find the 104 | # exact point satisfying the criteria 105 | insuf_progress = False 106 | # find high and low points in bracket 107 | low_pos, high_pos = (0, 1) if bracket_f[0] <= bracket_f[-1] else (1, 0) 108 | while not done and ls_iter < max_iter: 109 | # compute new trial value 110 | t = _cubic_interpolate(bracket[0], bracket_f[0], bracket_gtd[0], 111 | bracket[1], bracket_f[1], bracket_gtd[1]) 112 | 113 | # test what we are making sufficient progress 114 | eps = 0.1 * (max(bracket) - min(bracket)) 115 | if min(max(bracket) - t, t - min(bracket)) < eps: 116 | # interpolation close to boundary 117 | if insuf_progress or t >= max(bracket) or t <= min(bracket): 118 | # evaluate at 0.1 away from boundary 119 | if abs(t - max(bracket)) < abs(t - min(bracket)): 120 | t = max(bracket) - eps 121 | else: 122 | t = min(bracket) + eps 123 | insuf_progress = False 124 | else: 125 | insuf_progress = True 126 | else: 127 | insuf_progress = False 128 | 129 | # Evaluate new point 130 | f_new, g_new = obj_func(x, t, d) 131 | ls_func_evals += 1 132 | gtd_new = g_new.dot(d) 133 | ls_iter += 1 134 | 135 | if f_new > (f + c1 * t * gtd) or f_new >= bracket_f[low_pos]: 136 | # Armijo condition not satisfied or not lower than lowest point 137 | bracket[high_pos] = t 138 | bracket_f[high_pos] = f_new 139 | bracket_g[high_pos] = g_new.clone() 140 | bracket_gtd[high_pos] = gtd_new 141 | low_pos, high_pos = (0, 1) if bracket_f[0] <= bracket_f[1] else (1, 0) 142 | else: 143 | if abs(gtd_new) <= -c2 * gtd: 144 | # Wolfe conditions satisfied 145 | done = True 146 | elif gtd_new * (bracket[high_pos] - bracket[low_pos]) >= 0: 147 | # old high becomes new low 148 | bracket[high_pos] = bracket[low_pos] 149 | bracket_f[high_pos] = bracket_f[low_pos] 150 | bracket_g[high_pos] = bracket_g[low_pos] 151 | bracket_gtd[high_pos] = bracket_gtd[low_pos] 152 | 153 | # new point becomes new low 154 | bracket[low_pos] = t 155 | bracket_f[low_pos] = f_new 156 | bracket_g[low_pos] = g_new.clone() 157 | bracket_gtd[low_pos] = gtd_new 158 | 159 | # line-search bracket is so small 160 | if abs(bracket[1] - bracket[0]) * d_norm < tolerance_change: 161 | break 162 | 163 | # return stuff 164 | t = bracket[low_pos] 165 | f_new = bracket_f[low_pos] 166 | g_new = bracket_g[low_pos] 167 | return f_new, g_new, t, ls_func_evals 168 | 169 | 170 | # LBFGS with strong Wolfe line search introduces in PR #8824 171 | # Will be removed once merged with master 172 | class LBFGS(Optimizer): 173 | """Implements L-BFGS algorithm, heavily inspired by `minFunc 174 | `. 175 | .. warning:: 176 | This optimizer doesn't support per-parameter options and parameter 177 | groups (there can be only one). 178 | .. warning:: 179 | Right now all parameters have to be on a single device. This will be 180 | improved in the future. 181 | .. note:: 182 | This is a very memory intensive optimizer (it requires additional 183 | ``param_bytes * (history_size + 1)`` bytes). If it doesn't fit in memory 184 | try reducing the history size, or use a different algorithm. 185 | Arguments: 186 | lr (float): learning rate (default: 1) 187 | max_iter (int): maximal number of iterations per optimization step 188 | (default: 20) 189 | max_eval (int): maximal number of function evaluations per optimization 190 | step (default: max_iter * 1.25). 191 | tolerance_grad (float): termination tolerance on first order optimality 192 | (default: 1e-5). 193 | tolerance_change (float): termination tolerance on function 194 | value/parameter changes (default: 1e-9). 
195 | history_size (int): update history size (default: 100). 196 | line_search_fn (str): either 'strong_Wolfe' or None (default: None). 197 | """ 198 | 199 | def __init__(self, params, lr=1, max_iter=20, max_eval=None, 200 | tolerance_grad=1e-5, tolerance_change=1e-9, history_size=100, 201 | line_search_fn=None): 202 | if max_eval is None: 203 | max_eval = max_iter * 5 // 4 204 | defaults = dict(lr=lr, max_iter=max_iter, max_eval=max_eval, 205 | tolerance_grad=tolerance_grad, tolerance_change=tolerance_change, 206 | history_size=history_size, line_search_fn=line_search_fn) 207 | super(LBFGS, self).__init__(params, defaults) 208 | 209 | if len(self.param_groups) != 1: 210 | raise ValueError("LBFGS doesn't support per-parameter options " 211 | "(parameter groups)") 212 | 213 | self._params = self.param_groups[0]['params'] 214 | self._numel_cache = None 215 | 216 | def _numel(self): 217 | if self._numel_cache is None: 218 | self._numel_cache = reduce(lambda total, p: total + p.numel(), self._params, 0) 219 | return self._numel_cache 220 | 221 | def _gather_flat_grad(self): 222 | views = [] 223 | for p in self._params: 224 | if p.grad is None: 225 | view = p.new(p.numel()).zero_() 226 | elif p.grad.is_sparse: 227 | view = p.grad.to_dense().view(-1) 228 | else: 229 | view = p.grad.view(-1) 230 | views.append(view) 231 | return torch.cat(views, 0) 232 | 233 | def _add_grad(self, step_size, update): 234 | offset = 0 235 | for p in self._params: 236 | numel = p.numel() 237 | # view as to avoid deprecated pointwise semantics 238 | p.data.add_(step_size, update[offset:offset + numel].view_as(p.data)) 239 | offset += numel 240 | assert offset == self._numel() 241 | 242 | def _clone_param(self): 243 | return [p.clone() for p in self._params] 244 | 245 | def _set_param(self, params_data): 246 | for p, pdata in zip(self._params, params_data): 247 | p.data.copy_(pdata) 248 | 249 | def _directional_evaluate(self, closure, x, t, d): 250 | self._add_grad(t, d) 251 | loss = float(closure()) 252 | flat_grad = self._gather_flat_grad() 253 | self._set_param(x) 254 | return loss, flat_grad 255 | 256 | def step(self, closure): 257 | """Performs a single optimization step. 258 | Arguments: 259 | closure (callable): A closure that reevaluates the model 260 | and returns the loss. 
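        Example (illustrative; ``params`` is any iterable of tensors with
        ``requires_grad=True`` and ``compute_loss`` any differentiable
        objective)::

            optimizer = LBFGS(params, lr=1.0, max_iter=20,
                              line_search_fn='strong_Wolfe')

            def closure():
                optimizer.zero_grad()
                loss = compute_loss()
                loss.backward()
                return loss

            optimizer.step(closure)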
261 | """ 262 | assert len(self.param_groups) == 1 263 | 264 | group = self.param_groups[0] 265 | lr = group['lr'] 266 | max_iter = group['max_iter'] 267 | max_eval = group['max_eval'] 268 | tolerance_grad = group['tolerance_grad'] 269 | tolerance_change = group['tolerance_change'] 270 | line_search_fn = group['line_search_fn'] 271 | history_size = group['history_size'] 272 | 273 | # NOTE: LBFGS has only global state, but we register it as state for 274 | # the first param, because this helps with casting in load_state_dict 275 | state = self.state[self._params[0]] 276 | state.setdefault('func_evals', 0) 277 | state.setdefault('n_iter', 0) 278 | 279 | # evaluate initial f(x) and df/dx 280 | orig_loss = closure() 281 | loss = float(orig_loss) 282 | current_evals = 1 283 | state['func_evals'] += 1 284 | 285 | flat_grad = self._gather_flat_grad() 286 | opt_cond = flat_grad.abs().max() <= tolerance_grad 287 | 288 | # optimal condition 289 | if opt_cond: 290 | return orig_loss 291 | 292 | # tensors cached in state (for tracing) 293 | d = state.get('d') 294 | t = state.get('t') 295 | old_dirs = state.get('old_dirs') 296 | old_stps = state.get('old_stps') 297 | ro = state.get('ro') 298 | H_diag = state.get('H_diag') 299 | prev_flat_grad = state.get('prev_flat_grad') 300 | prev_loss = state.get('prev_loss') 301 | 302 | n_iter = 0 303 | # optimize for a max of max_iter iterations 304 | while n_iter < max_iter: 305 | # keep track of nb of iterations 306 | n_iter += 1 307 | state['n_iter'] += 1 308 | 309 | ############################################################ 310 | # compute gradient descent direction 311 | ############################################################ 312 | if state['n_iter'] == 1: 313 | d = flat_grad.neg() 314 | old_dirs = [] 315 | old_stps = [] 316 | ro = [] 317 | H_diag = 1 318 | else: 319 | # do lbfgs update (update memory) 320 | y = flat_grad.sub(prev_flat_grad) 321 | s = d.mul(t) 322 | ys = y.dot(s) # y*s 323 | if ys > 1e-10: 324 | # updating memory 325 | if len(old_dirs) == history_size: 326 | # shift history by one (limited-memory) 327 | old_dirs.pop(0) 328 | old_stps.pop(0) 329 | ro.pop(0) 330 | 331 | # store new direction/step 332 | old_dirs.append(y) 333 | old_stps.append(s) 334 | ro.append(1. / ys) 335 | 336 | # update scale of initial Hessian approximation 337 | H_diag = ys / y.dot(y) # (y*y) 338 | 339 | # compute the approximate (L-BFGS) inverse Hessian 340 | # multiplied by the gradient 341 | num_old = len(old_dirs) 342 | 343 | if 'al' not in state: 344 | state['al'] = [None] * history_size 345 | al = state['al'] 346 | 347 | # iteration in L-BFGS loop collapsed to use just one buffer 348 | q = flat_grad.neg() 349 | for i in range(num_old - 1, -1, -1): 350 | al[i] = old_stps[i].dot(q) * ro[i] 351 | q.add_(-al[i], old_dirs[i]) 352 | 353 | # multiply by initial Hessian 354 | # r/d is the final direction 355 | d = r = torch.mul(q, H_diag) 356 | for i in range(num_old): 357 | be_i = old_dirs[i].dot(r) * ro[i] 358 | r.add_(al[i] - be_i, old_stps[i]) 359 | 360 | if prev_flat_grad is None: 361 | prev_flat_grad = flat_grad.clone() 362 | else: 363 | prev_flat_grad.copy_(flat_grad) 364 | prev_loss = loss 365 | 366 | ############################################################ 367 | # compute step length 368 | ############################################################ 369 | # reset initial guess for step size 370 | if state['n_iter'] == 1: 371 | t = min(1., 1. 
/ flat_grad.abs().sum()) * lr 372 | else: 373 | t = lr 374 | 375 | # directional derivative 376 | gtd = flat_grad.dot(d) # g * d 377 | 378 | # directional derivative is below tolerance 379 | if gtd > -tolerance_change: 380 | break 381 | 382 | # optional line search: user function 383 | ls_func_evals = 0 384 | if line_search_fn is not None: 385 | # perform line search, using user function 386 | if line_search_fn != "strong_Wolfe": 387 | raise RuntimeError("only 'strong_Wolfe' is supported") 388 | else: 389 | x_init = self._clone_param() 390 | 391 | def obj_func(x, t, d): 392 | return self._directional_evaluate(closure, x, t, d) 393 | loss, flat_grad, t, ls_func_evals = _strong_Wolfe(obj_func, x_init, t, d, 394 | loss, 395 | flat_grad, 396 | gtd, 397 | max_iter=max_iter) 398 | self._add_grad(t, d) 399 | opt_cond = flat_grad.abs().max() <= tolerance_grad 400 | else: 401 | # no line search, simply move with fixed-step 402 | self._add_grad(t, d) 403 | if n_iter != max_iter: 404 | # re-evaluate function only if not in last iteration 405 | # the reason we do this: in a stochastic setting, 406 | # no use to re-evaluate that function here 407 | loss = float(closure()) 408 | flat_grad = self._gather_flat_grad() 409 | opt_cond = flat_grad.abs().max() <= tolerance_grad 410 | ls_func_evals = 1 411 | 412 | # update func eval 413 | current_evals += ls_func_evals 414 | state['func_evals'] += ls_func_evals 415 | 416 | ############################################################ 417 | # check conditions 418 | ############################################################ 419 | if n_iter == max_iter: 420 | break 421 | 422 | if current_evals >= max_eval: 423 | break 424 | 425 | # optimal condition 426 | if opt_cond: 427 | break 428 | 429 | # lack of progress 430 | if d.mul(t).abs().max() <= tolerance_change: 431 | break 432 | 433 | if abs(loss - prev_loss) < tolerance_change: 434 | break 435 | 436 | state['d'] = d 437 | state['t'] = t 438 | state['old_dirs'] = old_dirs 439 | state['old_stps'] = old_stps 440 | state['ro'] = ro 441 | state['H_diag'] = H_diag 442 | state['prev_flat_grad'] = prev_flat_grad 443 | state['prev_loss'] = prev_loss 444 | 445 | return orig_loss 446 | -------------------------------------------------------------------------------- /prox/optimizers/optim_factory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | 19 | from __future__ import absolute_import 20 | from __future__ import print_function 21 | from __future__ import division 22 | 23 | import torch.optim as optim 24 | from .lbfgs_ls import LBFGS as LBFGSLs 25 | 26 | 27 | def create_optimizer(parameters, optim_type='lbfgs', 28 | lr=1e-3, 29 | momentum=0.9, 30 | use_nesterov=True, 31 | beta1=0.9, 32 | beta2=0.999, 33 | epsilon=1e-8, 34 | use_locking=False, 35 | weight_decay=0.0, 36 | centered=False, 37 | rmsprop_alpha=0.99, 38 | maxiters=20, 39 | gtol=1e-6, 40 | ftol=1e-9, 41 | **kwargs): 42 | ''' Creates the optimizer 43 | ''' 44 | if optim_type == 'adam': 45 | return (optim.Adam(parameters, lr=lr, betas=(beta1, beta2), 46 | weight_decay=weight_decay), 47 | False) 48 | elif optim_type == 'lbfgs': 49 | return (optim.LBFGS(parameters, lr=lr, max_iter=maxiters), False) 50 | elif optim_type == 'lbfgsls': 51 | return LBFGSLs(parameters, lr=lr, max_iter=maxiters, 52 | line_search_fn='strong_Wolfe'), False 53 | elif optim_type == 'rmsprop': 54 | return (optim.RMSprop(parameters, lr=lr, epsilon=epsilon, 55 | alpha=rmsprop_alpha, 56 | weight_decay=weight_decay, 57 | momentum=momentum, centered=centered), 58 | False) 59 | elif optim_type == 'sgd': 60 | return (optim.SGD(parameters, lr=lr, momentum=momentum, 61 | weight_decay=weight_decay, 62 | nesterov=use_nesterov), 63 | False) 64 | else: 65 | raise ValueError('Optimizer {} not supported!'.format(optim_type)) 66 | -------------------------------------------------------------------------------- /prox/prior.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 
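# A usage sketch for the optimizer factory defined above in
# prox/optimizers/optim_factory.py: it returns an (optimizer, flag) pair (the
# boolean flag is False for every supported type), and the 'lbfgsls' variant
# is the line-search L-BFGS from lbfgs_ls.py, which must be stepped with a
# closure as in LBFGS.step above. Names below are illustrative.
#
#     from optimizers.optim_factory import create_optimizer
#
#     optimizer, _ = create_optimizer([pose_embedding],
#                                     optim_type='lbfgsls',
#                                     lr=1.0, maxiters=30)
#     optimizer.step(closure)   # closure built as in LBFGS.step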
15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | from __future__ import absolute_import 19 | from __future__ import print_function 20 | from __future__ import division 21 | 22 | import sys 23 | import os 24 | 25 | import time 26 | import pickle 27 | 28 | import numpy as np 29 | 30 | import torch 31 | import torch.nn as nn 32 | 33 | DEFAULT_DTYPE = torch.float32 34 | 35 | 36 | def create_prior(prior_type, **kwargs): 37 | if prior_type == 'gmm': 38 | prior = MaxMixturePrior(**kwargs) 39 | elif prior_type == 'l2': 40 | return L2Prior(**kwargs) 41 | elif prior_type == 'angle': 42 | return SMPLifyAnglePrior(**kwargs) 43 | elif prior_type == 'none' or prior_type is None: 44 | # Don't use any pose prior 45 | def no_prior(*args, **kwargs): 46 | return 0.0 47 | prior = no_prior 48 | else: 49 | raise ValueError('Prior {}'.format(prior_type) + ' is not implemented') 50 | return prior 51 | 52 | 53 | class SMPLifyAnglePrior(nn.Module): 54 | def __init__(self, dtype=torch.float32, **kwargs): 55 | super(SMPLifyAnglePrior, self).__init__() 56 | 57 | # Indices for the roration angle of 58 | # 55: left elbow, 90deg bend at -np.pi/2 59 | # 58: right elbow, 90deg bend at np.pi/2 60 | # 12: left knee, 90deg bend at np.pi/2 61 | # 15: right knee, 90deg bend at np.pi/2 62 | angle_prior_idxs = np.array([55, 58, 12, 15], dtype=np.int64) 63 | angle_prior_idxs = torch.tensor(angle_prior_idxs, dtype=torch.long) 64 | self.register_buffer('angle_prior_idxs', angle_prior_idxs) 65 | 66 | angle_prior_signs = np.array([1, -1, -1, -1], 67 | dtype=np.float32 if dtype == torch.float32 68 | else np.float64) 69 | angle_prior_signs = torch.tensor(angle_prior_signs, 70 | dtype=dtype) 71 | self.register_buffer('angle_prior_signs', angle_prior_signs) 72 | 73 | def forward(self, pose, with_global_pose=False): 74 | ''' Returns the angle prior loss for the given pose 75 | 76 | Args: 77 | pose: (Bx[23 + 1] * 3) torch tensor with the axis-angle 78 | representation of the rotations of the joints of the SMPL model. 79 | Kwargs: 80 | with_global_pose: Whether the pose vector also contains the global 81 | orientation of the SMPL model. If not then the indices must be 82 | corrected. 83 | Returns: 84 | A sze (B) tensor containing the angle prior loss for each element 85 | in the batch. 
86 | ''' 87 | angle_prior_idxs = self.angle_prior_idxs - (not with_global_pose) * 3 88 | return torch.exp(pose[:, angle_prior_idxs] * 89 | self.angle_prior_signs)#.pow(2) 90 | 91 | 92 | class L2Prior(nn.Module): 93 | def __init__(self, dtype=DEFAULT_DTYPE, reduction='sum', **kwargs): 94 | super(L2Prior, self).__init__() 95 | 96 | def forward(self, module_input, *args): 97 | return torch.sum(module_input.pow(2)) 98 | 99 | 100 | class MaxMixturePrior(nn.Module): 101 | 102 | def __init__(self, prior_folder='prior', 103 | num_gaussians=6, dtype=DEFAULT_DTYPE, epsilon=1e-16, 104 | use_merged=True, 105 | **kwargs): 106 | super(MaxMixturePrior, self).__init__() 107 | 108 | if dtype == DEFAULT_DTYPE: 109 | np_dtype = np.float32 110 | elif dtype == torch.float64: 111 | np_dtype = np.float64 112 | else: 113 | print('Unknown float type {}, exiting!'.format(dtype)) 114 | sys.exit(-1) 115 | 116 | self.num_gaussians = num_gaussians 117 | self.epsilon = epsilon 118 | self.use_merged = use_merged 119 | gmm_fn = 'gmm_{:02d}.pkl'.format(num_gaussians) 120 | 121 | full_gmm_fn = os.path.join(prior_folder, gmm_fn) 122 | if not os.path.exists(full_gmm_fn): 123 | print('The path to the mixture prior "{}"'.format(full_gmm_fn) + 124 | ' does not exist, exiting!') 125 | sys.exit(-1) 126 | 127 | with open(full_gmm_fn, 'rb') as f: 128 | gmm = pickle.load(f, encoding='latin1') 129 | 130 | if type(gmm) == dict: 131 | means = gmm['means'].astype(np_dtype) 132 | covs = gmm['covars'].astype(np_dtype) 133 | weights = gmm['weights'].astype(np_dtype) 134 | elif 'sklearn.mixture.gmm.GMM' in str(type(gmm)): 135 | means = gmm.means_.astype(np_dtype) 136 | covs = gmm.covars_.astype(np_dtype) 137 | weights = gmm.weights_.astype(np_dtype) 138 | else: 139 | print('Unknown type for the prior: {}, exiting!'.format(type(gmm))) 140 | sys.exit(-1) 141 | 142 | self.register_buffer('means', torch.tensor(means, dtype=dtype)) 143 | 144 | self.register_buffer('covs', torch.tensor(covs, dtype=dtype)) 145 | 146 | precisions = [np.linalg.inv(cov) for cov in covs] 147 | precisions = np.stack(precisions).astype(np_dtype) 148 | 149 | self.register_buffer('precisions', 150 | torch.tensor(precisions, dtype=dtype)) 151 | 152 | # The constant term: 153 | sqrdets = np.array([(np.sqrt(np.linalg.det(c))) 154 | for c in gmm['covars']]) 155 | const = (2 * np.pi)**(69 / 2.) 
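# The constant above is the Gaussian normalisation factor (2*pi)^(D/2) for a
# D = 69 dimensional pose vector (23 body joints x 3 axis-angle parameters,
# global orientation excluded). Together with sqrt(det(cov)) it turns the
# mixture weights into the per-component negative-log-likelihood weights used
# in merged_log_likelihood below; up to a common scale factor,
#
#     nll_weight_k  is proportional to  weight_k / ((2*pi)**(D/2) * sqrt(det(cov_k)))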
156 | 157 | nll_weights = np.asarray(gmm['weights'] / (const * 158 | (sqrdets / sqrdets.min()))) 159 | nll_weights = torch.tensor(nll_weights, dtype=dtype).unsqueeze(dim=0) 160 | self.register_buffer('nll_weights', nll_weights) 161 | 162 | weights = torch.tensor(gmm['weights'], dtype=dtype).unsqueeze(dim=0) 163 | self.register_buffer('weights', weights) 164 | 165 | self.register_buffer('pi_term', 166 | torch.log(torch.tensor(2 * np.pi, dtype=dtype))) 167 | 168 | cov_dets = [np.log(np.linalg.det(cov.astype(np_dtype)) + epsilon) 169 | for cov in covs] 170 | self.register_buffer('cov_dets', 171 | torch.tensor(cov_dets, dtype=dtype)) 172 | 173 | # The dimensionality of the random variable 174 | self.random_var_dim = self.means.shape[1] 175 | 176 | def get_mean(self): 177 | ''' Returns the mean of the mixture ''' 178 | mean_pose = torch.matmul(self.weights, self.means) 179 | return mean_pose 180 | 181 | def merged_log_likelihood(self, pose, betas): 182 | diff_from_mean = pose.unsqueeze(dim=1) - self.means 183 | 184 | prec_diff_prod = torch.einsum('mij,bmj->bmi', 185 | [self.precisions, diff_from_mean]) 186 | diff_prec_quadratic = (prec_diff_prod * diff_from_mean).sum(dim=-1) 187 | 188 | curr_loglikelihood = 0.5 * diff_prec_quadratic - \ 189 | torch.log(self.nll_weights) 190 | # curr_loglikelihood = 0.5 * (self.cov_dets.unsqueeze(dim=0) + 191 | # self.random_var_dim * self.pi_term + 192 | # diff_prec_quadratic 193 | # ) - torch.log(self.weights) 194 | 195 | min_likelihood, _ = torch.min(curr_loglikelihood, dim=1) 196 | return min_likelihood 197 | 198 | def log_likelihood(self, pose, betas, *args, **kwargs): 199 | ''' Create graph operation for negative log-likelihood calculation 200 | ''' 201 | likelihoods = [] 202 | 203 | for idx in range(self.num_gaussians): 204 | mean = self.means[idx] 205 | prec = self.precisions[idx] 206 | cov = self.covs[idx] 207 | diff_from_mean = pose - mean 208 | 209 | curr_loglikelihood = torch.einsum('bj,ji->bi', 210 | [diff_from_mean, prec]) 211 | curr_loglikelihood = torch.einsum('bi,bi->b', 212 | [curr_loglikelihood, 213 | diff_from_mean]) 214 | cov_term = torch.log(torch.det(cov) + self.epsilon) 215 | curr_loglikelihood += 0.5 * (cov_term + 216 | self.random_var_dim * 217 | self.pi_term) 218 | likelihoods.append(curr_loglikelihood) 219 | 220 | log_likelihoods = torch.stack(likelihoods, dim=1) 221 | min_idx = torch.argmin(log_likelihoods, dim=1) 222 | weight_component = self.nll_weights[:, min_idx] 223 | weight_component = -torch.log(weight_component) 224 | 225 | return weight_component + log_likelihoods[:, min_idx] 226 | 227 | def forward(self, pose, betas): 228 | if self.use_merged: 229 | return self.merged_log_likelihood(pose, betas) 230 | else: 231 | return self.log_likelihood(pose, betas) 232 | -------------------------------------------------------------------------------- /prox/projection_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is 4 | # holder of all proprietary rights on this computer program. 5 | # You can only use this computer program if you have closed 6 | # a license agreement with MPG or you get the right to use the computer 7 | # program from someone who is authorized to grant you that right. 8 | # Any use of the computer program without a valid license is prohibited and 9 | # liable to prosecution. 10 | # 11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung 12 | # der Wissenschaften e.V. 
(MPG). acting on behalf of its Max Planck Institute 13 | # for Intelligent Systems and the Max Planck Institute for Biological 14 | # Cybernetics. All rights reserved. 15 | # 16 | # Contact: ps-license@tuebingen.mpg.de 17 | 18 | import os.path as osp 19 | import cv2 20 | import numpy as np 21 | import json 22 | 23 | class Projection(): 24 | def __init__(self, calib_dir): 25 | with open(osp.join(calib_dir, 'IR.json'), 'r') as f: 26 | self.depth_cam = json.load(f) 27 | with open(osp.join(calib_dir, 'Color.json'), 'r') as f: 28 | self.color_cam = json.load(f) 29 | 30 | def row(self, A): 31 | return A.reshape((1, -1)) 32 | def col(self, A): 33 | return A.reshape((-1, 1)) 34 | 35 | def unproject_depth_image(self, depth_image, cam): 36 | us = np.arange(depth_image.size) % depth_image.shape[1] 37 | vs = np.arange(depth_image.size) // depth_image.shape[1] 38 | ds = depth_image.ravel() 39 | uvd = np.array(np.vstack((us.ravel(), vs.ravel(), ds.ravel())).T) 40 | #unproject 41 | xy_undistorted_camspace = cv2.undistortPoints(np.asarray(uvd[:, :2].reshape((1, -1, 2)).copy()), 42 | np.asarray(cam['camera_mtx']), np.asarray(cam['k'])) 43 | xyz_camera_space = np.hstack((xy_undistorted_camspace.squeeze(), self.col(uvd[:, 2]))) 44 | xyz_camera_space[:, :2] *= self.col(xyz_camera_space[:, 2]) # scale x,y by z 45 | other_answer = xyz_camera_space - self.row(np.asarray(cam['view_mtx'])[:, 3]) # translate 46 | xyz = other_answer.dot(np.asarray(cam['view_mtx'])[:, :3]) # rotate 47 | 48 | return xyz.reshape((depth_image.shape[0], depth_image.shape[1], -1)) 49 | 50 | def projectPoints(self, v, cam): 51 | v = v.reshape((-1,3)).copy() 52 | return cv2.projectPoints(v, np.asarray(cam['R']), np.asarray(cam['T']), np.asarray(cam['camera_mtx']), np.asarray(cam['k']))[0].squeeze() 53 | 54 | def create_scan(self, mask, depth_im, color_im=None, mask_on_color=False, coord='color', TH=1e-2, default_color=[1.00, 0.75, 0.80]): 55 | if not mask_on_color: 56 | depth_im[mask != 0] = 0 57 | if depth_im.size == 0: 58 | return {'v': []} 59 | 60 | points = self.unproject_depth_image(depth_im, self.depth_cam).reshape(-1, 3) 61 | colors = np.tile(default_color, [points.shape[0], 1]) 62 | 63 | uvs = self.projectPoints(points, self.color_cam) 64 | uvs = np.round(uvs).astype(int) 65 | valid_x = np.logical_and(uvs[:, 1] >= 0, uvs[:, 1] < 1080) 66 | valid_y = np.logical_and(uvs[:, 0] >= 0, uvs[:, 0] < 1920) 67 | valid_idx = np.logical_and(valid_x, valid_y) 68 | if mask_on_color: 69 | valid_mask_idx = valid_idx.copy() 70 | valid_mask_idx[valid_mask_idx == True] = mask[uvs[valid_idx == True][:, 1], uvs[valid_idx == True][:, 71 | 0]] == 0 72 | uvs = uvs[valid_mask_idx == True] 73 | points = points[valid_mask_idx] 74 | colors = np.tile(default_color, [points.shape[0], 1]) 75 | # colors = colors[valid_mask_idx] 76 | valid_idx = valid_mask_idx 77 | if color_im is not None: 78 | colors[:, :3] = color_im[uvs[:, 1], uvs[:, 0]] / 255.0 79 | else: 80 | uvs = uvs[valid_idx == True] 81 | if color_im is not None: 82 | colors[valid_idx == True,:3] = color_im[uvs[:, 1], uvs[:, 0]]/255.0 83 | 84 | if coord == 'color': 85 | # Transform to color camera coord 86 | T = np.concatenate([np.asarray(self.color_cam['view_mtx']), np.array([0, 0, 0, 1]).reshape(1, -1)]) 87 | stacked = np.column_stack((points, np.ones(len(points)) )) 88 | points = np.dot(T, stacked.T).T[:, :3] 89 | points = np.ascontiguousarray(points) 90 | ind = points[:, 2] > TH 91 | return {'points':points[ind], 'colors':colors[ind]} 92 | 93 | 94 | def align_color2depth(self, depth_im, color_im, 
interpolate=True): 95 | (w_d, h_d) = (512, 424) 96 | if interpolate: 97 | # fill depth holes to avoid black spots in aligned rgb image 98 | zero_mask = np.array(depth_im == 0.).ravel() 99 | depth_im_flat = depth_im.ravel() 100 | depth_im_flat[zero_mask] = np.interp(np.flatnonzero(zero_mask), np.flatnonzero(~zero_mask), 101 | depth_im_flat[~zero_mask]) 102 | depth_im = depth_im_flat.reshape(depth_im.shape) 103 | 104 | points = self.unproject_depth_image(depth_im, self.depth_cam).reshape(-1, 3) 105 | uvs = self.projectPoints(points, self.color_cam) 106 | uvs = np.round(uvs).astype(int) 107 | valid_x = np.logical_and(uvs[:, 1] >= 0, uvs[:, 1] < 1080) 108 | valid_y = np.logical_and(uvs[:, 0] >= 0, uvs[:, 0] < 1920) 109 | valid_idx = np.logical_and(valid_x, valid_y) 110 | uvs = uvs[valid_idx == True] 111 | aligned_color = np.zeros((h_d, w_d, 3)).astype(color_im.dtype) 112 | aligned_color[valid_idx.reshape(h_d, w_d)] = color_im[uvs[:, 1], uvs[:, 0]] 113 | 114 | return aligned_color 115 | 116 | def align_depth2color(self, depth_im, depth_raw): 117 | (w_rgb, h_rgb) = (1920, 1080) 118 | (w_d, h_d) = (512, 424) 119 | points = self.unproject_depth_image(depth_im, self.depth_cam).reshape(-1, 3) 120 | uvs = self.projectPoints(points, self.color_cam) 121 | uvs = np.round(uvs).astype(int) 122 | valid_x = np.logical_and(uvs[:, 1] >= 0, uvs[:, 1] < 1080) 123 | valid_y = np.logical_and(uvs[:, 0] >= 0, uvs[:, 0] < 1920) 124 | valid_idx = np.logical_and(valid_x, valid_y) 125 | uvs = uvs[valid_idx == True] 126 | 127 | aligned_depth = np.zeros((h_rgb, w_rgb)).astype('uint16') 128 | aligned_depth[uvs[:, 1], uvs[:, 0]] = depth_raw[valid_idx.reshape(h_d, w_d)] 129 | 130 | return aligned_depth 131 | -------------------------------------------------------------------------------- /prox/renderer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import cv2 4 | import numpy as np 5 | import json 6 | import trimesh 7 | import argparse 8 | os.environ["PYOPENGL_PLATFORM"] = "egl" 9 | import pyrender 10 | import PIL.Image as pil_img 11 | import pickle 12 | import smplx 13 | import torch 14 | 15 | def main(args): 16 | fitting_dir = args.fitting_dir 17 | recording_name = os.path.abspath(fitting_dir).split("/")[-1] 18 | female_subjects_ids = [162, 3452, 159, 3403] 19 | subject_id = int(recording_name.split('_')[1]) 20 | if subject_id in female_subjects_ids: 21 | gender = 'female' 22 | else: 23 | gender = 'male' 24 | pkl_files_dir = osp.join(fitting_dir, 'results') 25 | scene_name = recording_name.split("_")[0] 26 | base_dir = args.base_dir 27 | cam2world_dir = osp.join(base_dir, 'cam2world') 28 | scene_dir = osp.join(base_dir, 'scenes') 29 | recording_dir = osp.join(base_dir, 'recordings', recording_name) 30 | color_dir = os.path.join(recording_dir, 'Color') 31 | meshes_dir = os.path.join(fitting_dir, 'meshes') 32 | rendering_dir = os.path.join(fitting_dir, 'images') 33 | 34 | body_model = smplx.create(args.model_folder, model_type='smplx', 35 | gender=gender, ext='npz', 36 | num_pca_comps=args.num_pca_comps, 37 | create_global_orient=True, 38 | create_body_pose=True, 39 | create_betas=True, 40 | create_left_hand_pose=True, 41 | create_right_hand_pose=True, 42 | create_expression=True, 43 | create_jaw_pose=True, 44 | create_leye_pose=True, 45 | create_reye_pose=True, 46 | create_transl=True 47 | ) 48 | 49 | if args.rendering_mode == '3d' or args.rendering_mode == 'both': 50 | static_scene = trimesh.load(osp.join(scene_dir, scene_name + '.ply')) 51 
51 |         with open(os.path.join(cam2world_dir, scene_name + '.json'), 'r') as f:
52 |             trans = np.array(json.load(f))
53 |         trans = np.linalg.inv(trans)  # invert cam2world to bring the scene mesh into the camera frame
54 |         static_scene.apply_transform(trans)
55 | 
56 |         body_scene_rendering_dir = os.path.join(fitting_dir, 'renderings')
57 |         if not osp.exists(body_scene_rendering_dir):
58 |             os.mkdir(body_scene_rendering_dir)
59 | 
60 |     # common camera and light setup
61 |     H, W = 1080, 1920
62 |     camera_center = np.array([951.30, 536.77])
63 |     camera_pose = np.eye(4)
64 |     camera_pose = np.array([1.0, -1.0, -1.0, 1.0]).reshape(-1, 1) * camera_pose  # flip y and z: OpenCV to OpenGL/pyrender camera convention
65 |     camera = pyrender.camera.IntrinsicsCamera(
66 |         fx=1060.53, fy=1060.38,
67 |         cx=camera_center[0], cy=camera_center[1])
68 |     light = pyrender.DirectionalLight(color=np.ones(3), intensity=2.0)
69 | 
70 |     for img_name in sorted(os.listdir(pkl_files_dir))[args.start::args.step]:  # step through frames as requested by --start/--step
71 |         print('viz frame {}'.format(img_name))
72 |         with open(osp.join(pkl_files_dir, img_name, '000.pkl'), 'rb') as f:
73 |             param = pickle.load(f)
74 |         torch_param = {}
75 |         for key in param.keys():
76 |             if key in ['pose_embedding', 'camera_rotation', 'camera_translation']:
77 |                 continue
78 |             else:
79 |                 torch_param[key] = torch.tensor(param[key])
80 | 
81 |         output = body_model(return_verts=True, **torch_param)
82 |         vertices = output.vertices.detach().cpu().numpy().squeeze()
83 |         body = trimesh.Trimesh(vertices, body_model.faces, process=False)
84 |         if args.save_meshes:
85 |             body.export(osp.join(meshes_dir, img_name, '000.ply'))
86 | 
87 |         material = pyrender.MetallicRoughnessMaterial(
88 |             metallicFactor=0.0,
89 |             alphaMode='OPAQUE',
90 |             baseColorFactor=(1.0, 1.0, 0.9, 1.0))
91 |         body_mesh = pyrender.Mesh.from_trimesh(
92 |             body, material=material)
93 | 
94 |         if args.rendering_mode == 'overlay' or args.rendering_mode == 'both':  # 'overlay' matches the CLI choices declared below
95 |             img = cv2.imread(os.path.join(color_dir, img_name + '.jpg'))[:, :, ::-1] / 255.0
96 |             H, W, _ = img.shape
97 |             img = cv2.flip(img, 1)
98 | 
99 |             scene = pyrender.Scene(bg_color=[0.0, 0.0, 0.0, 0.0],
100 |                                    ambient_light=(0.3, 0.3, 0.3))
101 |             scene.add(camera, pose=camera_pose)
102 |             scene.add(light, pose=camera_pose)
103 | 
104 |             scene.add(body_mesh, 'mesh')
105 | 
106 |             r = pyrender.OffscreenRenderer(viewport_width=W,
107 |                                            viewport_height=H,
108 |                                            point_size=1.0)
109 |             color, _ = r.render(scene, flags=pyrender.RenderFlags.RGBA)
110 |             color = color.astype(np.float32) / 255.0
111 | 
112 |             valid_mask = (color[:, :, -1] > 0)[:, :, np.newaxis]
113 |             input_img = img
114 |             output_img = (color[:, :, :-1] * valid_mask +
115 |                           (1 - valid_mask) * input_img)
116 | 
117 |             img = pil_img.fromarray((output_img * 255).astype(np.uint8))
118 |             img.save(os.path.join(rendering_dir, img_name, '000', 'output.png'))
119 | 
120 |         if args.rendering_mode == '3d' or args.rendering_mode == 'both':
121 |             static_scene_mesh = pyrender.Mesh.from_trimesh(
122 |                 static_scene)
123 | 
124 |             scene = pyrender.Scene()
125 |             scene.add(camera, pose=camera_pose)
126 |             scene.add(light, pose=camera_pose)
127 | 
128 |             scene.add(static_scene_mesh, 'mesh')
129 |             body_mesh = pyrender.Mesh.from_trimesh(
130 |                 body, material=material)
131 |             scene.add(body_mesh, 'mesh')
132 | 
133 |             r = pyrender.OffscreenRenderer(viewport_width=W,
134 |                                            viewport_height=H)
135 |             color, _ = r.render(scene)
136 |             color = color.astype(np.float32) / 255.0
137 |             img = pil_img.fromarray((color * 255).astype(np.uint8))
138 |             img.save(os.path.join(body_scene_rendering_dir, img_name + '.png'))
139 | 
140 | 
141 | if __name__ == '__main__':
142 |     parser = argparse.ArgumentParser()
143 |     parser.add_argument('fitting_dir', type=str, default=os.getcwd(),
144 |                         help='path to the fitting results of one recording')
145 |     parser.add_argument('--base_dir', type=str, default=os.getcwd(),
146 |                         help='path to the dataset base dir (contains cam2world, scenes, recordings)')
147 |     parser.add_argument('--start', type=int, default=0, help='id of the starting frame')
148 |     parser.add_argument('--step', type=int, default=1, help='step between visualized frames')
149 |     parser.add_argument('--model_folder', default='models', type=str, help='path to the body model folder')
150 |     parser.add_argument('--num_pca_comps', type=int, default=12, help='number of hand PCA components')
151 |     parser.add_argument('--save_meshes', type=lambda arg: arg.lower() in ['true', '1'],
152 |                         default=True, help='export the fitted body meshes as PLY files')
153 |     parser.add_argument('--rendering_mode', default='both', type=str,
154 |                         choices=['overlay', '3d', 'both'],
155 |                         help='render the body overlay on the color image, the body inside the 3D scene, or both')
156 | 
157 |     args = parser.parse_args()
158 |     main(args)
159 | 
--------------------------------------------------------------------------------
/prox/viz/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems and the Max Planck Institute for Biological
14 | # Cybernetics. All rights reserved.
15 | #
16 | # Contact: ps-license@tuebingen.mpg.de
17 | 
18 | 
--------------------------------------------------------------------------------
/prox/viz/viz_fitting.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import cv2
4 | import numpy as np
5 | import json
6 | import open3d as o3d
7 | import argparse
8 | 
9 | import torch
10 | import pickle
11 | import smplx
12 | 
13 | def main(args):
14 |     fitting_dir = args.fitting_dir
15 |     recording_name = os.path.abspath(fitting_dir).split("/")[-1]
16 |     fitting_dir = osp.join(fitting_dir, 'results')
17 |     scene_name = recording_name.split("_")[0]
18 |     base_dir = args.base_dir
19 |     cam2world_dir = osp.join(base_dir, 'cam2world')
20 |     scene_dir = osp.join(base_dir, 'scenes')
21 |     recording_dir = osp.join(base_dir, 'recordings', recording_name)
22 |     color_dir = os.path.join(recording_dir, 'Color')
23 | 
24 |     female_subjects_ids = [162, 3452, 159, 3403]
25 |     subject_id = int(recording_name.split('_')[1])
26 |     if subject_id in female_subjects_ids:
27 |         gender = 'female'
28 |     else:
29 |         gender = 'male'
30 | 
31 |     cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
32 | 
33 |     vis = o3d.Visualizer()
34 |     vis.create_window()
35 | 
36 |     scene = o3d.io.read_triangle_mesh(osp.join(scene_dir, scene_name + '.ply'))
37 |     with open(os.path.join(cam2world_dir, scene_name + '.json'), 'r') as f:
38 |         trans = np.array(json.load(f))
39 |     vis.add_geometry(scene)
40 | 
41 | 
42 |     model = smplx.create(args.model_folder, model_type='smplx',
43 |                          gender=gender, ext='npz',
44 |                          num_pca_comps=args.num_pca_comps,
45 |                          create_global_orient=True,
46 |                          create_body_pose=True,
47 |                          create_betas=True,
48 |                          create_left_hand_pose=True,
49 |                          create_right_hand_pose=True,
50 |                          create_expression=True,
51 |                          create_jaw_pose=True,
52 |                          create_leye_pose=True,
53 |                          create_reye_pose=True,
54 |                          create_transl=True
55 |                          )
56 | 
57 |     count = 0
58 |     for img_name in sorted(os.listdir(fitting_dir))[args.start::args.step]:
59 |         print('viz frame {}'.format(img_name))
60 | 
61 |         with open(osp.join(fitting_dir, img_name, '000.pkl'), 'rb') as f:
62 |             param = pickle.load(f)
63 |         torch_param = {}
64 |         for key in param.keys():
65 |             if key in ['pose_embedding', 'camera_rotation', 'camera_translation']:
66 |                 continue
67 |             else:
68 |                 torch_param[key] = torch.tensor(param[key])
69 | 
70 |         output = model(return_verts=True, **torch_param)
71 |         vertices = output.vertices.detach().cpu().numpy().squeeze()
72 | 
73 |         if count == 0:
74 |             body = o3d.TriangleMesh()
75 |             vis.add_geometry(body)
76 |         body.vertices = o3d.Vector3dVector(vertices)
77 |         body.triangles = o3d.Vector3iVector(model.faces)
78 |         body.vertex_normals = o3d.Vector3dVector([])
79 |         body.triangle_normals = o3d.Vector3dVector([])
80 |         body.compute_vertex_normals()
81 |         body.transform(trans)
82 | 
83 | 
84 |         color_img = cv2.imread(os.path.join(color_dir, img_name + '.jpg'))
85 |         color_img = cv2.flip(color_img, 1)
86 | 
87 |         vis.update_geometry()
88 |         while True:
89 |             cv2.imshow('frame', color_img)
90 |             vis.poll_events()
91 |             vis.update_renderer()
92 |             key = cv2.waitKey(30)
93 |             if key == 27:
94 |                 break
95 | 
96 |         count += 1
97 | if __name__ == '__main__':
98 |     parser = argparse.ArgumentParser()
99 |     parser.add_argument('fitting_dir', type=str, default=os.getcwd(),
100 |                         help='path to the fitting results of one recording')
101 |     parser.add_argument('--base_dir', type=str, default=os.getcwd(),
102 |                         help='path to the dataset base dir (contains cam2world, scenes, recordings)')
103 |     parser.add_argument('--start', type=int, default=0, help='id of the starting frame')
104 |     parser.add_argument('--step', type=int, default=1, help='step between visualized frames')
105 |     parser.add_argument('--model_folder', default='~/models', type=str, help='path to the body model folder')
106 |     parser.add_argument('--num_pca_comps', type=int, default=12, help='number of hand PCA components')
107 |     parser.add_argument('--gender', type=str, default='neutral', choices=['neutral', 'male', 'female'],  # unused here: gender is inferred from the subject id above
108 |                         help='Use a gender neutral or gender specific ' +
109 |                         'SMPL-X model')
110 |     args = parser.parse_args()
111 |     main(args)
112 | 
--------------------------------------------------------------------------------
/prox/viz/viz_mosh.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import cv2
4 | import numpy as np
5 | import json
6 | import open3d as o3d
7 | import argparse
8 | 
9 | import torch
10 | import pickle
11 | import smplx
12 | 
13 | def main(args):
14 |     fitting_dir = args.fitting_dir
15 |     recording_name = os.path.abspath(fitting_dir).split("/")[-1]
16 |     fitting_dir = osp.join(fitting_dir, 'results')
17 |     scene_name = recording_name.split("_")[0]
18 |     base_dir = args.base_dir
19 |     scene_dir = osp.join(base_dir, 'scenes')
20 |     recording_dir = osp.join(base_dir, 'recordings', recording_name)
21 |     color_dir = os.path.join(recording_dir, 'Color')
22 | 
23 |     cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
24 | 
25 |     vis = o3d.Visualizer()
26 |     vis.create_window()
27 | 
28 |     scene = o3d.io.read_triangle_mesh(osp.join(scene_dir, scene_name + '.ply'))
29 |     with open(os.path.join(base_dir, 'vicon2scene.json'), 'r') as f:
30 |         trans = np.array(json.load(f))
31 |     vis.add_geometry(scene)
32 | 
33 | 
34 |     model = smplx.create(args.model_folder, model_type='smplx',
35 |                          gender=args.gender, ext='npz',
36 |                          num_pca_comps=args.num_pca_comps,
37 |                          create_global_orient=True,
38 |                          create_body_pose=True,
39 |                          create_betas=True,
40 |                          create_left_hand_pose=True,
41 |                          create_right_hand_pose=True,
42 |                          create_expression=True,
43 |                          create_jaw_pose=True,
44 |                          create_leye_pose=True,
45 |                          create_reye_pose=True,
46 |                          create_transl=True
47 |                          )
48 | 
49 |     count = 0
50 |     for img_name in sorted(os.listdir(fitting_dir))[args.start::args.step]:
51 |         print('viz frame {}'.format(img_name))
52 | 
53 |         with open(osp.join(fitting_dir, img_name, '000.pkl'), 'rb') as f:
54 |             param = pickle.load(f, encoding='latin1')
55 | 
56 |         torch_param = {}
57 |         for key in param.keys():
58 |             torch_param[key] = torch.tensor(param[key])
59 |         output = model(return_verts=True, **torch_param)
60 |         vertices = output.vertices.detach().cpu().numpy().squeeze()
61 | 
62 |         if count == 0:
63 |             body = o3d.TriangleMesh()
64 |             vis.add_geometry(body)
65 |         body.vertices = o3d.Vector3dVector(vertices)
66 |         body.triangles = o3d.Vector3iVector(model.faces)
67 |         body.vertex_normals = o3d.Vector3dVector([])
68 |         body.triangle_normals = o3d.Vector3dVector([])
69 |         body.compute_vertex_normals()
70 |         body.transform(trans)
71 | 
72 | 
73 |         color_img = cv2.imread(os.path.join(color_dir, img_name + '.jpg'))
74 |         color_img = cv2.flip(color_img, 1)
75 | 
76 |         vis.update_geometry()
77 |         while True:
78 |             cv2.imshow('frame', color_img)
79 |             vis.poll_events()
80 |             vis.update_renderer()
81 |             key = cv2.waitKey(30)
82 |             if key == 27:
83 |                 break
84 | 
85 |         count += 1
86 | if __name__ == '__main__':
87 |     parser = argparse.ArgumentParser()
88 |     parser.add_argument('fitting_dir', type=str, default=os.getcwd(),
89 |                         help='path to the MoSh fitting results of one recording')
90 |     parser.add_argument('--base_dir', type=str, default=os.getcwd(),
91 |                         help='path to the dataset base dir (contains scenes, recordings, vicon2scene.json)')
92 |     parser.add_argument('--start', type=int, default=0, help='id of the starting frame')
93 |     parser.add_argument('--step', type=int, default=1, help='step between visualized frames')
94 |     parser.add_argument('--model_folder', default='~/models', type=str, help='path to the body model folder')
95 |     parser.add_argument('--num_pca_comps', type=int, default=12, help='number of hand PCA components')
96 |     parser.add_argument('--gender', type=str, default='neutral', choices=['neutral', 'male', 'female'],
97 |                         help='Use a gender neutral or gender specific ' +
98 |                         'SMPL-X model')
99 |     args = parser.parse_args()
100 |     main(args)
101 | 
102 | 
--------------------------------------------------------------------------------
/prox/viz/viz_raw_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
4 | # holder of all proprietary rights on this computer program.
5 | # You can only use this computer program if you have closed
6 | # a license agreement with MPG or you get the right to use the computer
7 | # program from someone who is authorized to grant you that right.
8 | # Any use of the computer program without a valid license is prohibited and
9 | # liable to prosecution.
10 | #
11 | # Copyright©2019 Max-Planck-Gesellschaft zur Förderung
12 | # der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
13 | # for Intelligent Systems and the Max Planck Institute for Biological
14 | # Cybernetics. All rights reserved.
15 | #
16 | # Contact: ps-license@tuebingen.mpg.de
17 | 
18 | import os
19 | import os.path as osp
20 | import cv2
21 | import numpy as np
22 | import json
23 | import open3d as o3d
24 | import argparse
25 | from prox.projection_utils import Projection
26 | 
27 | def main(args):
28 |     recording_name = osp.basename(args.recording_dir)
29 |     scene_name = recording_name.split("_")[0]
30 |     base_dir = os.path.abspath(osp.join(args.recording_dir, os.pardir, os.pardir))
31 |     cam2world_dir = osp.join(base_dir, 'cam2world')
32 |     scene_dir = osp.join(base_dir, 'scenes')
33 |     calib_dir = osp.join(base_dir, 'calibration')
34 | 
35 |     projection = Projection(calib_dir)
36 |     cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
37 | 
38 |     color_dir = os.path.join(args.recording_dir, 'Color')
39 |     depth_dir = os.path.join(args.recording_dir, 'Depth')
40 |     bodyIndex_dir = os.path.join(args.recording_dir, 'BodyIndex')
41 |     bodyIndexColor_dir = os.path.join(args.recording_dir, 'BodyIndexColor')
42 | 
43 |     vis = o3d.Visualizer()
44 |     vis.create_window()
45 | 
46 |     trans = np.eye(4)
47 |     if args.show_scene:
48 |         scene = o3d.io.read_triangle_mesh(osp.join(scene_dir, scene_name + '.ply'))
49 |         with open(os.path.join(cam2world_dir, scene_name + '.json'), 'r') as f:
50 |             trans = np.array(json.load(f))
51 |         vis.add_geometry(scene)
52 | 
53 |     scan = o3d.PointCloud()
54 |     vis.add_geometry(scan)
55 | 
56 |     count = 0
57 |     for img_name in sorted(os.listdir(color_dir))[args.start::args.step]:
58 |         img_name = osp.splitext(img_name)[0]
59 | 
60 | 
61 |         color_img = cv2.imread(os.path.join(color_dir, img_name + '.jpg'))
62 |         color_img = cv2.flip(color_img, 1)
63 | 
64 |         if args.show_scan:
65 |             depth_img = cv2.imread(os.path.join(depth_dir, img_name + '.png'), -1).astype(float)
66 |             depth_img /= 8.0  # undo the x8 scaling of the stored 16-bit depth
67 |             depth_img /= 1000.0  # millimeters to meters
68 | 
69 |             if args.show_body_only:
70 |                 if args.mask_on_color:
71 |                     mask = cv2.imread(os.path.join(bodyIndexColor_dir, img_name + '.png'), cv2.IMREAD_GRAYSCALE)
72 |                 else:
73 |                     mask = cv2.imread(os.path.join(bodyIndex_dir, img_name + '.png'), cv2.IMREAD_GRAYSCALE)
74 |                 # the result is a mask where 255 indicates background (no body) and 0 indicates the body
75 |                 mask = cv2.threshold(mask, 254, 255, cv2.THRESH_BINARY)[1]
76 |             else:
77 |                 mask = np.zeros(depth_img.shape[:2])
78 | 
79 |             depth_img = cv2.flip(depth_img, 1)
80 |             mask = cv2.flip(mask, 1)
81 | 
82 |             if args.show_color:
83 |                 scan_dict = projection.create_scan(mask, depth_img, mask_on_color=args.mask_on_color, color_im=color_img[:, :, ::-1])
84 |             else:
85 |                 scan_dict = projection.create_scan(mask, depth_img, mask_on_color=args.mask_on_color)
86 | 
87 |             scan.points = o3d.Vector3dVector(scan_dict.get('points'))
88 |             scan.colors = o3d.Vector3dVector(scan_dict.get('colors'))
89 | 
90 |             if np.asarray(scan.points).size == 0:
91 |                 continue
92 |             scan.transform(trans)
93 |             vis.update_geometry()
94 | 
95 | 
96 |         print('viz frame {}, press Esc to continue'.format(img_name))
97 |         while True:
98 |             cv2.imshow('frame', color_img)
99 |             vis.poll_events()
100 |             vis.update_renderer()
101 |             key = cv2.waitKey(30)
102 |             if key == 27:
103 |                 break
104 | 
105 |         count += 1
106 | if __name__ == '__main__':
107 |     parser = argparse.ArgumentParser()
108 |     parser.add_argument('recording_dir', type=str, default=os.getcwd(),
109 |                         help='recording dir')
110 |     parser.add_argument('--show_scene', default=True, type=lambda arg: arg.lower() in ['true', '1'], help='show the static scene mesh')
111 |     parser.add_argument('--show_scan', default=True, type=lambda arg: arg.lower() in ['true', '1'], help='show the point cloud computed from the depth frame')
112 |     parser.add_argument('--show_body_only', default=False, type=lambda arg: arg.lower() in ['true', '1'], help='keep only the points on the body (requires the body index masks)')
113 |     parser.add_argument('--mask_on_color', default=True, type=lambda arg: arg.lower() in ['true', '1'], help='use the body mask defined on the color image (BodyIndexColor) instead of the depth image (BodyIndex)')
114 |     parser.add_argument('--show_color', default=False, type=lambda arg: arg.lower() in ['true', '1'], help='color the scan points with the RGB image')
115 |     parser.add_argument('--start', type=int, default=0, help='id of the starting frame')
116 |     parser.add_argument('--step', type=int, default=1, help='step between visualized frames')
117 |     parser.add_argument('--coord', default='color', type=str, choices=['color', 'depth'], help='coordinate frame of the scan')
118 | 
119 |     args = parser.parse_args()
120 |     main(args)
121 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.16.2
2 | torch==1.0.1.post2
3 | torchgeometry>=0.1.2
4 | PyYAML==5.1.1
5 | configargparse
6 | smplx
7 | pillow
8 | opencv-python
9 | tqdm
10 | human_body_prior
11 | pyrender>=0.1.23
12 | trimesh>=2.37.6
13 | scipy
14 | open3d-python==0.7.0.0
15 | 
--------------------------------------------------------------------------------
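
Usage note (added; not a file in the repository): a minimal sketch of how the alignment helpers in projection_utils.py might be called on a single frame, assuming align_color2depth and align_depth2color are methods of the Projection class used in viz_raw_data.py. The calibration and frame paths are placeholders, keyword arguments are used so the positional order of align_color2depth is not assumed, the depth conversion follows viz_raw_data.py, and the horizontal flip applied by the viz scripts is omitted.

import cv2
from prox.projection_utils import Projection

# placeholder paths; point these at a PROX base dir and one frame of a recording
projection = Projection('path/to/prox/calibration')
color_im = cv2.imread('path/to/recordings/REC/Color/FRAME.jpg')[:, :, ::-1]   # BGR -> RGB
depth_raw = cv2.imread('path/to/recordings/REC/Depth/FRAME.png', -1)          # raw 16-bit depth
depth_im = depth_raw.astype(float) / 8.0 / 1000.0                             # to meters, as in viz_raw_data.py

# color sampled at every depth pixel: shape (424, 512, 3)
aligned_color = projection.align_color2depth(depth_im=depth_im, color_im=color_im)
# raw depth scattered onto the 1080 x 1920 color grid, dtype uint16
aligned_depth = projection.align_depth2color(depth_im, depth_raw)
print(aligned_color.shape, aligned_depth.shape)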