├── .gitignore ├── LICENSE ├── README.md ├── TempCLR ├── TempCLR │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── body_model.py │ │ ├── cmd_parser.py │ │ ├── datasets_defaults.py │ │ ├── defaults.py │ │ ├── network_defaults.py │ │ └── utils.py │ ├── data │ │ ├── __init__.py │ │ ├── build.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── freihand.py │ │ │ └── ho3d.py │ │ ├── structures │ │ │ ├── __init__.py │ │ │ ├── abstract_structure.py │ │ │ ├── bbox.py │ │ │ ├── betas.py │ │ │ ├── body_pose.py │ │ │ ├── global_rot.py │ │ │ ├── hand_pose.py │ │ │ ├── image_list.py │ │ │ ├── joints.py │ │ │ ├── keypoints.py │ │ │ ├── points_2d.py │ │ │ └── vertices.py │ │ ├── transforms │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── transforms.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bbox.py │ │ │ ├── image_list.py │ │ │ ├── keypoint_names.py │ │ │ ├── keypoints.py │ │ │ ├── sampling.py │ │ │ ├── struct_utils.py │ │ │ └── transforms.py │ ├── evaluation.py │ ├── models │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── fpn.py │ │ │ ├── hrnet.py │ │ │ ├── resnet.py │ │ │ ├── utils.py │ │ │ └── vgg19.py │ │ ├── body_models │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── hand_models.py │ │ │ ├── lbs.py │ │ │ └── utils.py │ │ ├── build.py │ │ ├── camera │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── camera_projection.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── bbox_sampler.py │ │ │ ├── group_regressor.py │ │ │ ├── iterative_regressor.py │ │ │ ├── networks.py │ │ │ ├── pose_utils.py │ │ │ └── rigid_alignment.py │ │ ├── hand_heads │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── hand_heads.py │ │ │ ├── hand_loss_modules.py │ │ │ └── registry.py │ │ ├── nnutils │ │ │ ├── __init__.py │ │ │ └── init_layer.py │ │ └── rendering │ │ │ ├── __init__.py │ │ │ ├── renderer.py │ │ │ └── utils.py │ └── utils │ │ ├── __init__.py │ │ ├── bool_utils.py │ │ ├── cfg_utils.py │ │ ├── checkpointer.py │ │ ├── data_structs.py │ │ ├── img_utils.py │ │ ├── metrics.py │ │ ├── np_utils.py │ │ ├── plot_utils.py │ │ ├── rotation_utils.py │ │ ├── timer.py │ │ ├── torch_utils.py │ │ ├── transf_utils.py │ │ └── typing.py ├── configs │ ├── evaluation_freihand.yaml │ └── evaluation_ho3d.yaml ├── main.py ├── mano_left_extra_joints.yaml └── mano_right_extra_joints.yaml ├── assets ├── .DS_Store ├── digit.gif └── tempclr.gif ├── environment.yml ├── mano_left_extra_joints.yaml └── mano_right_extra_joints.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | License 2 | Software Copyright License for non-commercial scientific research purposes 3 | Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use the TempCLR data, model and software, (the "Data & Software"), including 3D meshes, images, videos, textures, software, scripts, and animations. By downloading and/or using the Data & Software (including downloading, cloning, installing, and any other use of the corresponding github repository), you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use the Data & Software. 
Any infringement of the terms of this agreement will automatically terminate your rights under this License 4 | 5 | Ownership / Licensees 6 | The Software and the associated materials has been developed at the 7 | 8 | Eidgenössische Technische Hochschule Zürich and at the Max Planck Institute for Intelligent Systems (hereinafter "MPI"). 9 | 10 | Any copyright or patent right is owned by and proprietary material of the 11 | 12 | Eidgenössische Technische Hochschule Zürich and of the Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter collectively “Max-Planck”) 13 | 14 | hereinafter the “Licensor”. 15 | 16 | License Grant 17 | Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right: 18 | 19 | To install the Data & Software on computers owned, leased or otherwise controlled by you and/or your organization; 20 | To use the Data & Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects; 21 | Any other use, in particular any use for commercial, pornographic, military, or surveillance, purposes is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artifacts for commercial purposes. The Data & Software may not be used to create fake, libelous, misleading, or defamatory content of any kind excluding analyses in peer-reviewed scientific research. The Data & Software may not be reproduced, modified and/or made available in any form to any third party without Eidgenössische Technische Hochschule Zürich and Max-Planck’s prior written permission. 22 | 23 | The Data & Software may not be used for pornographic purposes or to generate pornographic material whether commercial or not. This license also prohibits the use of the Software to train methods/algorithms/neural networks/etc. for commercial, pornographic, military, surveillance, or defamatory use of any kind. By downloading the Data & Software, you agree not to reverse engineer it. 24 | 25 | No Distribution 26 | The Data & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive purposes only. 27 | 28 | Disclaimer of Representations and Warranties 29 | You expressly acknowledge and agree that the Data & Software results from basic research, is provided “AS IS”, may contain errors, and that any use of the Data & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE DATA & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of the Data & Software, (ii) that the use of the Data & Software will not infringe any patents, copyrights or other intellectual property rights of a third party, and (iii) that the use of the Data & Software will not cause any damage of any kind to you or a third party. 30 | 31 | Limitation of Liability 32 | Because this Data & Software License Agreement qualifies as a donation, according to Section 521 of the German Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only. 
If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee for the resulting damage. 33 | Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be liable in accordance with the German Product Liability Act in the event of product liability. The foregoing applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded. 34 | Patent claims generated through the usage of the Data & Software cannot be directed towards the copyright holders. 35 | The Data & Software is provided in the state of development the licensor defines. If modified or extended by Licensee, the Licensor makes no claims about the fitness of the Data & Software and is not responsible for any problems such modifications cause. 36 | 37 | No Maintenance Services 38 | You understand and agree that Licensor is under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Data & Software. Licensor nevertheless reserves the right to update, modify, or discontinue the Data & Software at any time. 39 | 40 | Defects of the Data & Software must be notified in writing to the Licensor with a comprehensible description of the error symptoms. The notification of the defect should enable the reproduction of the error. The Licensee is encouraged to communicate any use, results, modification or publication. 41 | 42 | Publications using the Data & Software 43 | You acknowledge that the Data & Software is a valuable scientific resource and agree to appropriately reference the following paper in any publication making use of the Data & Software. 44 | 45 | Citation: 46 | 47 | 48 | @inProceedings{ziani2022tempclr, 49 | title={TempCLR: Reconstructing Hands via Time-Coherent Contrastive Learning}, 50 | author={Ziani, Andrea and Fan, Zicong and Kocabas, Muhammed and Christen, Sammy and Hilliges, Otmar}, 51 | booktitle={International Conference on 3D Vision (3DV)}, 52 | year={2022} 53 | } 54 | 55 | As this work evolves on the previous ExPose work, please consider also citing: 56 | 57 | @inproceedings{ExPose:2020, 58 | title = {Monocular Expressive Body Regression through Body-Driven Attention}, 59 | author = {Choutas, Vasileios and Pavlakos, Georgios and Bolkart, Timo and Tzionas, Dimitrios and Black, Michael J.}, 60 | booktitle = {European Conference on Computer Vision (ECCV)}, 61 | year = {2020}, 62 | url = {https://expose.is.tue.mpg.de} 63 | } 64 | 65 | Commercial licensing opportunities 66 | For commercial uses of the Data & Software, please send email to ps-license@tue.mpg.de 67 | 68 | This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TempCLR: Reconstructing Hands via Time-Coherent Contrastive Learning [3DV 2022] 2 | 3 | [![report](https://img.shields.io/badge/Project-Page-blue)](https://eth-ait.github.io/tempclr) 4 | [![report](https://img.shields.io/badge/ArXiv-Paper-red)](http://arxiv.org/abs/2209.00489) 5 | 6 | > [**TempCLR: Reconstructing Hands via Time-Coherent Contrastive Learning**](http://arxiv.org/abs/2209.00489), 7 | > [Andrea Ziani*](https://www.linkedin.com/in/andrea-ziani/), 8 | > [Zicong Fan*](https://ait.ethz.ch/people/zfan), 9 | > [Muhammed Kocabas](https://ps.is.tuebingen.mpg.de/person/mkocabas), 10 | > [Sammy Christen](https://ait.ethz.ch/people/sammyc/), 11 | > [Otmar Hilliges](https://ait.ethz.ch/people/hilliges) 12 | > *Equal Contribution. 13 | > International Conference on 3D Vision (3DV), 2022 14 | 15 | 16 |

17 | ![TempCLR teaser animation](assets/tempclr.gif) 18 |

19 | 20 | 21 | > You can find our training code [here](https://drive.google.com/file/d/1weVxFljs4LKIexN9lVkLmcNQFQfWVywn/view?usp=sharing). 22 | 23 | 24 | ## Updates 25 | 26 | - Sept. 28, 2022: Inference code and pre-trained model are ready. 27 | - Sept. 1, 2022: Project page and arXiv paper are online. 28 | 29 | # Table of contents 30 | - [Download our release models](#download-our-release-models) 31 | - [Setup inference code](#setup-inference-code) 32 | * [Datasets](#datasets) 33 | * [Setup anaconda environment](#setup-anaconda-environment) 34 | + [Create the environment](#create-the-environment) 35 | + [Activate the environment](#activate-the-environment) 36 | * [Setup environment variables](#setup-environment-variables) 37 | + [Download utilities](#download-utilities) 38 | + [Download MANO models](#download-mano-models) 39 | + [Setup variables](#setup-variables) 40 | * [Config file utils](#config-file-utils) 41 | - [Run inference code](#run-inference-code) 42 | - [Acknowledgements](#acknowledgements) 43 | - [Citations](#citations) 44 | 45 | If you encounter any issues while following this README, please [Contact Us](mailto:aziani@student.ethz.ch,zicong.fan@inf.ethz.ch). 46 | 47 | # Download our release models 48 | Our release models can be downloaded at [this Dropbox](https://www.dropbox.com/sh/jmy2owz873nerw1/AAAHxG9YLJd91l5Ou5gsDvGBa?dl=0). 49 | 50 | Once you have downloaded our release zip file, extract it in the project directory. The final directory structure should look like: 51 | ``` 52 | TempCLR/ 53 | ├── ... 54 | TempCLR_release/ 55 | ├── FreiHAND_model/ 56 | │   └── checkpoints/ 57 | │   ├── latest_checkpoint 58 | │   └── tempclr_freihand 59 | ├── HO3D_model/ 60 | │   └── checkpoints/ 61 | │   ├── latest_checkpoint 62 | │   └── tempclr_ho3d 63 | ├── LICENSE 64 | └── TempCLR_utils/ 65 | ├── data/ 66 | │   ├── all_means.pkl 67 | │   └── shape_mean.npy 68 | └── models/ 69 | └── mano/ 70 | └── README 71 | environment.yml 72 | LICENSE 73 | mano_left_extra_joints.yaml 74 | mano_right_extra_joints.yaml 75 | 76 | ``` 77 | 78 | # Setup inference code 79 | 80 | ## Datasets 81 | The repository provides the code to perform inference on the [FreiHAND](https://lmb.informatik.uni-freiburg.de/resources/datasets/FreihandDataset.en.html) dataset and the [HO-3D_v2](https://www.tugraz.at/index.php?id=40231) dataset. 82 | 83 | To run the inference code, first download the datasets and extract them into the same directory. The resulting directory structure and directory names should look like: 84 | ``` 85 | datasets 86 | ├── freihand 87 | └── HO3D_v2 88 | ``` 89 | 90 | ## Setup anaconda environment 91 | This repository provides the recipe `environment.yml` for a conda environment containing every dependency the code needs. 92 | 93 | If you do not have Anaconda installed on your machine, install [Anaconda](https://www.anaconda.com/products/distribution) first and then come back to this guide. 94 | 95 | ### Create the environment 96 | To create the environment, open a shell in the TempCLR project directory and run this command: 97 | ``` 98 | conda env create -f environment.yml 99 | ``` 100 | 101 | Wait until the execution ends. If everything went well, you should see a suggestion to activate `tempclr-env`. 102 | 103 | ### Activate the environment 104 | To activate the environment run: 105 | ``` 106 | conda activate tempclr-env 107 | ``` 108 | 109 | Great! The environment has been activated. Now that the environment is set up, proceed to the next section (or run the quick sanity check below first).
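Optionally, before moving on, you can sanity-check that the environment resolved correctly. A quick import test like the one below should run without errors; it assumes PyTorch is among the dependencies pinned in `environment.yml` (the exact package versions come from that file):

```
# Quick sanity check -- assumes PyTorch is pinned in environment.yml.
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
```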
110 | 111 | ## Setup environment variables 112 | 113 | ### Download MANO models 114 | Our project is based on MANO, so the MANO models are needed to run this codebase. 115 | Before proceeding further, download the MANO models (i.e. `MANO_RIGHT.pkl` and `MANO_LEFT.pkl`) from the [official website](https://mano.is.tue.mpg.de/index.html) 116 | and place them inside the `TempCLR_utils/models/mano` directory. 117 | 118 | ### Setup variables 119 | In the shell where you will run the inference code, run the following commands (replace each placeholder with the corresponding path on your machine): 120 | ``` 121 | export MODELS=<path_to_TempCLR_utils>/models 122 | export MEAN_POSE_PATH=<path_to_TempCLR_utils>/data 123 | export DATASETS=<path_to_datasets_directory> 124 | export INFERENCE=<path_to_pretrained_model_directory> 125 | ``` 126 | 127 | # Run inference code 128 | Once you have followed all the previous steps, open a shell in the TempCLR project with the conda environment activated and the environment variables set, then use the following command to run the inference code: 129 | ``` 130 | python TempCLR/main.py --exp-cfg TempCLR/configs/evaluation_freihand.yaml 131 | ``` 132 | for inference on FreiHAND, and 133 | ``` 134 | python TempCLR/main.py --exp-cfg TempCLR/configs/evaluation_ho3d.yaml 135 | ``` 136 | for inference on HO-3D. 137 | 138 | Since the ground-truth labels of the test sets are hidden in CodaLab competitions (see [HO-3D](https://competitions.codalab.org/competitions/22485) and [FreiHAND](https://competitions.codalab.org/competitions/21238)), the code will save a `pred.json` file in the pre-trained model directory. 139 | This file contains the predictions on the test set to be uploaded to CodaLab. Zip the JSON file and upload it to the respective CodaLab competition to obtain the results for the different metrics reported in the paper. An end-to-end example of this workflow is sketched in the *Example end-to-end run* section near the end of this README. 140 | 141 | # Config file utils 142 | ## Save inference re-projection images 143 | To save the re-projection images produced by the pre-trained models, set these two flags in the config file: 144 | ``` 145 | save_reproj_images: True 146 | create_image_summaries: True 147 | ``` 148 | 149 | The images will be saved in the pre-trained model directory under a new directory named `summary`. 150 | 151 | 152 | # Acknowledgements 153 | [Muhammed Kocabas](https://ps.is.mpg.de/employees/mkocabas) is supported by the Max Planck ETH Center for Learning Systems. 154 | The authors would like to thank [Vassilis Choutas](https://ps.is.mpg.de/person/vchoutas) for providing the code of the baseline model ([ExPose](https://expose.is.tue.mpg.de/)) adopted for the project.
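# Example end-to-end run

For orientation, the snippet below sketches one complete run on FreiHAND, from environment activation to packaging the CodaLab submission. All paths are examples and must be adapted to your setup; in particular, it assumes the release archive was extracted to `TempCLR_release/` inside the project directory, that the datasets live in a `datasets/` directory, and that `INFERENCE` points at the extracted pre-trained model folder (where `pred.json` is written).

```
# Example only -- adapt every path to your machine.
conda activate tempclr-env

# TempCLR_utils ships in the release archive (see "Download our release models").
export MODELS=$PWD/TempCLR_release/TempCLR_utils/models
export MEAN_POSE_PATH=$PWD/TempCLR_release/TempCLR_utils/data
export DATASETS=/path/to/datasets                      # contains freihand/ and HO3D_v2/
export INFERENCE=$PWD/TempCLR_release/FreiHAND_model   # assumed: pre-trained model folder

python TempCLR/main.py --exp-cfg TempCLR/configs/evaluation_freihand.yaml

# Package the predictions for the FreiHAND CodaLab competition.
cd "$INFERENCE" && zip pred.zip pred.json
```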
155 | 156 | # Citations 157 | If you find this work useful, consider citing: 158 | 159 | ``` 160 | @inProceedings{ziani2022tempclr, 161 | title={TempCLR: Reconstructing Hands via Time-Coherent Contrastive Learning}, 162 | author={Ziani, Andrea and Fan, Zicong and Kocabas, Muhammed and Christen, Sammy and Hilliges, Otmar}, 163 | booktitle={International Conference on 3D Vision (3DV)}, 164 | year={2022} 165 | } 166 | 167 | @inproceedings{hampali2020honnotate, 168 | title={Honnotate: A method for 3d annotation of hand and object poses}, 169 | author={Hampali, Shreyas and Rad, Mahdi and Oberweger, Markus and Lepetit, Vincent}, 170 | booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, 171 | pages={3196--3206}, 172 | year={2020} 173 | } 174 | 175 | 176 | @inproceedings{Freihand2019, 177 | author = {Christian Zimmermann, Duygu Ceylan, Jimei Yang, Bryan Russell, Max Argus and Thomas Brox}, 178 | title = {FreiHAND: A Dataset for Markerless Capture of Hand Pose and Shape from Single RGB Images}, 179 | booktitle = {IEEE International Conference on Computer Vision (ICCV)}, 180 | year = {2019}, 181 | url = "https://lmb.informatik.uni-freiburg.de/projects/freihand/" 182 | } 183 | 184 | @inproceedings{ExPose:2020, 185 | title = {Monocular Expressive Body Regression through Body-Driven Attention}, 186 | author = {Choutas, Vasileios and Pavlakos, Georgios and Bolkart, Timo and Tzionas, Dimitrios and Black, Michael J.}, 187 | booktitle = {European Conference on Computer Vision (ECCV)}, 188 | pages = {20--40}, 189 | year = {2020}, 190 | url = {https://expose.is.tue.mpg.de} 191 | } 192 | 193 | @article{MANO:SIGGRAPHASIA:2017, 194 | title = {Embodied Hands: Modeling and Capturing Hands and Bodies Together}, 195 | author = {Romero, Javier and Tzionas, Dimitrios and Black, Michael J.}, 196 | journal = {ACM Transactions on Graphics, (Proc. SIGGRAPH Asia)}, 197 | volume = {36}, 198 | number = {6}, 199 | series = {245:1--245:17}, 200 | month = nov, 201 | year = {2017}, 202 | month_numeric = {11} 203 | } 204 | ``` 205 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ -------------------------------------------------------------------------------- /TempCLR/TempCLR/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from .cmd_parser import parse_args 12 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/config/body_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from dataclasses import dataclass 12 | from omegaconf import OmegaConf 13 | from .utils import Variable, Pose 14 | 15 | 16 | @dataclass 17 | class PCA: 18 | num_comps: int = 12 19 | flat_hand_mean: bool = False 20 | 21 | 22 | @dataclass 23 | class Clusters: 24 | fname: str = 'data/clusters' 25 | tau: float = 1.0 26 | 27 | 28 | @dataclass 29 | class PoseClusters(Pose): 30 | clusters: Clusters = Clusters() 31 | 32 | 33 | @dataclass 34 | class PoseWithPCA(Pose): 35 | pca: PCA = PCA() 36 | 37 | 38 | @dataclass 39 | class PoseWithPCAAndClusters(PoseWithPCA, PoseClusters): 40 | pass 41 | 42 | 43 | @dataclass 44 | class Shape(Variable): 45 | num: int = 10 46 | 47 | 48 | @dataclass 49 | class Expression(Variable): 50 | num: int = 10 51 | 52 | 53 | @dataclass 54 | class Texture(Variable): 55 | dim: int = 50 56 | path: str = 'data/flame/texture.npz' 57 | 58 | 59 | @dataclass 60 | class Lighting(Variable): 61 | dim: int = 27 62 | type: str = 'sh' 63 | 64 | 65 | @dataclass 66 | class AbstractBodyModel: 67 | extra_joint_path: str = '' 68 | v_template_path: str = '' 69 | mean_pose_path: str = '' 70 | shape_mean_path: str = '' 71 | use_compressed: bool = True 72 | gender = 'neutral' 73 | learn_joint_regressor: bool = False 74 | 75 | 76 | @dataclass 77 | class MANO(AbstractBodyModel): 78 | betas: Shape = Shape() 79 | wrist_pose: Pose = Pose() 80 | hand_pose: PoseWithPCAAndClusters = PoseWithPCAAndClusters() 81 | translation: Variable = Variable() 82 | texture: Texture = Texture() 83 | lighting: Lighting = Lighting() 84 | 85 | 86 | @dataclass 87 | class HandModel: 88 | type: str = 'mano' 89 | is_right: bool = True 90 | model_folder: str = 'models' 91 | vertex_ids_path: str = '' 92 | 93 | mano: MANO = MANO() 94 | 95 | 96 | hand_conf = OmegaConf.structured(HandModel) 97 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/config/cmd_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import argparse 12 | from omegaconf import OmegaConf 13 | from .defaults import conf as default_conf 14 | 15 | def parse_args(): 16 | arg_formatter = argparse.ArgumentDefaultsHelpFormatter 17 | 18 | description = 'PyTorch MANO Regressor' 19 | parser = argparse.ArgumentParser(formatter_class=arg_formatter, 20 | description=description) 21 | 22 | parser.add_argument('--exp-cfg', type=str, dest='exp_cfgs', 23 | required=True, nargs='+', 24 | help='The configuration of the experiment') 25 | parser.add_argument('--exp-opts', default=[], dest='exp_opts', 26 | nargs='*', 27 | help='The configuration of the Detector') 28 | parser.add_argument('--local_rank', default=0, type=int, 29 | help='ranking within the nodes') 30 | parser.add_argument('--num-gpus', dest='num_gpus', 31 | default=1, type=int, 32 | help='Number of gpus') 33 | parser.add_argument('--backend', dest='backend', 34 | default='nccl', type=str, 35 | choices=['nccl', 'gloo'], 36 | help='Backend used for multi-gpu training') 37 | 38 | cmd_args = parser.parse_args() 39 | 40 | cfg = default_conf.copy() 41 | for exp_cfg in cmd_args.exp_cfgs: 42 | if exp_cfg: 43 | cfg.merge_with(OmegaConf.load(exp_cfg)) 44 | if cmd_args.exp_opts: 45 | cfg.merge_with(OmegaConf.from_cli(cmd_args.exp_opts)) 46 | 47 | cfg.network.use_sync_bn = (cfg.network.use_sync_bn and 48 | cmd_args.num_gpus > 1) 49 | cfg.local_rank = cmd_args.local_rank 50 | cfg.num_gpus = cmd_args.num_gpus 51 | 52 | return cfg 53 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/config/datasets_defaults.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from typing import Tuple 12 | from dataclasses import dataclass 13 | from omegaconf import OmegaConf 14 | from TempCLR.utils.typing import StringTuple, FloatTuple 15 | 16 | 17 | ############################## DATASETS ############################## 18 | 19 | 20 | @dataclass 21 | class Transforms: 22 | flip_prob: float = 0.0 23 | max_size: float = 1080 24 | downsample_dist: str = 'categorical' 25 | downsample_factor_min: float = 1.0 26 | downsample_factor_max: float = 1.0 27 | downsample_cat_factors: Tuple[float] = (1.0,) 28 | center_jitter_factor: float = 0.0 29 | center_jitter_dist: str = 'uniform' 30 | crop_size: int = 256 31 | scale_factor_min: float = 1.0 32 | scale_factor_max: float = 1.0 33 | scale_factor: float = 0.0 34 | scale_dist: str = 'uniform' 35 | noise_scale: float = 0.0 36 | rotation_factor: float = 0.0 37 | mean: Tuple[float] = (0.485, 0.456, 0.406) 38 | std: Tuple[float] = (0.229, 0.224, 0.225) 39 | brightness: float = 0.0 40 | saturation: float = 0.0 41 | hue: float = 0.0 42 | contrast: float = 0.0 43 | extreme_crop_prob: float = 0.0 44 | torso_upper_body_prob: float = 0.5 45 | motion_blur_prob: float = 0.0 46 | motion_blur_kernel_size_min: int = 3 47 | motion_blur_kernel_size_max: int = 21 48 | sobel_kernel_size: int = 3 49 | sobel_prob: float = 0.2 50 | color_drop_prob: float = 0.0 51 | color_jitter_prob: float = 0.0 52 | 53 | 54 | @dataclass 55 | class NumWorkers: 56 | train: int = 8 57 | val: int = 2 58 | test: int = 2 59 | 60 | 61 | @dataclass 62 | class Splits: 63 | train: StringTuple = tuple() 64 | val: StringTuple = tuple() 65 | test: StringTuple = tuple() 66 | 67 | 68 | @dataclass 69 | class Dataset: 70 | data_folder: str = 'data/' 71 | metrics: StringTuple = ('mpjpe14',) 72 | 73 | 74 | @dataclass 75 | class DatasetWithKeypoints(Dataset): 76 | binarization = True 77 | body_thresh: float = 0.05 78 | hand_thresh: float = 0.2 79 | head_thresh: float = 0.3 80 | keyp_folder: str = 'keypoints' 81 | keyp_format: str = 'openpose25_v1' 82 | use_face_contour: bool = True 83 | 84 | 85 | @dataclass 86 | class SequenceDataset(Dataset): 87 | interpenetration_threshold: float = 1.5 88 | pos_to_sample: int = 1 89 | window_size: int = 10 90 | neg_to_sample: int = 1 91 | 92 | 93 | @dataclass 94 | class ParameterOptions: 95 | return_params: bool = True 96 | return_shape: bool = False 97 | return_expression: bool = False 98 | return_full_pose: bool = False 99 | return_vertices: bool = False 100 | 101 | 102 | @dataclass 103 | class FreiHand(DatasetWithKeypoints, ParameterOptions): 104 | data_folder: str = 'data/freihand' 105 | mask_folder: str = 'data/freihand/masks' 106 | metrics: StringTuple = ('mpjpe', 'v2v') 107 | return_vertices: bool = True 108 | return_params: bool = True 109 | return_shape: bool = True 110 | file_format: str = 'npz' 111 | is_right: bool = True 112 | split_size: float = 0.8 113 | 114 | 115 | @dataclass 116 | class HO3D(DatasetWithKeypoints, ParameterOptions): 117 | data_folder: str = 'data/ho3d' 118 | metrics: Tuple[str] = ('mpjpe',) 119 | return_vertices: bool = True 120 | return_params: bool = True 121 | return_shape: bool = True 122 | file_format: str = 'json' 123 | split_size: float = 0.80 124 | is_right: bool = True 125 | split_by_frames: bool = False 126 | subsequences_length: int = 16 127 | subsequences_stride: int = 1 128 | 129 | 130 | @dataclass 131 | class DatasetConfig: 132 | batch_size: int = 1 133 | ratio_2d: float = 0.5 134 | use_packed: bool = True 135 | use_face_contour: bool = True 136 | vertex_flip_correspondences: str = '' 137 
| transforms: Transforms = Transforms() 138 | splits: Splits = Splits() 139 | num_workers: NumWorkers = NumWorkers() 140 | 141 | 142 | @dataclass 143 | class HandConfig(DatasetConfig): 144 | splits: Splits = Splits(train=('freihand',)) 145 | freihand: FreiHand = FreiHand() 146 | ho3d: HO3D = HO3D() 147 | 148 | 149 | hand_conf = OmegaConf.structured(HandConfig) 150 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/config/defaults.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import Tuple, Optional 12 | from dataclasses import dataclass 13 | from omegaconf import OmegaConf 14 | from .network_defaults import conf as network_cfg, Network 15 | from .datasets_defaults import ( 16 | hand_conf as hand_data_conf, HandConfig, 17 | ) 18 | from .body_model import ( 19 | hand_conf, HandModel, 20 | ) 21 | from ..utils import StringTuple 22 | 23 | 24 | @dataclass 25 | class MPJPE: 26 | alignments: Tuple[str] = ('root', 'procrustes', 'none') 27 | root_joints: Tuple[str] = tuple() 28 | 29 | 30 | @dataclass 31 | class Metrics: 32 | v2v: Tuple[str] = ('procrustes', 'root', 'none') 33 | mpjpe: MPJPE = MPJPE() 34 | mpjpe_2d: MPJPE = MPJPE() 35 | fscores_thresh: Optional[Tuple[float, float]] = (5.0 / 1000, 15.0 / 1000) 36 | 37 | 38 | @dataclass 39 | class Evaluation: 40 | hand: Metrics = Metrics( 41 | mpjpe=MPJPE(root_joints=('right_wrist',)), 42 | mpjpe_2d=MPJPE(alignments=('none',)), 43 | fscores_thresh=(5.0 / 1000, 15.0 / 1000) 44 | ) 45 | 46 | 47 | @dataclass 48 | class Config: 49 | num_gpus: int = 1 50 | local_rank: int = 0 51 | use_cuda: bool = True 52 | is_training: bool = False 53 | logger_level: str = 'info' 54 | use_half_precision: bool = False 55 | output_folder: str = 'output' 56 | summary_folder: str = 'summaries' 57 | results_folder: str = 'results' 58 | code_folder: str = 'code' 59 | save_reproj_images: bool = False 60 | summary_steps: int = 100 61 | img_summary_steps: int = 100 62 | hd_img_summary_steps: int = 1000 63 | create_image_summaries: bool = True 64 | imgs_per_row: int = 2 65 | 66 | part_key: str = 'hand' 67 | experiment_tags: StringTuple = tuple() 68 | 69 | @dataclass 70 | class Degrees: 71 | hand: Tuple[float] = tuple() 72 | 73 | degrees: Degrees = Degrees() 74 | 75 | pretrained: str = '' 76 | 77 | checkpoint_folder: str = 'checkpoints' 78 | 79 | float_dtype: str = 'float32' 80 | hand_vertex_ids_path: str = '' 81 | 82 | network: Network = network_cfg 83 | hand_model: HandModel = hand_conf 84 | 85 | @dataclass 86 | class Datasets: 87 | use_equal_sampling: bool = True 88 | use_packed: bool = False 89 | hand: HandConfig = hand_data_conf 90 | 91 | datasets: Datasets = Datasets() 92 | 93 | evaluation: Evaluation = Evaluation() 94 | 95 | 96 | conf = OmegaConf.structured(Config) 97 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/config/network_defaults.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import Tuple 12 | from dataclasses import dataclass, make_dataclass, field 13 | from omegaconf import OmegaConf 14 | 15 | @dataclass 16 | class HandConditioning: 17 | wrist_pose: bool = True 18 | finger_pose: bool = True 19 | shape: bool = True 20 | 21 | 22 | @dataclass 23 | class LeakyReLU: 24 | negative_slope: float = 0.01 25 | 26 | 27 | @dataclass 28 | class ELU: 29 | alpha: float = 1.0 30 | 31 | 32 | @dataclass 33 | class PReLU: 34 | num_parameters: int = 1 35 | init: float = 0.25 36 | 37 | 38 | @dataclass 39 | class Activation: 40 | type: str = 'relu' 41 | inplace: bool = True 42 | 43 | leaky_relu: LeakyReLU = LeakyReLU() 44 | prelu: PReLU = PReLU() 45 | elu: ELU = ELU() 46 | 47 | 48 | @dataclass 49 | class BatchNorm: 50 | eps: float = 1e-05 51 | momentum: float = 0.1 52 | affine: bool = True 53 | track_running_stats: bool = True 54 | 55 | 56 | @dataclass 57 | class GroupNorm: 58 | num_groups: int = 32 59 | eps: float = 1e-05 60 | affine: bool = True 61 | 62 | 63 | @dataclass 64 | class LayerNorm: 65 | eps: float = 1e-05 66 | elementwise_affine: bool = True 67 | 68 | 69 | @dataclass 70 | class Normalization: 71 | type: str = 'batch-norm' 72 | batch_norm: BatchNorm = BatchNorm() 73 | layer_norm = LayerNorm = LayerNorm() 74 | group_norm: GroupNorm = GroupNorm() 75 | 76 | 77 | @dataclass 78 | class HeadConditioning: 79 | neck_pose: bool = True 80 | jaw_pose: bool = True 81 | shape: bool = True 82 | expression: bool = True 83 | 84 | 85 | @dataclass 86 | class WeakPerspective: 87 | regress_scale: bool = True 88 | regress_translation: bool = True 89 | mean_scale: float = 0.9 90 | 91 | 92 | @dataclass 93 | class Perspective: 94 | regress_translation: bool = False 95 | regress_rotation: bool = False 96 | regress_focal_length: bool = False 97 | focal_length: float = 5000.0 98 | 99 | 100 | @dataclass 101 | class Camera: 102 | type: str = 'weak-persp' 103 | pos_func: str = 'softplus' 104 | weak_persp: WeakPerspective = WeakPerspective() 105 | perspective: Perspective = Perspective() 106 | 107 | 108 | @dataclass 109 | class ResNet: 110 | replace_stride_with_dilation: Tuple[bool] = (False, False, False) 111 | 112 | 113 | @dataclass 114 | class HRNet: 115 | @dataclass 116 | class Stage: 117 | num_modules: int = 1 118 | num_branches: int = 1 119 | num_blocks: Tuple[int] = (4,) 120 | num_channels: Tuple[int] = (64,) 121 | block: str = 'BOTTLENECK' 122 | fuse_method: str = 'SUM' 123 | 124 | @dataclass 125 | class SubSample: 126 | num_layers: int = 3 127 | num_filters: Tuple[int] = (512,) * num_layers 128 | kernel_size: int = 7 129 | norm_type: str = 'bn' 130 | activ_type: str = 'relu' 131 | dim: int = 2 132 | kernel_sizes = [kernel_size] * len(num_filters) 133 | stride: int = 2 134 | strides: Tuple[int] = (stride,) * len(num_filters) 135 | padding: int = 1 136 | 137 | use_old_impl: bool = True 138 | pretrained_layers: Tuple[str] = ('*',) 139 | pretrained_path: str = ( 140 | '$CLUSTER_HOME/network_weights/hrnet_v2/hrnetv2_w48_imagenet_pretrained.pth' 141 | ) 142 | stage1: Stage = Stage() 143 | stage2: Stage = Stage(num_branches=2, num_blocks=(4, 4), 144 | num_channels=(48, 96), block='BASIC') 145 | stage3: Stage = Stage(num_modules=4, num_branches=3, 146 | 
num_blocks=(4, 4, 4), 147 | num_channels=(48, 96, 192), 148 | block='BASIC') 149 | stage4: Stage = Stage(num_modules=3, num_branches=4, 150 | num_blocks=(4, 4, 4, 4,), 151 | num_channels=(48, 96, 192, 384), 152 | block='BASIC', 153 | ) 154 | 155 | 156 | @dataclass 157 | class Backbone: 158 | type: str = 'resnet50' 159 | pretrained: bool = False 160 | projection_head: bool = False 161 | freeze: bool = False 162 | resnet: ResNet = ResNet() 163 | hrnet: HRNet = HRNet() 164 | 165 | @dataclass 166 | class TemporalBackbone: 167 | active: bool = False 168 | seq_len: int = 8 169 | num_layers: int = 2 170 | add_linear: bool = True 171 | use_residual: bool = True 172 | bidirectional: bool = False 173 | hidden_size: int = 1024 174 | freeze: bool = False 175 | 176 | @dataclass 177 | class MLP: 178 | layers: Tuple[int] = (1024, 1024) 179 | activation: Activation = Activation() 180 | normalization: Normalization = Normalization() 181 | preactivated: bool = False 182 | dropout: float = 0.0 183 | init_type: str = 'xavier' 184 | gain: float = 0.01 185 | bias_init: float = 0.0 186 | 187 | 188 | @dataclass 189 | class FeatureFusion: 190 | active: bool = False 191 | fusion_type: str = 'weighted' 192 | 193 | @dataclass 194 | class Net: 195 | type: str = 'mlp' 196 | mlp: MLP = MLP() 197 | 198 | network: Net = Net() 199 | 200 | 201 | @dataclass 202 | class LSTM: 203 | bias: bool = True 204 | hidden_size: int = 1024 205 | 206 | 207 | @dataclass 208 | class GRU: 209 | bias: bool = True 210 | hidden_size: int = 1024 211 | 212 | 213 | @dataclass 214 | class RNN: 215 | type: str = 'lstm' 216 | layer_dims: Tuple[int] = (1024,) 217 | init_type: str = 'randn' 218 | learn_mean: bool = True 219 | dropout: float = 0.0 220 | lstm: LSTM = LSTM() 221 | gru: GRU = GRU() 222 | mlp: MLP = MLP(layers=tuple(), gain=1.0) 223 | 224 | 225 | @dataclass 226 | class HMRLike: 227 | type: str = 'mlp' 228 | feature_key: str = 'avg_pooling' 229 | append_params: bool = True 230 | num_stages: int = 3 231 | pose_last_stage: bool = True 232 | detach_mean: bool = False 233 | learn_mean: bool = False 234 | 235 | backbone: Backbone = Backbone(type='resnet50') 236 | camera: Camera = Camera() 237 | mlp: MLP = MLP() 238 | rnn: RNN = RNN() 239 | 240 | 241 | @dataclass 242 | class Hand(HMRLike): 243 | use_photometric: bool = False 244 | is_right: bool = True 245 | groups: Tuple[str] = ( 246 | ( 247 | 'wrist_pose', 248 | 'hand_pose', 249 | 'camera', 250 | 'betas', 251 | ), 252 | ) 253 | 254 | @dataclass 255 | class Renderer: 256 | topology_path: str = 'data/mano/hand_template.obj' 257 | uv_size: int = 256 258 | displacement_path: str = 'data/mano/displacement.npy' 259 | renderer: Renderer = Renderer() 260 | 261 | use_hand_seg: bool = False 262 | temporal_backbone: TemporalBackbone = TemporalBackbone() 263 | 264 | 265 | @dataclass 266 | class Network: 267 | type: str = 'hand-iterative-model' 268 | use_sync_bn: bool = False 269 | 270 | hand_add_shape_noise: bool = False 271 | hand_shape_std: float = 0.0 272 | hand_shape_prob: float = 0.0 273 | 274 | # Hand noise parameters 275 | add_hand_pose_noise: bool = False 276 | hand_pose_std: float = 0.0 277 | num_hand_components: int = 3 278 | hand_noise_prob: float = 0.0 279 | 280 | hand_randomize_global_rot: bool = False 281 | hand_global_rot_max: float = 0.0 282 | hand_global_rot_min: float = 0.0 283 | hand_global_rot_noise_prob: float = 0.0 284 | 285 | hmr: HMRLike = HMRLike() 286 | hand: Hand = Hand() 287 | 288 | conf = OmegaConf.structured(Network) 289 | 
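The structured dataclasses in `defaults.py`, `datasets_defaults.py`, and `network_defaults.py` above are wrapped into OmegaConf configs (`conf = OmegaConf.structured(...)`) and then, in `cmd_parser.parse_args`, merged with the YAML experiment file passed via `--exp-cfg` and with any `--exp-opts` command-line overrides. The snippet below is a minimal, self-contained sketch of that merge flow; the `Example` dataclass and the override values are invented for illustration and are not part of the repository.

```python
# Minimal sketch of the OmegaConf merge flow used by cmd_parser.parse_args.
# `Example` is a stand-in for the structured configs above; the YAML/CLI
# overrides are illustrative values, not shipped configuration files.
from dataclasses import dataclass
from omegaconf import OmegaConf


@dataclass
class Example:
    batch_size: int = 1          # cf. DatasetConfig.batch_size
    crop_size: int = 256         # cf. Transforms.crop_size
    backbone: str = 'resnet50'   # cf. Backbone.type


defaults = OmegaConf.structured(Example)   # same pattern as OmegaConf.structured(Network)
cfg = defaults.copy()
cfg.merge_with(OmegaConf.create({'batch_size': 8}))        # stands in for OmegaConf.load(exp_cfg)
cfg.merge_with(OmegaConf.from_dotlist(['crop_size=224']))  # stands in for OmegaConf.from_cli(exp_opts)
print(OmegaConf.to_yaml(cfg))
```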
-------------------------------------------------------------------------------- /TempCLR/TempCLR/config/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import Tuple 12 | from dataclasses import dataclass 13 | 14 | 15 | @dataclass 16 | class FScores: 17 | hand: Tuple[float] = (5.0 / 1000, 15.0 / 1000) 18 | head: Tuple[float] = (5.0 / 1000, 15.0 / 1000) 19 | 20 | 21 | @dataclass 22 | class Variable: 23 | create: bool = True 24 | requires_grad: bool = True 25 | 26 | 27 | @dataclass 28 | class Pose(Variable): 29 | type: str = 'cont-rot-repr' 30 | 31 | 32 | @dataclass 33 | class Normalization: 34 | type: str = 'batch-norm' 35 | affine: bool = True 36 | elementwise_affine: bool = True 37 | 38 | 39 | @dataclass 40 | class LeakyRelu: 41 | negative_slope: float = 0.01 42 | 43 | 44 | @dataclass 45 | class Activation: 46 | type: str = 'relu' 47 | leaky_relu: LeakyRelu = LeakyRelu() 48 | 49 | 50 | @dataclass 51 | class RealNVP: 52 | num_flow_blocks: int = 2 53 | coupling_type: str = 'half-affine' 54 | normalization: Normalization = Normalization() 55 | activation: Activation = Activation() 56 | use_fc: bool = False 57 | mask_type: str = 'top' 58 | shuffle_mask: bool = True 59 | 60 | hidden_features: int = 256 61 | num_blocks_per_layer: int = 2 62 | use_volume_preserving: bool = False 63 | dropout_probability: float = 0.0 64 | batch_norm_within_layers: bool = False 65 | batch_norm_between_layers: bool = False 66 | activation: Activation = Activation() 67 | 68 | @dataclass 69 | class Coupling: 70 | hidden_dims: Tuple[int] = (256, 256) 71 | dropout: float = 0.0 72 | normalization: Normalization = Normalization() 73 | activation: Activation = Activation() 74 | 75 | coupling: Coupling = Coupling() 76 | 77 | 78 | @dataclass 79 | class NSF: 80 | num_flow_blocks: int = 2 81 | @dataclass 82 | class Transf: 83 | type: str = 'rq-coupling' 84 | hidden_features: int = 256 85 | num_transform_blocks: int = 2 86 | dropout: float = 0.0 87 | use_batch_norm: bool = False 88 | num_bins: int = 8 89 | tail_bound: int = 3 90 | apply_unconditional_transform: bool = True 91 | transf: Transf = Transf() 92 | 93 | @dataclass 94 | class Linear: 95 | type: str = 'lu' 96 | linear: Linear = Linear() 97 | 98 | 99 | @dataclass 100 | class MAF: 101 | hidden_features: int = 256 102 | num_layers: int = 5 103 | num_blocks_per_layer: int = 1 104 | use_residual_blocks: bool = False 105 | use_random_masks: bool = True 106 | use_random_permutations: bool = True 107 | dropout_probability: float = 0.0 108 | batch_norm_between_layers: bool = False 109 | batch_norm_within_layers: bool = False 110 | 111 | 112 | @dataclass 113 | class Autoregressive: 114 | type: str = 'real-nvp' 115 | input_dim: int = 21 * 6 116 | real_nvp: RealNVP = RealNVP() 117 | maf: MAF = MAF() 118 | nsf: NSF = NSF() 119 | ckpt: str = '' 120 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .build import make_all_data_loaders 12 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import List, Tuple, Union 12 | import os.path as osp 13 | 14 | from loguru import logger 15 | import functools 16 | import torch 17 | import torch.utils.data as dutils 18 | from . import datasets 19 | from .structures import (StructureList, 20 | ImageList, ImageListPacked) 21 | from .transforms import build_transforms 22 | from TempCLR.utils import Tensor, TensorList 23 | 24 | DEFAULT_NUM_WORKERS = { 25 | 'train': 0, 26 | 'val': 0, 27 | 'test': 0 28 | } 29 | 30 | 31 | def make_data_sampler(dataset): 32 | return dutils.SequentialSampler(dataset) 33 | 34 | 35 | def make_hand_dataset(name, dataset_cfg, transforms, 36 | **kwargs): 37 | if name == 'freihand': 38 | obj = datasets.FreiHand 39 | elif name == 'ho3d': 40 | obj = datasets.HO3D 41 | else: 42 | raise ValueError(f'Unknown dataset: {name}') 43 | 44 | logger.info(f'Building dataset: {name}') 45 | args = dict(**dataset_cfg[name]) 46 | args.update(kwargs) 47 | vertex_flip_correspondences = osp.expandvars(dataset_cfg.get( 48 | 'vertex_flip_correspondences', '')) 49 | 50 | dset_obj = obj(transforms=transforms, hand_only=True, 51 | vertex_flip_correspondences=vertex_flip_correspondences, 52 | **args) 53 | 54 | logger.info(f'Created dataset: {dset_obj.name()}') 55 | return dset_obj 56 | 57 | 58 | class MemoryPinning(object): 59 | def __init__( 60 | self, 61 | full_img_list: Union[ImageList, List[Tensor]], 62 | images: Tensor, 63 | targets: StructureList 64 | ): 65 | super(MemoryPinning, self).__init__() 66 | self.img_list = full_img_list 67 | self.images = images 68 | self.targets = targets 69 | 70 | def pin_memory( 71 | self 72 | ) -> Tuple[Union[ImageList, ImageListPacked, TensorList], 73 | Tensor, StructureList]: 74 | if self.img_list is not None: 75 | if isinstance(self.img_list, (ImageList, ImageListPacked)): 76 | self.img_list.pin_memory() 77 | elif isinstance(self.img_list, (list, tuple)): 78 | self.img_list = [x.pin_memory() for x in self.img_list] 79 | return ( 80 | self.img_list, 81 | self.images.pin_memory(), 82 | self.targets, 83 | ) 84 | 85 | 86 | def collate_batch( 87 | batch, 88 | return_full_imgs=False, 89 | pin_memory=False 90 | ): 91 | if return_full_imgs: 92 | images, cropped_images, targets, _ = zip(*batch) 93 | else: 94 | _, cropped_images, targets, _ = zip(*batch) 95 | 96 | out_targets = [] 97 | for t in targets: 98 | if t is None: 99 | continue 100 | if type(t) == list: 101 | out_targets += t 102 | else: 103 | out_targets.append(t) 104 | out_cropped_images = [] 105 | for img in cropped_images: 106 | if img is None: 107 | continue 108 | if 
torch.is_tensor(img): 109 | if len(img.shape) < 4: 110 | img.unsqueeze_(dim=0) 111 | out_cropped_images.append(img) 112 | elif isinstance(img, (list, tuple)): 113 | for d in img: 114 | d.unsqueeze_(dim=0) 115 | out_cropped_images.append(d) 116 | 117 | if len(out_cropped_images) < 1: 118 | return None, None, None 119 | 120 | full_img_list = None 121 | if return_full_imgs: 122 | full_img_list = images 123 | 124 | out_cropped_images = torch.cat(out_cropped_images) 125 | 126 | if pin_memory: 127 | return MemoryPinning( 128 | full_img_list, 129 | out_cropped_images, 130 | out_targets 131 | ) 132 | else: 133 | return full_img_list, out_cropped_images, out_targets 134 | 135 | 136 | def make_data_loader(dataset, batch_size=32, num_workers=0, 137 | is_train=True, sampler=None, collate_fn=None, 138 | batch_sampler=None, pin_memory=False, 139 | ): 140 | if batch_sampler is None: 141 | sampler = make_data_sampler(dataset) 142 | 143 | if batch_sampler is None: 144 | assert sampler is not None, ( 145 | 'Batch sampler and sampler can\'t be "None" at the same time') 146 | data_loader = torch.utils.data.DataLoader( 147 | dataset, 148 | batch_size=batch_size, 149 | num_workers=num_workers, 150 | sampler=sampler, 151 | collate_fn=collate_fn, 152 | drop_last=True and is_train, 153 | pin_memory=pin_memory, 154 | ) 155 | else: 156 | data_loader = torch.utils.data.DataLoader( 157 | dataset, 158 | num_workers=num_workers, 159 | collate_fn=collate_fn, 160 | batch_sampler=batch_sampler, 161 | pin_memory=pin_memory, 162 | ) 163 | return data_loader 164 | 165 | 166 | def make_all_data_loaders( 167 | exp_cfg, 168 | split='test', 169 | return_hand_full_imgs=False, 170 | enable_augment=True, 171 | **kwargs 172 | ): 173 | dataset_cfg = exp_cfg.get('datasets', {}) 174 | 175 | hand_dsets_cfg = dataset_cfg.get('hand', {}) 176 | hand_dset_names = hand_dsets_cfg.get('splits', {})[split] 177 | hand_transfs_cfg = hand_dsets_cfg.get('transforms', {}) 178 | hand_num_workers = hand_dsets_cfg.get( 179 | 'num_workers', DEFAULT_NUM_WORKERS).get(split, 0) 180 | 181 | hand_transforms = build_transforms( 182 | hand_transfs_cfg, is_train=False, 183 | enable_augment=enable_augment, 184 | return_full_imgs=return_hand_full_imgs) 185 | 186 | if hand_transforms: 187 | logger.info( 188 | 'Hand transformations: \n{}', 189 | '\n'.join(list(map(str, hand_transforms)))) 190 | else: 191 | logger.info( 192 | 'Fixed Hand Transformation per Sequence') 193 | 194 | hand_datasets = [] 195 | for dataset_name in hand_dset_names: 196 | dset = make_hand_dataset(dataset_name, hand_dsets_cfg, 197 | transforms=hand_transforms, 198 | is_train=False, split=split, **kwargs) 199 | hand_datasets.append(dset) 200 | 201 | hand_batch_size = hand_dsets_cfg.get('batch_size') 202 | 203 | hand_collate_fn = functools.partial( 204 | collate_batch, 205 | return_full_imgs=return_hand_full_imgs) 206 | 207 | hand_data_loaders = [] 208 | for hand_dataset in hand_datasets: 209 | hand_data_loaders.append( 210 | make_data_loader(hand_dataset, batch_size=hand_batch_size, 211 | num_workers=hand_num_workers, 212 | is_train=False, 213 | batch_sampler=None, 214 | collate_fn=hand_collate_fn, 215 | )) 216 | 217 | return { 218 | 'hand': hand_data_loaders 219 | } 220 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .freihand import FreiHand 12 | from .ho3d import HO3D 13 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import NewType, List, Union, Tuple 12 | 13 | from .abstract_structure import AbstractStructure 14 | from .keypoints import Keypoints2D, Keypoints3D 15 | 16 | from .betas import Betas 17 | from .global_rot import GlobalRot 18 | from .body_pose import BodyPose 19 | from .hand_pose import HandPose 20 | 21 | from .vertices import Vertices 22 | from .joints import Joints 23 | from .bbox import BoundingBox 24 | 25 | from .image_list import ImageList, ImageListPacked, to_image_list 26 | from .points_2d import Points2D 27 | 28 | StructureList = NewType('StructureList', List[AbstractStructure]) 29 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/abstract_structure.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from abc import ABC, abstractmethod 12 | from loguru import logger 13 | 14 | 15 | class AbstractStructure(ABC): 16 | def __init__(self): 17 | super(AbstractStructure, self).__init__() 18 | self.extra_fields = {} 19 | 20 | def __del__(self): 21 | if hasattr(self, 'extra_fields'): 22 | self.extra_fields.clear() 23 | 24 | def add_field(self, field, field_data): 25 | self.extra_fields[field] = field_data 26 | 27 | def get_field(self, field): 28 | return self.extra_fields[field] 29 | 30 | def has_field(self, field): 31 | return field in self.extra_fields 32 | 33 | def delete_field(self, field): 34 | if field in self.extra_fields: 35 | del self.extra_fields[field] 36 | 37 | def shift(self, vector, *args, **kwargs): 38 | for k, v in self.extra_fields.items(): 39 | if isinstance(v, AbstractStructure): 40 | v = v.shift(vector) 41 | self.add_field(k, v) 42 | self.add_field('motion_blur_shift', vector) 43 | return self 44 | 45 | def transpose(self, method): 46 | for k, v in self.extra_fields.items(): 47 | if isinstance(v, AbstractStructure): 48 | v = v.transpose(method) 49 | self.add_field(k, v) 50 | self.add_field('is_flipped', True) 51 | return self 52 | 53 | def normalize(self, *args, **kwargs): 54 | for k, v in self.extra_fields.items(): 55 | if isinstance(v, AbstractStructure): 56 | v = v.normalize(*args, **kwargs) 57 | return self 58 | 59 | def rotate(self, *args, **kwargs): 60 | for k, v in self.extra_fields.items(): 61 | if isinstance(v, AbstractStructure): 62 | v = v.rotate(*args, **kwargs) 63 | self.add_field('rot', kwargs.get('rot', 0)) 64 | return self 65 | 66 | def crop(self, *args, **kwargs): 67 | for k, v in self.extra_fields.items(): 68 | if isinstance(v, AbstractStructure): 69 | v = v.crop(*args, **kwargs) 70 | return self 71 | 72 | def resize(self, *args, **kwargs): 73 | for k, v in self.extra_fields.items(): 74 | if isinstance(v, AbstractStructure): 75 | v = v.resize(*args, **kwargs) 76 | self.add_field(k, v) 77 | return self 78 | 79 | def to_tensor(self, *args, **kwargs): 80 | for k, v in self.extra_fields.items(): 81 | if isinstance(v, AbstractStructure): 82 | v.to_tensor(*args, **kwargs) 83 | self.add_field(k, v) 84 | 85 | def to(self, *args, **kwargs): 86 | for k, v in self.extra_fields.items(): 87 | if hasattr(v, "to"): 88 | v = v.to(*args, **kwargs) 89 | return self 90 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/bbox.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import print_function 13 | from copy import deepcopy 14 | 15 | import numpy as np 16 | 17 | import cv2 18 | 19 | import torch 20 | from loguru import logger 21 | 22 | # from ...utils.torch_utils import to_tensor 23 | from ..utils import bbox_area, bbox_to_wh 24 | from .abstract_structure import AbstractStructure 25 | from ...utils.transf_utils import get_transform 26 | 27 | # transpose 28 | FLIP_LEFT_RIGHT = 0 29 | FLIP_TOP_BOTTOM = 1 30 | 31 | 32 | class BoundingBox(AbstractStructure): 33 | def __init__(self, bbox, size, flip_axis=0, transform=True, **kwargs): 34 | super(BoundingBox, self).__init__() 35 | self.bbox = bbox 36 | self.flip_axis = flip_axis 37 | self.size = size 38 | self.transform = transform 39 | 40 | def __repr__(self): 41 | msg = ', '.join(map(str, map(float, self.bbox))) 42 | return f'Bounding box: {msg}' 43 | 44 | def to_tensor(self, *args, **kwargs): 45 | if not torch.is_tensor(self.bbox): 46 | self.bbox = torch.from_numpy(self.bbox) 47 | 48 | for k, v in self.extra_fields.items(): 49 | if isinstance(v, AbstractStructure): 50 | v.to_tensor(*args, **kwargs) 51 | 52 | def rotate(self, rot=0, *args, **kwargs): 53 | (h, w) = self.size[:2] 54 | (cX, cY) = (w // 2, h // 2) 55 | M = cv2.getRotationMatrix2D((cX, cY), rot, 1.0) 56 | cos = np.abs(M[0, 0]) 57 | sin = np.abs(M[0, 1]) 58 | # compute the new bounding dimensions of the image 59 | nW = int((h * sin) + (w * cos)) 60 | nH = int((h * cos) + (w * sin)) 61 | # adjust the rotation matrix to take into account translation 62 | M[0, 2] += (nW / 2) - cX 63 | M[1, 2] += (nH / 2) - cY 64 | 65 | if self.transform: 66 | bbox = self.bbox.copy().reshape(4) 67 | xmin, ymin, xmax, ymax = bbox 68 | points = np.array( 69 | [[xmin, ymin], 70 | [xmin, ymax], 71 | [xmax, ymin], 72 | [xmax, ymax]], 73 | ) 74 | 75 | bbox = (np.dot(points, M[:2, :2].T) + M[:2, 2] + 1) 76 | xmin, ymin = np.amin(bbox, axis=0) 77 | xmax, ymax = np.amax(bbox, axis=0) 78 | 79 | new_bbox = np.array([xmin, ymin, xmax, ymax]) 80 | else: 81 | new_bbox = self.bbox.copy().reshape(4) 82 | 83 | bbox_target = type(self)( 84 | new_bbox, size=(nH, nW, 3), transform=self.transform) 85 | for k, v in self.extra_fields.items(): 86 | if isinstance(v, AbstractStructure): 87 | v = v.rotate(rot=rot, *args, **kwargs) 88 | bbox_target.add_field(k, v) 89 | 90 | return bbox_target 91 | 92 | def crop(self, center, scale, rot=0, crop_size=224, *args, **kwargs): 93 | if self.transform: 94 | bbox = self.bbox.copy().reshape(4) 95 | xmin, ymin, xmax, ymax = bbox 96 | points = np.array( 97 | [[xmin, ymin], 98 | [xmin, ymax], 99 | [xmax, ymin], 100 | [xmax, ymax]], 101 | ) 102 | transf = get_transform( 103 | center, scale, (crop_size, crop_size), rot=rot) 104 | 105 | bbox = (np.dot(points, transf[:2, :2].T) + transf[:2, 2] + 1) 106 | xmin, ymin = np.amin(bbox, axis=0) 107 | xmax, ymax = np.amax(bbox, axis=0) 108 | 109 | new_bbox = np.array([xmin, ymin, xmax, ymax]) 110 | else: 111 | new_bbox = self.bbox.copy().reshape(4) 112 | 113 | bbox_target = type(self)(new_bbox, size=(crop_size, crop_size), 114 | transform=self.transform) 115 | for k, v in self.extra_fields.items(): 116 | if isinstance(v, AbstractStructure): 117 | v = v.crop(center=center, scale=scale, 118 | crop_size=crop_size, rot=rot, 119 | *args, **kwargs) 120 | bbox_target.add_field(k, v) 121 | 122 | return bbox_target 123 | 124 | def shift(self, vector, *args, **kwargs): 125 | if torch.is_tensor(self.bbox): 126 | bbox = self.bbox.clone().reshape(4) 127 | else: 
128 | bbox = self.bbox.copy().reshape(4) 129 | 130 | xmin, ymin, xmax, ymax = bbox 131 | if torch.is_tensor(self.bbox): 132 | new_bbox = torch.tensor( 133 | [xmin + vector[0], 134 | ymin + vector[1], 135 | xmax + vector[0], 136 | ymax + vector[1]]) 137 | else: 138 | new_bbox = np.array( 139 | [xmin + vector[0], 140 | ymin + vector[1], 141 | xmax + vector[0], 142 | ymax + vector[1]]) 143 | 144 | bbox_target = type(self)(new_bbox, size=self.size, 145 | flip_axis=self.flip_axis, 146 | transform=self.transform) 147 | for k, v in self.extra_fields.items(): 148 | if isinstance(v, AbstractStructure): 149 | v = v.shift(vector, *args, **kwargs) 150 | bbox_target.add_field(k, v) 151 | 152 | return bbox_target 153 | 154 | def resize(self, size, *args, **kwargs): 155 | ratios = tuple(float(s) / float(s_orig) 156 | for s, s_orig in zip(size, self.size)) 157 | ratio_h, ratio_w, _ = ratios 158 | bbox = self.bbox.copy().reshape(4) 159 | xmin, ymin, xmax, ymax = bbox 160 | xmin, xmax = xmin * ratio_w, xmax * ratio_w 161 | ymin, ymax = ymin * ratio_h, ymax * ratio_h 162 | 163 | new_bbox = np.array([xmin, ymin, xmax, ymax]) 164 | bbox_target = type(self)(new_bbox, size=size, 165 | flip_axis=self.flip_axis, 166 | transform=self.transform) 167 | for k, v in self.extra_fields.items(): 168 | if isinstance(v, AbstractStructure): 169 | v = v.resize(size=size, *args, **kwargs) 170 | bbox_target.add_field(k, v) 171 | 172 | return bbox_target 173 | 174 | def __len__(self): 175 | return 1 176 | 177 | def transpose(self, method): 178 | if method not in (FLIP_LEFT_RIGHT,): 179 | raise NotImplementedError( 180 | "Only FLIP_LEFT_RIGHT implemented") 181 | 182 | xmin, xmax = self.bbox.reshape(-1)[[0, 2]] 183 | # logger.info(f'Before: {xmin}, {xmax}') 184 | W = self.size[1] 185 | new_xmin = W - xmax 186 | new_xmax = W - xmin 187 | new_ymin, new_ymax = self.bbox[[1, 3]] 188 | # logger.info(f'After: {xmin}, {xmax}') 189 | 190 | if torch.is_tensor(self.bbox): 191 | flipped_bbox = torch.tensor( 192 | [new_xmin, new_ymin, new_xmax, new_ymax], 193 | dtype=self.bbox.dtype, device=self.bbox.device) 194 | else: 195 | flipped_bbox = np.array( 196 | [new_xmin, new_ymin, new_xmax, new_ymax], 197 | dtype=self.bbox.dtype) 198 | 199 | bbox_target = type(self)(flipped_bbox, self.size, 200 | transform=self.transform) 201 | # logger.info(bbox_target) 202 | for k, v in self.extra_fields.items(): 203 | if isinstance(v, AbstractStructure): 204 | v = v.transpose(method) 205 | bbox_target.add_field(k, v) 206 | 207 | bbox_target.add_field('is_flipped', True) 208 | return bbox_target 209 | 210 | def to(self, *args, **kwargs): 211 | bbox_tensor = self.bbox 212 | if not torch.is_tensor(self.bbox): 213 | bbox_tensor = torch.tensor(bbox_tensor) 214 | bbox_target = type(self)(bbox_tensor.to(*args, **kwargs), self.size, 215 | transform=self.transform) 216 | for k, v in self.extra_fields.items(): 217 | if hasattr(v, "to"): 218 | v = v.to(*args, **kwargs) 219 | bbox_target.add_field(k, v) 220 | return bbox_target 221 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/betas.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import print_function 13 | 14 | import numpy as np 15 | 16 | import torch 17 | import cv2 18 | 19 | from torchvision.transforms import functional as F 20 | from .abstract_structure import AbstractStructure 21 | 22 | 23 | class Betas(AbstractStructure): 24 | """ Stores the shape params 25 | """ 26 | 27 | def __init__(self, betas, dtype=torch.float32, **kwargs): 28 | super(Betas, self).__init__() 29 | 30 | self.betas = betas 31 | 32 | def to_tensor(self, *args, **kwargs): 33 | if not torch.is_tensor(self.betas): 34 | self.betas = torch.from_numpy(self.betas) 35 | for k, v in self.extra_fields.items(): 36 | if isinstance(v, AbstractStructure): 37 | v.to_tensor(*args, **kwargs) 38 | 39 | def to(self, *args, **kwargs): 40 | field = type(self)(betas=self.betas.to(*args, **kwargs)) 41 | for k, v in self.extra_fields.items(): 42 | if hasattr(v, "to"): 43 | v = v.to(*args, **kwargs) 44 | field.add_field(k, v) 45 | return field 46 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/body_pose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import numpy as np 12 | import torch 13 | 14 | from .abstract_structure import AbstractStructure 15 | from ...utils.rotation_utils import batch_rodrigues 16 | 17 | # transpose 18 | FLIP_LEFT_RIGHT = 0 19 | FLIP_TOP_BOTTOM = 1 20 | 21 | sign_flip = np.array( 22 | [1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, 23 | -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, 24 | -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 25 | 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, 26 | -1, 1, -1, -1]) 27 | 28 | SIGN_FLIP = torch.tensor([6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 29 | 12, 13, 14, 18, 19, 20, 24, 25, 26, 21, 22, 23, 27, 30 | 28, 29, 33, 34, 35, 30, 31, 32, 31 | 36, 37, 38, 42, 43, 44, 39, 40, 41, 45, 46, 47, 51, 32 | 52, 53, 48, 49, 50, 57, 58, 59, 54, 55, 56, 63, 64, 33 | 65, 60, 61, 62], 34 | dtype=torch.long) - 3 35 | SIGN_FLIP = SIGN_FLIP.detach().numpy() 36 | 37 | 38 | class BodyPose(AbstractStructure): 39 | """ Stores the body pose vector. 
Assumes the input is in axis-angle format 40 | """ 41 | 42 | def __init__(self, body_pose, **kwargs): 43 | super(BodyPose, self).__init__() 44 | self.body_pose = body_pose 45 | 46 | def to_tensor(self, to_rot=True, *args, **kwargs): 47 | self.body_pose = torch.from_numpy(self.body_pose) 48 | 49 | if to_rot: 50 | self.body_pose = batch_rodrigues( 51 | self.body_pose.view(-1, 3)).view(-1, 3, 3) 52 | 53 | for k, v in self.extra_fields.items(): 54 | if isinstance(v, AbstractStructure): 55 | v.to_tensor(*args, **kwargs) 56 | 57 | def transpose(self, method): 58 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 59 | raise NotImplementedError( 60 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 61 | ) 62 | 63 | if torch.is_tensor(self.body_pose): 64 | dim_flip = torch.tensor([1, -1, -1], dtype=self.body_pose.dtype) 65 | else: 66 | dim_flip = np.array([1, -1, -1], dtype=self.body_pose.dtype) 67 | 68 | body_pose = (self.body_pose.reshape(-1)[SIGN_FLIP].reshape(21, 3) * 69 | dim_flip).reshape(21 * 3).copy() 70 | field = type(self)(body_pose=body_pose) 71 | 72 | for k, v in self.extra_fields.items(): 73 | if isinstance(v, AbstractStructure): 74 | v = v.transpose(method) 75 | field.add_field(k, v) 76 | self.add_field('is_flipped', True) 77 | return field 78 | 79 | def crop(self, rot=0, *args, **kwargs): 80 | field = type(self)(body_pose=self.body_pose) 81 | 82 | for k, v in self.extra_fields.items(): 83 | if isinstance(v, AbstractStructure): 84 | v = v.crop(rot=rot, *args, **kwargs) 85 | field.add_field(k, v) 86 | self.add_field('rot', rot) 87 | return field 88 | 89 | def to(self, *args, **kwargs): 90 | field = type(self)(body_pose=self.body_pose.to(*args, **kwargs)) 91 | for k, v in self.extra_fields.items(): 92 | if hasattr(v, "to"): 93 | v = v.to(*args, **kwargs) 94 | field.add_field(k, v) 95 | return field 96 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/global_rot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import numpy as np 12 | 13 | import torch 14 | import cv2 15 | from .abstract_structure import AbstractStructure 16 | from TempCLR.utils.rotation_utils import batch_rodrigues 17 | 18 | # transpose 19 | FLIP_LEFT_RIGHT = 0 20 | FLIP_TOP_BOTTOM = 1 21 | 22 | 23 | class GlobalRot(AbstractStructure): 24 | 25 | def __init__(self, global_rot, **kwargs): 26 | super(GlobalRot, self).__init__() 27 | self.global_rot = global_rot 28 | 29 | def to_tensor(self, to_rot=True, *args, **kwargs): 30 | if not torch.is_tensor(self.global_rot): 31 | self.global_rot = torch.from_numpy(self.global_rot) 32 | 33 | if to_rot: 34 | self.global_rot = batch_rodrigues( 35 | self.global_rot.view(-1, 3)).view(1, 3, 3) 36 | 37 | for k, v in self.extra_fields.items(): 38 | if isinstance(v, AbstractStructure): 39 | v.to_tensor(*args, **kwargs) 40 | 41 | def transpose(self, method): 42 | 43 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 44 | raise NotImplementedError( 45 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 46 | ) 47 | 48 | if torch.is_tensor(self.global_rot): 49 | dim_flip = torch.tensor([1, -1, -1], dtype=self.global_rot.dtype) 50 | global_rot = self.global_rot.clone().squeeze() * dim_flip 51 | else: 52 | dim_flip = np.array([1, -1, -1], dtype=self.global_rot.dtype) 53 | global_rot = self.global_rot.copy().squeeze() * dim_flip 54 | 55 | field = type(self)(global_rot=global_rot) 56 | 57 | for k, v in self.extra_fields.items(): 58 | if isinstance(v, AbstractStructure): 59 | v = v.transpose(method) 60 | field.add_field(k, v) 61 | self.add_field('is_flipped', True) 62 | return field 63 | 64 | def rotate(self, rot=0, *args, **kwargs): 65 | global_rot = self.global_rot.copy() 66 | if rot != 0: 67 | R = np.array([[np.cos(np.deg2rad(-rot)), 68 | -np.sin(np.deg2rad(-rot)), 0], 69 | [np.sin(np.deg2rad(-rot)), 70 | np.cos(np.deg2rad(-rot)), 0], 71 | [0, 0, 1]], dtype=np.float32) 72 | 73 | # find the rotation of the body in camera frame 74 | per_rdg, _ = cv2.Rodrigues(global_rot) 75 | # apply the global rotation to the global orientation 76 | resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg)) 77 | global_rot = (resrot.T)[0].reshape(3) 78 | field = type(self)(global_rot=global_rot) 79 | for k, v in self.extra_fields.items(): 80 | if isinstance(v, AbstractStructure): 81 | v = v.crop(rot=rot, *args, **kwargs) 82 | field.add_field(k, v) 83 | 84 | self.add_field('rot', rot) 85 | return field 86 | 87 | def to(self, *args, **kwargs): 88 | field = type(self)(global_rot=self.global_rot.to(*args, **kwargs)) 89 | for k, v in self.extra_fields.items(): 90 | if hasattr(v, "to"): 91 | v = v.to(*args, **kwargs) 92 | field.add_field(k, v) 93 | return field 94 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/hand_pose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import numpy as np 12 | 13 | import torch 14 | 15 | from .abstract_structure import AbstractStructure 16 | from TempCLR.utils.rotation_utils import batch_rodrigues 17 | 18 | # transpose 19 | FLIP_LEFT_RIGHT = 0 20 | FLIP_TOP_BOTTOM = 1 21 | 22 | 23 | class HandPose(AbstractStructure): 24 | """ Stores left and right hand pose parameters 25 | """ 26 | 27 | def __init__(self, left_hand_pose, right_hand_pose, **kwargs): 28 | super(HandPose, self).__init__() 29 | self.left_hand_pose = left_hand_pose 30 | self.right_hand_pose = right_hand_pose 31 | 32 | def to_tensor(self, to_rot=True, *args, **kwargs): 33 | if not torch.is_tensor(self.left_hand_pose): 34 | if self.left_hand_pose is not None: 35 | self.left_hand_pose = torch.from_numpy(self.left_hand_pose) 36 | if not torch.is_tensor(self.right_hand_pose): 37 | if self.right_hand_pose is not None: 38 | self.right_hand_pose = torch.from_numpy( 39 | self.right_hand_pose) 40 | if to_rot: 41 | if self.left_hand_pose is not None: 42 | self.left_hand_pose = batch_rodrigues( 43 | self.left_hand_pose.view(-1, 3)).view(-1, 3, 3) 44 | if self.right_hand_pose is not None: 45 | self.right_hand_pose = batch_rodrigues( 46 | self.right_hand_pose.view(-1, 3)).view(-1, 3, 3) 47 | 48 | for k, v in self.extra_fields.items(): 49 | if isinstance(v, AbstractStructure): 50 | v.to_tensor(*args, **kwargs) 51 | 52 | def transpose(self, method): 53 | 54 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 55 | raise NotImplementedError( 56 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 57 | ) 58 | 59 | if torch.is_tensor(self.left_hand_pose): 60 | dim_flip = torch.tensor([1, -1, -1], dtype=torch.float32) 61 | else: 62 | dim_flip = np.array([1, -1, -1], dtype=np.float32) 63 | 64 | left_hand_pose, right_hand_pose = None, None 65 | if self.right_hand_pose is not None: 66 | left_hand_pose = (self.right_hand_pose.reshape(15, 3) * 67 | dim_flip).reshape(45) 68 | if self.left_hand_pose is not None: 69 | right_hand_pose = (self.left_hand_pose.reshape(15, 3) * 70 | dim_flip).reshape(45) 71 | 72 | field = type(self)(left_hand_pose=left_hand_pose, 73 | right_hand_pose=right_hand_pose) 74 | 75 | for k, v in self.extra_fields.items(): 76 | if isinstance(v, AbstractStructure): 77 | v = v.transpose(method) 78 | field.add_field(k, v) 79 | self.add_field('is_flipped', True) 80 | return field 81 | 82 | def to(self, *args, **kwargs): 83 | left_hand_pose = self.left_hand_pose 84 | right_hand_pose = self.right_hand_pose 85 | if left_hand_pose is not None: 86 | left_hand_pose = left_hand_pose.to(*args, **kwargs) 87 | if right_hand_pose is not None: 88 | right_hand_pose = right_hand_pose.to(*args, **kwargs) 89 | field = type(self)( 90 | left_hand_pose=left_hand_pose, right_hand_pose=right_hand_pose) 91 | for k, v in self.extra_fields.items(): 92 | if hasattr(v, "to"): 93 | v = v.to(*args, **kwargs) 94 | field.add_field(k, v) 95 | return field 96 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from typing import List, NewType, Union 12 | import sys 13 | import numpy as np 14 | import torch 15 | from loguru import logger 16 | 17 | from TempCLR.utils import Tensor, Timer 18 | 19 | 20 | class ImageList(object): 21 | def __init__(self, images: torch.Tensor, 22 | img_sizes: List[torch.Size], 23 | padding=None): 24 | self.images = images 25 | self.img_sizes = img_sizes 26 | self.sizes_tensor = torch.stack( 27 | [torch.tensor(s) if not torch.is_tensor(s) else s 28 | for s in img_sizes]).to(dtype=self.images.dtype) 29 | if padding is not None: 30 | self.padding_tensor = torch.stack( 31 | [torch.tensor(s) if not torch.is_tensor(s) else s 32 | for s in padding]).to(dtype=self.images.dtype) 33 | self._shape = self.images.shape 34 | 35 | def as_image_list(self) -> List[Tensor]: 36 | return self.images 37 | 38 | def as_tensor(self, dtype=torch.float32) -> Tensor: 39 | return self.images.to(dtype=dtype) 40 | 41 | @property 42 | def shape(self): 43 | return self._shape 44 | 45 | @property 46 | def device(self): 47 | return self.images.device 48 | 49 | @property 50 | def dtype(self): 51 | return self.images.dtype 52 | 53 | def pin_memory(self): 54 | if not self.images.is_pinned(): 55 | self.images = self.images.pin_memory() 56 | return self 57 | 58 | def __del__(self): 59 | del self.images 60 | del self.sizes_tensor 61 | del self.img_sizes 62 | 63 | def to(self, *args, **kwargs): 64 | images = self.images.to(*args, **kwargs) 65 | sizes_tensor = self.sizes_tensor.to(*args, **kwargs) 66 | return ImageList(images, sizes_tensor) 67 | 68 | 69 | class ImageListPacked(object): 70 | def __init__( 71 | self, 72 | packed_tensor: Tensor, 73 | starts: List[int], 74 | num_elements: List[int], 75 | img_sizes: List[torch.Size], 76 | ) -> None: 77 | ''' 78 | ''' 79 | self.packed_tensor = packed_tensor 80 | self.starts = starts 81 | self.num_elements = num_elements 82 | self.img_sizes = img_sizes 83 | 84 | self._shape = [len(starts)] + [max(s) for s in zip(*img_sizes)] 85 | 86 | _, self.heights, self.widths = zip(*img_sizes) 87 | 88 | def as_tensor(self, dtype=torch.float32): 89 | return self.packed_tensor.to(dtype=dtype) 90 | 91 | def as_image_list(self) -> List[Tensor]: 92 | out_list = [] 93 | 94 | sizes = [shape[1:] for shape in self.img_sizes] 95 | H, W = [max(s) for s in zip(*sizes)] 96 | 97 | out_shape = (3, H, W) 98 | for ii in range(len(self.img_sizes)): 99 | start = self.starts[ii] 100 | end = self.starts[ii] + self.num_elements[ii] 101 | c, h, w = self.img_sizes[ii] 102 | img = self.packed_tensor[start:end].reshape(c, h, w) 103 | out_img = torch.zeros( 104 | out_shape, device=self.device, dtype=self.dtype) 105 | out_img[:c, :h, :w] = img 106 | out_list.append(out_img.detach().cpu().numpy()) 107 | 108 | return out_list 109 | 110 | @property 111 | def shape(self): 112 | return self._shape 113 | 114 | @property 115 | def device(self): 116 | return self.packed_tensor.device 117 | 118 | @property 119 | def dtype(self): 120 | return self.packed_tensor.dtype 121 | 122 | def pin_memory(self): 123 | if not self.images.is_pinned(): 124 | self.images = self.images.pin_memory() 125 | return self 126 | 127 | def to(self, *args, **kwargs): 128 | self.packed_tensor = self.packed_tensor.to(*args, **kwargs) 129 | return self 130 | 131 | 132 | mem_timer = Timer(name='Memory') 133 | transf_timer = Timer(name='Transfer') 134 | 135 | 136 | def to_image_list_concat( 137 | images: List[Tensor] 138 | ) -> ImageList: 139 | if images is None: 140 | return images 141 | if isinstance(images, ImageList): 142 | 
return images 143 | sizes = [img.shape[1:] for img in images] 144 | # logger.info(sizes) 145 | H, W = [max(s) for s in zip(*sizes)] 146 | 147 | batch_size = len(images) 148 | batched_shape = (batch_size, images[0].shape[0], H, W) 149 | batched = torch.zeros( 150 | batched_shape, device=images[0].device, dtype=images[0].dtype) 151 | 152 | # for img, padded in zip(images, batched): 153 | # shape = img.shape 154 | # padded[:shape[0], :shape[1], :shape[2]] = img 155 | padding = None 156 | for ii, img in enumerate(images): 157 | shape = img.shape 158 | batched[ii, :shape[0], :shape[1], :shape[2]] = img 159 | 160 | return ImageList(batched, sizes, padding=padding) 161 | 162 | 163 | def to_image_list_packed(images: List[Tensor]) -> ImageListPacked: 164 | if images is None: 165 | return images 166 | if isinstance(images, ImageListPacked): 167 | return images 168 | # Store the size of each image 169 | # Compute the number of elements in each image 170 | sizes = [img.shape for img in images] 171 | num_element_list = [np.prod(s) for s in sizes] 172 | # Compute the total number of elements 173 | 174 | packed = torch.cat([img.flatten() for img in images]) 175 | # Compute the start index of each image tensor in the packed tensor 176 | starts = [0] + list(np.cumsum(num_element_list))[:-1] 177 | return ImageListPacked(packed, starts, num_element_list, sizes) 178 | 179 | 180 | def to_image_list( 181 | images: List[Tensor], 182 | use_packed=False 183 | ) -> Union[ImageList, ImageListPacked]: 184 | ''' 185 | ''' 186 | func = to_image_list_packed if use_packed else to_image_list_concat 187 | return func(images) 188 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/joints.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import numpy as np 12 | 13 | import torch 14 | from .abstract_structure import AbstractStructure 15 | 16 | 17 | class Joints(AbstractStructure): 18 | def __init__(self, joints, **kwargs): 19 | super(Joints, self).__init__() 20 | # self.joints = to_tensor(joints) 21 | self.joints = joints 22 | 23 | def __repr__(self): 24 | s = self.__class__.__name__ 25 | return s 26 | 27 | def to_tensor(self, *args, **kwargs): 28 | self.joints = torch.tensor(self.joints) 29 | 30 | for k, v in self.extra_fields.items(): 31 | if isinstance(v, AbstractStructure): 32 | v.to_tensor(*args, **kwargs) 33 | 34 | def __getitem__(self, key): 35 | if key == 'joints': 36 | return self.joints 37 | else: 38 | raise ValueError('Unknown key: {}'.format(key)) 39 | 40 | def __len__(self): 41 | return 1 42 | 43 | def to(self, *args, **kwargs): 44 | joints = type(self)(self.joints.to(*args, **kwargs)) 45 | for k, v in self.extra_fields.items(): 46 | if hasattr(v, "to"): 47 | v = v.to(*args, **kwargs) 48 | joints.add_field(k, v) 49 | return joints 50 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/points_2d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 
2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import numpy as np 12 | import torch 13 | 14 | import cv2 15 | from loguru import logger 16 | from .abstract_structure import AbstractStructure 17 | 18 | from TempCLR.utils import Array, Tensor, IntTuple, get_transform 19 | 20 | # transpose 21 | FLIP_LEFT_RIGHT = 0 22 | FLIP_TOP_BOTTOM = 1 23 | 24 | 25 | class Points2D(AbstractStructure): 26 | """ Stores a 2D point grid 27 | """ 28 | 29 | def __init__( 30 | self, 31 | points, 32 | size: IntTuple, 33 | flip_axis=0, 34 | dtype=torch.float32, 35 | bc=None, 36 | closest_faces=None, 37 | ) -> None: 38 | super(Points2D, self).__init__() 39 | self.points = points 40 | self.size = size 41 | self.flip_axis = flip_axis 42 | self.closest_faces = closest_faces 43 | self.bc = bc 44 | 45 | def __getitem__(self, key): 46 | if key == 'points': 47 | return self.points 48 | else: 49 | raise ValueError(f'Unknown key: {key}') 50 | 51 | def transpose(self, method): 52 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 53 | raise NotImplementedError( 54 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 55 | ) 56 | 57 | width = self.size[1] 58 | TO_REMOVE = 1 59 | flipped_points = self.points.copy() 60 | flipped_points[:, self.flip_axis] = ( 61 | width - flipped_points[:, self.flip_axis] - TO_REMOVE) 62 | 63 | if self.bc is not None: 64 | closest_tri_points = flipped_points[self.closest_faces].copy() 65 | flipped_points = ( 66 | self.bc[:, :, np.newaxis] * closest_tri_points).sum(axis=1) 67 | flipped_points = flipped_points.astype(self.points.dtype) 68 | 69 | points = type(self)(flipped_points, 70 | size=self.size, 71 | flip_axis=self.flip_axis, 72 | bc=self.bc, 73 | closest_faces=self.closest_faces, 74 | ) 75 | 76 | for k, v in self.extra_fields.items(): 77 | if isinstance(v, AbstractStructure): 78 | v = v.transpose(method) 79 | points.add_field(k, v) 80 | self.add_field('is_flipped', True) 81 | return points 82 | 83 | def to_tensor(self, *args, **kwargs): 84 | self.points = torch.from_numpy(self.points) 85 | for k, v in self.extra_fields.items(): 86 | if isinstance(v, AbstractStructure): 87 | v.to_tensor(*args, **kwargs) 88 | 89 | def shift(self, vector, *args, **kwargs): 90 | points = self.points.copy() 91 | points += vector.reshape(1, 2) 92 | 93 | field = type(self)(points, 94 | self.size, 95 | flip_axis=self.flip_axis, 96 | bc=self.bc, 97 | closest_faces=self.closest_faces) 98 | 99 | for k, v in self.extra_fields.items(): 100 | if isinstance(v, AbstractStructure): 101 | v = v.shift(vector, *args, **kwargs) 102 | field.add_field(k, v) 103 | return field 104 | 105 | def crop(self, center, scale, crop_size=224, *args, **kwargs): 106 | points = self.points.copy() 107 | transf = get_transform(center, scale, (crop_size, crop_size)) 108 | points = (np.dot( 109 | points, transf[:2, :2].T) + transf[:2, 2] + 1).astype(points.dtype) 110 | 111 | field = type(self)(points, 112 | (crop_size, crop_size, 3), 113 | flip_axis=self.flip_axis, 114 | bc=self.bc, 115 | closest_faces=self.closest_faces) 116 | 117 | for k, v in self.extra_fields.items(): 118 | if isinstance(v, AbstractStructure): 119 | v = v.crop(*args, **kwargs) 120 | field.add_field(k, v) 121 | 122 | self.add_field('rot', kwargs.get('rot', 0)) 123 | 
return field 124 | 125 | def rotate(self, rot=0, *args, **kwargs): 126 | if rot == 0: 127 | return self 128 | points = self.points.copy() 129 | (h, w) = self.size[:2] 130 | (cX, cY) = (w // 2, h // 2) 131 | M = cv2.getRotationMatrix2D((cX, cY), rot, 1.0) 132 | cos = np.abs(M[0, 0]) 133 | sin = np.abs(M[0, 1]) 134 | # compute the new bounding dimensions of the image 135 | nW = int((h * sin) + (w * cos)) 136 | nH = int((h * cos) + (w * sin)) 137 | 138 | # adjust the rotation matrix to take into account translation 139 | M[0, 2] += (nW / 2) - cX 140 | M[1, 2] += (nH / 2) - cY 141 | points = (np.dot(points, M[:2, :2].T) + M[:2, 2] + 1).astype( 142 | points.dtype) 143 | 144 | points = type(self)( 145 | points, size=self.size, flip_axis=self.flip_axis, 146 | bc=self.bc, 147 | closest_faces=self.closest_faces,) 148 | for k, v in self.extra_fields.items(): 149 | if isinstance(v, AbstractStructure): 150 | v = v.rotate(rot=rot, *args, **kwargs) 151 | points.add_field(k, v) 152 | 153 | self.add_field('rot', rot) 154 | return points 155 | 156 | def as_array(self) -> Array: 157 | if torch.is_tensor(self.points): 158 | points = self.points.detach().cpu().numpy() 159 | else: 160 | points = self.points.copy() 161 | return points 162 | 163 | def as_tensor(self, dtype=torch.float32, device=None) -> Tensor: 164 | if torch.is_tensor(self.points): 165 | return self.points 166 | else: 167 | return torch.tensor(self.points, dtype=dtype, device=device) 168 | 169 | def resize(self, size, *args, **kwargs): 170 | ratios = tuple(float(s) / float(s_orig) 171 | for s, s_orig in zip(size, self.size)) 172 | ratio_h, ratio_w, _ = ratios 173 | resized_data = self.points.copy() 174 | 175 | resized_data[..., 0] *= ratio_w 176 | resized_data[..., 1] *= ratio_h 177 | 178 | points = type(self)(resized_data, 179 | size=size, 180 | flip_axis=self.flip_axis, 181 | bc=self.bc, 182 | closest_faces=self.closest_faces,) 183 | # bbox._copy_extra_fields(self) 184 | for k, v in self.extra_fields.items(): 185 | if isinstance(v, AbstractStructure): 186 | v = v.resize(size, *args, **kwargs) 187 | points.add_field(k, v) 188 | 189 | return points 190 | 191 | def to(self, *args, **kwargs): 192 | points = type(self)( 193 | self.points.to(*args, **kwargs), 194 | size=self.size, flip_axis=self.flip_axis) 195 | for k, v in self.extra_fields.items(): 196 | if hasattr(v, "to"): 197 | v = v.to(*args, **kwargs) 198 | points.add_field(k, v) 199 | return points 200 | 201 | def normalize(self, *args, **kwargs): 202 | if torch.is_tensor(self.points): 203 | points = self.points.clone() 204 | else: 205 | points = self.points.copy() 206 | 207 | H, W, _ = self.size 208 | points[:, 0] = 2.0 * points[:, 0] / W - 1.0 209 | points[:, 1] = 2.0 * points[:, 1] / H - 1.0 210 | 211 | points = type(self)(points, size=self.size, flip_axis=self.flip_axis, 212 | bc=self.bc, 213 | closest_faces=self.closest_faces,) 214 | for k, v in self.extra_fields.items(): 215 | if isinstance(v, AbstractStructure): 216 | v = v.normalize(*args, **kwargs) 217 | points.add_field(k, v) 218 | 219 | return points 220 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/structures/vertices.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import numpy as np 12 | import torch 13 | 14 | from .abstract_structure import AbstractStructure 15 | from TempCLR.utils import Array, Tensor 16 | from loguru import logger 17 | 18 | # transpose 19 | FLIP_LEFT_RIGHT = 0 20 | FLIP_TOP_BOTTOM = 1 21 | 22 | 23 | class Vertices(AbstractStructure): 24 | """ Stores vertices 25 | """ 26 | 27 | def __init__(self, vertices, 28 | bc=None, 29 | closest_faces=None, 30 | flip=True, 31 | flip_index=0, 32 | dtype=torch.float32): 33 | super(Vertices, self).__init__() 34 | self.vertices = vertices 35 | self.flip_index = flip_index 36 | self.closest_faces = closest_faces 37 | self.bc = bc 38 | self.flip = flip 39 | 40 | def __getitem__(self, key): 41 | if key == 'vertices': 42 | return self.vertices 43 | else: 44 | raise ValueError('Unknown key: {}'.format(key)) 45 | 46 | def transpose(self, method): 47 | if not self.flip: 48 | return self 49 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 50 | raise NotImplementedError( 51 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 52 | ) 53 | 54 | if self.closest_faces is None or self.bc is None: 55 | raise RuntimeError(f'Cannot support flip without correspondences') 56 | 57 | flipped_vertices = self.vertices.copy() 58 | flipped_vertices[:, self.flip_index] *= -1 59 | 60 | closest_tri_vertices = flipped_vertices[self.closest_faces].copy() 61 | flipped_vertices = ( 62 | self.bc[:, :, np.newaxis] * closest_tri_vertices).sum(axis=1) 63 | flipped_vertices = flipped_vertices.astype(self.vertices.dtype) 64 | 65 | vertices = type(self)(flipped_vertices, flip_index=self.flip_index, 66 | bc=self.bc, closest_faces=self.closest_faces) 67 | 68 | for k, v in self.extra_fields.items(): 69 | if isinstance(v, AbstractStructure): 70 | v = v.transpose(method) 71 | vertices.add_field(k, v) 72 | self.add_field('is_flipped', True) 73 | return vertices 74 | 75 | def to_tensor(self, *args, **kwargs): 76 | self.vertices = torch.from_numpy(self.vertices) 77 | for k, v in self.extra_fields.items(): 78 | if isinstance(v, AbstractStructure): 79 | v.to_tensor(*args, **kwargs) 80 | 81 | def crop(self, *args, **kwargs): 82 | vertices = self.vertices.copy() 83 | field = type(self)(vertices, flip_index=self.flip_index, 84 | bc=self.bc, 85 | closest_faces=self.closest_faces) 86 | 87 | for k, v in self.extra_fields.items(): 88 | if isinstance(v, AbstractStructure): 89 | v = v.crop(*args, **kwargs) 90 | field.add_field(k, v) 91 | 92 | self.add_field('rot', kwargs.get('rot', 0)) 93 | return field 94 | 95 | def rotate(self, rot=0, *args, **kwargs): 96 | if rot == 0: 97 | return self 98 | vertices = self.vertices.copy() 99 | R = np.array([[np.cos(np.deg2rad(-rot)), 100 | -np.sin(np.deg2rad(-rot)), 0], 101 | [np.sin(np.deg2rad(-rot)), 102 | np.cos(np.deg2rad(-rot)), 0], 103 | [0, 0, 1]], dtype=np.float32) 104 | vertices = np.dot(vertices, R.T) 105 | 106 | vertices = type(self)(vertices, flip_index=self.flip_index, 107 | bc=self.bc, closest_faces=self.closest_faces) 108 | for k, v in self.extra_fields.items(): 109 | if isinstance(v, AbstractStructure): 110 | v = v.rotate(rot=rot, *args, **kwargs) 111 | vertices.add_field(k, v) 112 | 113 | self.add_field('rot', rot) 114 | return vertices 115 | 116 | def as_array(self) -> Array: 117 | if torch.is_tensor(self.vertices): 118 | vertices = self.vertices.detach().cpu().numpy() 119 | else: 120 | vertices = 
self.vertices.copy() 121 | return vertices 122 | 123 | def as_tensor(self, dtype=torch.float32, device=None) -> Tensor: 124 | if torch.is_tensor(self.vertices): 125 | return self.vertices 126 | else: 127 | return torch.tensor(self.vertices, dtype=dtype, device=device) 128 | 129 | def to(self, *args, **kwargs): 130 | vertices = type(self)( 131 | self.vertices.to(*args, **kwargs), flip_index=self.flip_index, 132 | bc=self.bc, 133 | closest_faces=self.closest_faces) 134 | for k, v in self.extra_fields.items(): 135 | if hasattr(v, "to"): 136 | v = v.to(*args, **kwargs) 137 | vertices.add_field(k, v) 138 | return vertices 139 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .build import build_transforms 12 | from .transforms import * 13 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | # Copyright (c) ETH Zurich and its affiliates. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | # 9 | 10 | """ 11 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 12 | """ 13 | import torchvision 14 | from loguru import logger 15 | 16 | from . 
import transforms as T 17 | 18 | 19 | def build_transforms(transf_cfg, is_train: bool, 20 | enable_augment: bool = True, 21 | return_full_imgs: bool = False): 22 | if is_train and enable_augment: 23 | flip_prob = transf_cfg.get('flip_prob', 0) 24 | # downsample_prob = transf_cfg.get('downsample_prob', 0) 25 | max_size = transf_cfg.get('max_size', -1) 26 | downsample_dist = transf_cfg.get('downsample_dist', 'categorical') 27 | downsample_cat_factors = transf_cfg.get( 28 | 'downsample_cat_factors', (1.0,)) 29 | downsample_factor_min = transf_cfg.get('downsample_factor_min', 1.0) 30 | downsample_factor_max = transf_cfg.get('downsample_factor_max', 1.0) 31 | scale_factor = transf_cfg.get('scale_factor', 0.0) 32 | scale_factor_min = transf_cfg.get('scale_factor_min', 0.0) 33 | scale_factor_max = transf_cfg.get('scale_factor_max', 0.0) 34 | scale_dist = transf_cfg.get('scale_dist', 'uniform') 35 | rotation_factor = transf_cfg.get('rotation_factor', 0.0) 36 | noise_scale = transf_cfg.get('noise_scale', 0.0) 37 | center_jitter_factor = transf_cfg.get('center_jitter_factor', 0.0) 38 | center_jitter_dist = transf_cfg.get('center_jitter_dist', 'normal') 39 | color_jitter_prob = transf_cfg.get('color_jitter_prob', 0.0) 40 | motion_blur_prob = transf_cfg.get('motion_blur_prob', 0.0) 41 | motion_blur_kernel_size_min = transf_cfg.get( 42 | 'motion_blur_kernel_size_min', 3) 43 | motion_blur_kernel_size_max = transf_cfg.get( 44 | 'motion_blur_kernel_size_max', 7) 45 | sobel_kernel_size = transf_cfg.get('sobel_kernel_size', 0) 46 | color_drop_prob = transf_cfg.get('color_drop_prob', 0.0) 47 | sobel_prob = transf_cfg.get('sobel_prob', 0.0) 48 | else: 49 | flip_prob = 0.0 50 | max_size = -1 51 | color_drop_prob = 0.0 52 | sobel_prob = 0.0 53 | # downsample_prob = 0.0 54 | # downsample_factor = 1.0 55 | downsample_dist = 'categorical' 56 | downsample_cat_factors = (1.0,) 57 | downsample_factor_min = 1.0 58 | downsample_factor_max = 1.0 59 | scale_factor = 0.0 60 | scale_factor_min = 1.0 61 | scale_factor_max = 1.0 62 | rotation_factor = 0.0 63 | noise_scale = 0.0 64 | center_jitter_factor = 0.0 65 | center_jitter_dist = transf_cfg.get('center_jitter_dist', 'normal') 66 | scale_dist = transf_cfg.get('scale_dist', 'uniform') 67 | extreme_crop_prob = 0.0 68 | torso_upper_body_prob = 0.0 69 | motion_blur_prob = 0.0 70 | motion_blur_kernel_size_min = transf_cfg.get( 71 | 'motion_blur_kernel_size_min', 3) 72 | motion_blur_kernel_size_max = transf_cfg.get( 73 | 'motion_blur_kernel_size_max', 7) 74 | color_jitter_prob = transf_cfg.get('color_jitter_prob', 0.0) 75 | sobel_kernel_size = transf_cfg.get('sobel_kernel_size', 0) 76 | 77 | normalize_transform = T.Normalize( 78 | transf_cfg.get('mean'), transf_cfg.get('std')) 79 | logger.debug('Normalize {}', normalize_transform) 80 | 81 | crop_size = transf_cfg.get('crop_size') 82 | crop = T.Crop(crop_size=crop_size, is_train=is_train, 83 | scale_factor_max=scale_factor_max, 84 | scale_factor_min=scale_factor_min, 85 | scale_factor=scale_factor, 86 | scale_dist=scale_dist, 87 | return_full_imgs=return_full_imgs, 88 | ) 89 | pixel_noise = T.ChannelNoise(noise_scale=noise_scale) 90 | logger.debug('Crop {}', crop) 91 | 92 | downsample = T.SimulateLowRes( 93 | dist=downsample_dist, 94 | cat_factors=downsample_cat_factors, 95 | factor_min=downsample_factor_min, 96 | factor_max=downsample_factor_max) 97 | 98 | transform = T.Compose( 99 | [ 100 | T.Resize(max_size), 101 | T.SobelFilter(sobel_kernel_size, sobel_prob), 102 | T.MotionBlur(motion_blur_prob, 103 | 
kernel_size_min=motion_blur_kernel_size_min, 104 | kernel_size_max=motion_blur_kernel_size_max, 105 | ), 106 | T.BBoxCenterJitter(center_jitter_factor, dist=center_jitter_dist), 107 | T.ColorJitter(color_jitter_prob), 108 | T.ColorDrop(color_drop_prob), 109 | pixel_noise, 110 | T.RandomHorizontalFlip(flip_prob), 111 | T.RandomRotation( 112 | is_train=is_train, rotation_factor=rotation_factor), 113 | crop, 114 | downsample, 115 | T.ToTensor(), 116 | normalize_transform, 117 | ] 118 | ) 119 | return transform 120 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .keypoints import ( 12 | read_keypoints, 13 | get_part_idxs, 14 | create_flip_indices, 15 | kp_connections, 16 | map_keypoints, 17 | threshold_and_keep_parts, 18 | ) 19 | 20 | from .bbox import * 21 | from .transforms import flip_pose 22 | from .keypoint_names import * 23 | from .struct_utils import targets_to_array_and_indices 24 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/utils/bbox.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from typing import Dict, NewType, Tuple 12 | 13 | 14 | import torch 15 | import numpy as np 16 | from loguru import logger 17 | from TempCLR.utils import Tensor 18 | 19 | __all__ = [ 20 | 'points_to_bbox', 21 | 'center_size_to_bbox', 22 | 'keyps_to_bbox', 23 | 'bbox_to_center_scale', 24 | 'scale_to_bbox_size', 25 | 'bbox_area', 26 | 'bbox_to_wh', 27 | 'bbox_iou', 28 | 'bbox_xyxy_to_bbox_xywh', 29 | 'bbox_xywh_to_bbox_xyxy', 30 | ] 31 | 32 | 33 | def points_to_bbox( 34 | points: Tensor, 35 | bbox_scale_factor: float = 1.0) -> Tuple[Tensor, Tensor]: 36 | 37 | min_coords, _ = torch.min(points, dim=1) 38 | xmin, ymin = min_coords[:, 0], min_coords[:, 1] 39 | max_coords, _ = torch.max(points, dim=1) 40 | xmax, ymax = max_coords[:, 0], max_coords[:, 1] 41 | 42 | center = torch.stack( 43 | [xmax + xmin, ymax + ymin], dim=-1) * 0.5 44 | 45 | width = (xmax - xmin) 46 | height = (ymax - ymin) 47 | 48 | # Convert the bounding box to a square box 49 | size = torch.max(width, height) * bbox_scale_factor 50 | 51 | return center, size 52 | 53 | 54 | def center_size_to_bbox(center: Tensor, size: Tensor) -> Tensor: 55 | xmin = center[:, 0] - size * 0.5 56 | ymin = center[:, 1] - size * 0.5 57 | 58 | xmax = center[:, 0] + size * 0.5 59 | ymax = center[:, 1] + size * 0.5 60 | 61 | return torch.stack([xmin, ymin, xmax, ymax], axis=-1) 62 | 63 | 64 | def keyps_to_bbox(keypoints, conf, img_size=None, clip_to_img=False, 65 | min_valid_keypoints=6, scale=1.0): 66 | valid_keypoints = keypoints[conf > 0] 67 | if len(valid_keypoints) < min_valid_keypoints: 68 | return None 69 | 70 | xmin, ymin = np.amin(valid_keypoints, axis=0) 71 | xmax, ymax = np.amax(valid_keypoints, axis=0) 72 | # Clip to the image 73 | if img_size is not None and clip_to_img: 74 | H, W, _ = img_size 75 | xmin = np.clip(xmin, 0, W) 76 | xmax = np.clip(xmax, 0, W) 77 | ymin = np.clip(ymin, 0, H) 78 | ymax = np.clip(ymax, 0, H) 79 | 80 | width = (xmax - xmin) * scale 81 | height = (ymax - ymin) * scale 82 | 83 | x_center = 0.5 * (xmax + xmin) 84 | y_center = 0.5 * (ymax + ymin) 85 | xmin = x_center - 0.5 * width 86 | xmax = x_center + 0.5 * width 87 | ymin = y_center - 0.5 * height 88 | ymax = y_center + 0.5 * height 89 | 90 | bbox = np.stack([xmin, ymin, xmax, ymax], axis=0).astype(np.float32) 91 | if bbox_area(bbox) > 0: 92 | return bbox 93 | else: 94 | return None 95 | 96 | 97 | def bbox_to_center_scale(bbox, dset_scale_factor=1.0, ref_bbox_size=200): 98 | if bbox is None: 99 | return None, None, None 100 | bbox = bbox.reshape(-1) 101 | bbox_size = dset_scale_factor * max( 102 | bbox[2] - bbox[0], bbox[3] - bbox[1]) 103 | scale = bbox_size / ref_bbox_size 104 | center = np.stack( 105 | [(bbox[0] + bbox[2]) * 0.5, 106 | (bbox[1] + bbox[3]) * 0.5]).astype(np.float32) 107 | return center, scale, bbox_size 108 | 109 | 110 | def scale_to_bbox_size(scale, ref_bbox_size=200): 111 | return scale * ref_bbox_size 112 | 113 | 114 | def bbox_area(bbox): 115 | if torch.is_tensor(bbox): 116 | if bbox is None: 117 | return 0.0 118 | xmin, ymin, xmax, ymax = torch.split(bbox.reshape(-1, 4), 1, dim=1) 119 | return torch.abs((xmax - xmin) * (ymax - ymin)).squeeze(dim=-1) 120 | else: 121 | if bbox is None: 122 | return 0.0 123 | xmin, ymin, xmax, ymax = np.split(bbox.reshape(-1, 4), 4, axis=1) 124 | return np.abs((xmax - xmin) * (ymax - ymin)) 125 | 126 | 127 | def bbox_to_wh(bbox): 128 | if bbox is None: 129 | return (0.0, 0.0) 130 | xmin, ymin, xmax, ymax = np.split(bbox.reshape(-1, 4), 4, axis=1) 131 | return xmax - xmin, ymax - ymin 132 | 133 | 
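# Illustrative usage sketch (not part of the original module): how the helpers
# above are typically combined, assuming an (N, 2) array of 2D keypoints with
# per-point confidences. Variable values below are hypothetical.
#
#   keypoints = np.random.rand(21, 2) * 224               # fake 2D hand keypoints in pixels
#   conf = np.ones(21, dtype=np.float32)                  # all keypoints treated as valid
#   bbox = keyps_to_bbox(keypoints, conf, scale=1.2)      # xyxy box around the keypoints
#   center, scale, bbox_size = bbox_to_center_scale(
#       bbox, dset_scale_factor=1.0, ref_bbox_size=200)   # crop parameters for the transforms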
134 | def bbox_xyxy_to_bbox_xywh(bbox): 135 | if bbox is None: 136 | return (0.0, 0.0, 0.0, 0.0) 137 | bbox = np.asarray(bbox) 138 | xmin, ymin, xmax, ymax = np.split(bbox.reshape(-1, 4), 4, axis=1) 139 | return np.array([xmin, ymin, xmax - xmin, ymax - ymin], dtype=bbox.dtype) 140 | 141 | 142 | def bbox_xywh_to_bbox_xyxy(bbox): 143 | if bbox is None: 144 | return (0.0, 0.0, 0.0, 0.0) 145 | bbox = np.asarray(bbox) 146 | xmin, ymin, width, height = np.split(bbox.reshape(-1, 4), 4, axis=1) 147 | out_bbox = np.array( 148 | [xmin, ymin, xmin + width, ymin + height], dtype=bbox.dtype) 149 | return out_bbox.reshape(-1, 4) 150 | 151 | 152 | def bbox_iou(bbox1, bbox2, epsilon=1e-9): 153 | ''' Computes IoU between bounding boxes 154 | 155 | Parameters 156 | ---------- 157 | bbox1: torch.Tensor or np.ndarray 158 | A Nx4 array of bounding boxes in xyxy format 159 | bbox2: torch.Tensor or np.ndarray 160 | A Nx4 array of bounding boxes in xyxy format 161 | Returns 162 | ------- 163 | ious: torch.Tensor or np.ndarray 164 | A N dimensional array that contains the IoUs between bounding 165 | box pairs 166 | ''' 167 | if torch.is_tensor(bbox1): 168 | # B 169 | bbox1 = bbox1.reshape(-1, 4) 170 | bbox2 = bbox2.reshape(-1, 4) 171 | 172 | # Should be B 173 | left_top = torch.max(bbox1[:, :2], bbox2[:, :2]) 174 | right_bottom = torch.min(bbox1[:, 2:], bbox2[:, 2:]) 175 | 176 | wh = (right_bottom - left_top).clamp(min=0) 177 | 178 | area1, area2 = bbox_area(bbox1), bbox_area(bbox2) 179 | 180 | isect = wh[:, 0] * wh[:, 1].reshape(bbox1.shape[0]) 181 | union = (area1 + area2 - isect).reshape(bbox1.shape[0]) 182 | else: 183 | bbox1 = bbox1.reshape(4) 184 | bbox2 = bbox2.reshape(4) 185 | 186 | left_top = np.maximum(bbox1[:2], bbox2[:2]) 187 | right_bottom = np.minimum(bbox1[2:], bbox2[2:]) 188 | 189 | wh = right_bottom - left_top 190 | 191 | area1, area2 = bbox_area(bbox1), bbox_area(bbox2) 192 | 193 | isect = np.clip(wh[0] * wh[1], 0, float('inf')) 194 | union = (area1 + area2 - isect).squeeze() 195 | 196 | return isect / (union + epsilon) 197 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/utils/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import Union, List 12 | 13 | import numpy as np 14 | 15 | import PIL.Image as pil_img 16 | 17 | 18 | class ImageList: 19 | def __init__(self, images: List): 20 | assert isinstance(images, (list, tuple)) 21 | 22 | def to_tensor(self): 23 | pass 24 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/data/utils/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """
11 | import time
12 | from itertools import cycle
13 |
14 | import numpy as np
15 | import torch
16 | import torch.utils.data as dutils
17 | from loguru import logger
18 | from collections import defaultdict
19 | import random
20 |
21 |
22 | class EqualSampler(dutils.Sampler):
23 |     def __init__(self, datasets, batch_size=1, ratio_2d=0.5, shuffle=False):
24 |         super(EqualSampler, self).__init__(datasets)
25 |         self.num_datasets = len(datasets)
26 |         self.ratio_2d = ratio_2d
27 |
28 |         self.shuffle = shuffle
29 |         self.dset_sizes = {}
30 |         self.elements_per_index = {}
31 |         self.only_2d = {}
32 |         self.offsets = {}
33 |         start = 0
34 |         for dset in datasets:
35 |             self.dset_sizes[dset.name()] = len(dset)
36 |             self.offsets[dset.name()] = start
37 |             self.only_2d[dset.name()] = dset.only_2d()
38 |             self.elements_per_index[
39 |                 dset.name()] = dset.get_elements_per_index()
40 |
41 |             start += len(dset)
42 |
43 |         if ratio_2d < 1.0 and sum(self.only_2d.values()) == len(self.only_2d):
44 |             raise ValueError(
45 |                 f'Invalid 2D ratio value: {ratio_2d} with only 2D data')
46 |
47 |         self.length = sum(map(lambda x: len(x), datasets))
48 |
49 |         self.batch_size = batch_size
50 |         self._can_reuse_batches = False
51 |         logger.info(self)
52 |
53 |     def __repr__(self):
54 |         msg = 'EqualSampler(batch_size={}, shuffle={}, ratio_2d={}\n'.format(
55 |             self.batch_size, self.shuffle, self.ratio_2d)
56 |         for dset_name in self.dset_sizes:
57 |             msg += '\t{}: {}, only 2D is {}\n'.format(
58 |                 dset_name, self.dset_sizes[dset_name],
59 |                 self.only_2d[dset_name])
60 |
61 |         return msg + ')'
62 |
63 |     def _prepare_batches(self):
64 |         batch_idxs = []
65 |
66 |         dset_idxs = {}
67 |         for dset_name, dset_size in self.dset_sizes.items():
68 |             if self.shuffle:
69 |                 dset_idxs[dset_name] = cycle(
70 |                     iter(torch.randperm(dset_size).tolist()))
71 |             else:
72 |                 dset_idxs[dset_name] = cycle(range(dset_size))
73 |
74 |         num_batches = int(round(self.length / self.batch_size))
75 |         for bidx in range(num_batches):
76 |             curr_idxs = []
77 |             num_samples = 0
78 |             num_2d_only = 0
79 |             max_num_2d = int(self.batch_size * self.ratio_2d)
80 |             idxs_add = defaultdict(lambda: 0)
81 |             while num_samples < self.batch_size:
82 |                 for dset_name in dset_idxs:
83 |                     # If we already have self.ratio_2d * batch_size items with
84 |                     # 2D annotations then ignore this dataset for now
85 |                     if num_2d_only >= max_num_2d and self.only_2d[dset_name]:
86 |                         continue
87 |                     try:
88 |                         curr_idxs.append(
89 |                             next(dset_idxs[dset_name]) +
90 |                             self.offsets[dset_name])
91 |                         num_samples += self.elements_per_index[dset_name]
92 |                         # If the dataset has only 2D annotations increase the
93 |                         # count
94 |                         num_2d_only += (self.elements_per_index[dset_name] *
95 |                                         self.only_2d[dset_name])
96 |                         idxs_add[dset_name] += (
97 |                             self.elements_per_index[dset_name])
98 |                     finally:
99 |                         pass
100 |                     if num_samples >= self.batch_size:
101 |                         break
102 |
103 |             curr_idxs = np.array(curr_idxs)
104 |             if self.shuffle:
105 |                 np.random.shuffle(curr_idxs)
106 |             batch_idxs.append(curr_idxs)
107 |         return batch_idxs
108 |
109 |     def __len__(self):
110 |         if not hasattr(self, '_batch_idxs'):
111 |             self._batch_idxs = self._prepare_batches()
112 |             self._can_reuse_batches = True
113 |         return len(self._batch_idxs)
114 |
115 |     def __iter__(self):
116 |         if self._can_reuse_batches:
117 |             batch_idxs = self._batch_idxs
118 |             self._can_reuse_batches = False
119 |         else:
120 |             batch_idxs = self._prepare_batches()
121 |
122 |         self._batch_idxs = batch_idxs
123 |         return iter(batch_idxs)
124 |
125 |
126 | class SequenceSampler(dutils.Sampler):
127 |     def __init__(self, datasets, batch_size=1, shuffle=True):
128 |         super(SequenceSampler, self).__init__(datasets)
129 |         self.num_datasets = len(datasets)
130 |         self.dset_sizes = {}
131 |         self.elements_per_index = {}
132 |         self.offsets = {}
133 |
134 |         start = 0
135 |         self.datasets = {}
136 |         for dset in datasets:
137 |             self.datasets[dset.name()] = dset
138 |             self.dset_sizes[dset.name()] = len(dset)
139 |             self.offsets[dset.name()] = start
140 |             self.elements_per_index[
141 |                 dset.name()] = dset.get_elements_per_index()
142 |
143 |             start += len(dset)
144 |
145 |         self.shuffle = shuffle
146 |         self.batch_size = batch_size
147 |         self._can_reuse_batches = False
148 |         self.length = sum(map(lambda x: len(x.get_subseq_start_end_indices()), datasets))
149 |         logger.info(self)
150 |
151 |     def __repr__(self):
152 |         msg = 'SequenceSampler(batch_size={}, \n'.format(
153 |             self.batch_size)
154 |         for dset_name in self.dset_sizes:
155 |             msg += '\t{}: {}\n'.format(
156 |                 dset_name, self.dset_sizes[dset_name])
157 |
158 |         return msg + ')'
159 |
160 |     def _prepare_batches(self):
161 |         batch_idxs = []
162 |
163 |         sequences_per_dataset = {}
164 |         for dset_name in self.datasets.keys():
165 |             subseq_list = self.datasets[dset_name].get_subseq_start_end_indices()
166 |             if self.shuffle:
167 |                 random.shuffle(subseq_list)
168 |             sequences_per_dataset[dset_name] = cycle(iter(subseq_list))
169 |
170 |         num_batches = int(round(self.length / self.batch_size))
171 |         for bidx in range(num_batches):
172 |             curr_idxs = []
173 |             num_sequences = 0
174 |             while num_sequences < self.batch_size:
175 |                 for dset_name in sequences_per_dataset.keys():
176 |                     curr_seq = next(sequences_per_dataset[dset_name])
177 |
178 |                     curr_idxs += [idx + self.offsets[dset_name] for idx in curr_seq]
179 |                     num_sequences += 1
180 |
181 |                     if num_sequences == self.batch_size:
182 |                         break
183 |
184 |             if num_sequences == self.batch_size:
185 |                 batch_idxs.append(np.array(curr_idxs))
186 |
187 |         return batch_idxs
188 |
189 |     def __len__(self):
190 |         if not hasattr(self, '_batch_idxs'):
191 |             self._batch_idxs = self._prepare_batches()
192 |             self._can_reuse_batches = True
193 |         return len(self._batch_idxs)
194 |
195 |     def __iter__(self):
196 |         if self._can_reuse_batches:
197 |             batch_idxs = self._batch_idxs
198 |             self._can_reuse_batches = False
199 |         else:
200 |             batch_idxs = self._prepare_batches()
201 |
202 |         self._batch_idxs = batch_idxs
203 |         return iter(batch_idxs)
204 |
--------------------------------------------------------------------------------
/TempCLR/TempCLR/data/utils/struct_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ETH Zurich and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | """
9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR.
10 | """
11 | from typing import Tuple
12 | import numpy as np
13 | import torch
14 |
15 | from TempCLR.utils import Array
16 |
17 |
18 | def targets_to_array_and_indices(
19 |         targets,
20 |         field_key: str,
21 |         data_key: str,
22 | ) -> Tuple[Array, Array]:
23 |     indices = np.array([ii for ii, t in enumerate(targets) if
24 |                         t.has_field(field_key)], dtype=int)
25 |     if len(indices) > 1:
26 |         data_lst = []
27 |         for ii, t in enumerate(targets):
28 |             if t.has_field(field_key):
29 |                 data = getattr(t.get_field(field_key), data_key)
30 |                 if torch.is_tensor(data):
31 |                     data = data.detach().cpu().numpy()
32 |                 data_lst.append(data)
33 |         data_array = np.stack(data_lst)
34 |         return data_array, indices
35 |     else:
36 |         return np.array([]), indices
37 |
--------------------------------------------------------------------------------
/TempCLR/TempCLR/data/utils/transforms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ETH Zurich and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | """
9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR.
10 | """
11 | import numpy as np
12 |
13 | import torch
14 |
15 | DIM_FLIP = np.array([1, -1, -1], dtype=np.float32)
16 | DIM_FLIP_TENSOR = torch.tensor([1, -1, -1], dtype=torch.float32)
17 |
18 |
19 | def flip_pose(pose_vector, pose_format='aa'):
20 |     if pose_format == 'aa':
21 |         if torch.is_tensor(pose_vector):
22 |             dim_flip = DIM_FLIP_TENSOR
23 |         else:
24 |             dim_flip = DIM_FLIP
25 |         return (pose_vector.reshape(-1, 3) * dim_flip).reshape(-1)
26 |     elif pose_format == 'rot-mat':
27 |         rot_mats = pose_vector.reshape(-1, 9).clone()
28 |
29 |         rot_mats[:, [1, 2, 3, 6]] *= -1
30 |         return rot_mats.view_as(pose_vector)
31 |     else:
32 |         raise ValueError(f'Unknown rotation format: {pose_format}')
33 |
--------------------------------------------------------------------------------
/TempCLR/TempCLR/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ETH Zurich and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | """
9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR.
10 | """
11 | from .build import build_model
12 | from .hand_heads import HAND_HEAD_REGISTRY
13 |
--------------------------------------------------------------------------------
/TempCLR/TempCLR/models/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ETH Zurich and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | #
7 |
8 | """
9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR.
10 | """ 11 | from .build import build_backbone 12 | from .utils import make_projection_head -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import torch 12 | 13 | from .resnet import resnets 14 | from .fpn import build_fpn_backbone 15 | from .hrnet import build as build_hr_net 16 | 17 | 18 | def build_backbone(backbone_cfg): 19 | backbone_type = backbone_cfg.get('type', 'resnet50') 20 | # use_avgpool = cfg.get('network', {}).get('type') != 'attention' 21 | pretrained = backbone_cfg.get('pretrained', True) 22 | 23 | if 'fpn' in backbone_type: 24 | backbone = build_fpn_backbone(backbone_cfg, pretrained=pretrained) 25 | return backbone, backbone.get_output_dim() 26 | elif 'hrnet' in backbone_type: 27 | backbone = build_hr_net( 28 | backbone_cfg, pretrained=True) 29 | return backbone, backbone.get_output_dim() 30 | elif 'resnet' in backbone_type: 31 | resnet_cfg = backbone_cfg.get('resnet') 32 | backbone = resnets[backbone_type]( 33 | pretrained=backbone_cfg.pretrained, freeze=backbone_cfg.freeze, **resnet_cfg) 34 | return backbone, backbone.get_output_dim() 35 | else: 36 | msg = 'Unknown backbone type: {}'.format(backbone_type) 37 | raise ValueError(msg) 38 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import print_function 13 | from __future__ import division 14 | 15 | import sys 16 | from loguru import logger 17 | 18 | from typing import Dict 19 | 20 | import torch 21 | import torch.nn as nn 22 | 23 | from torchvision.models import resnet 24 | from torchvision.models.detection.backbone_utils import ( 25 | BackboneWithFPN as _BackboneWithFPN) 26 | 27 | FPN_FEATURE_DIM = 256 28 | 29 | 30 | class BackboneWithFPN(_BackboneWithFPN): 31 | def __init__(self, *args, **kwargs): 32 | super(BackboneWithFPN, self).__init__(*args, **kwargs) 33 | 34 | def forward(self, x): 35 | body_features = getattr(self, 'body')(x) 36 | 37 | output = getattr(self, 'fpn')(body_features) 38 | 39 | for key in body_features: 40 | output[f'body_{key}'] = body_features[key] 41 | return output 42 | 43 | 44 | def resnet_fpn_backbone(backbone_name, pretrained=True, freeze=False): 45 | backbone = resnet.__dict__[backbone_name]( 46 | pretrained=pretrained) 47 | if freeze: 48 | # freeze layers 49 | for name, parameter in backbone.named_parameters(): 50 | if ('layer2' not in name and 'layer3' not in name and 51 | 'layer4' not in name): 52 | parameter.requires_grad_(False) 53 | 54 | return_layers = {'layer1': 'layer1', 55 | 'layer2': 'layer2', 56 | 'layer3': 'layer3', 57 | 'layer4': 'layer4'} 58 | 59 | in_channels_stage2 = backbone.inplanes // 8 60 | in_channels_list = [ 61 | in_channels_stage2, 62 | in_channels_stage2 * 2, 63 | in_channels_stage2 * 4, 64 | in_channels_stage2 * 8, 65 | ] 66 | out_channels = 256 67 | return BackboneWithFPN(backbone, return_layers, in_channels_list, 68 | out_channels) 69 | 70 | 71 | def build_fpn_backbone(backbone_cfg, 72 | pretrained=True) -> nn.Module: 73 | backbone_type = backbone_cfg.get('type', 'resnet50') 74 | 75 | resnet_type = backbone_type.replace('fpn', '').replace('_', '').replace( 76 | '-', '') 77 | network = resnet_fpn_backbone(resnet_type, pretrained=pretrained) 78 | 79 | fpn_cfg = backbone_cfg.get('fpn', {}) 80 | 81 | return RegressionFPN(network, fpn_cfg) 82 | 83 | 84 | class SumAvgPooling(nn.Module): 85 | def __init__(self, pooling_type='avg', **kwargs) -> None: 86 | super(SumAvgPooling, self).__init__() 87 | 88 | if pooling_type == 'avg': 89 | self.pooling = nn.AdaptiveAvgPool2d(1) 90 | elif pooling_type == 'max': 91 | self.pooling = nn.AdaptiveMaxPool2d(1) 92 | else: 93 | raise ValueError(f'Unknown pooling function: {pooling_type}') 94 | 95 | def get_out_feature_dim(self) -> int: 96 | return FPN_FEATURE_DIM 97 | 98 | def forward(self, features: Dict[str, torch.Tensor]) -> torch.Tensor: 99 | 100 | pooled_features = {} 101 | # Pool each feature map 102 | for key in features: 103 | batch_size, feat_dim = features[key].shape[:2] 104 | pooled_features[key] = self.pooling(features[key]).view( 105 | batch_size, feat_dim) 106 | 107 | # Sum the individual features 108 | return sum(pooled_features.values()) 109 | 110 | 111 | class ConcatPooling(nn.Module): 112 | def __init__(self, use_max: bool = True, use_avg: bool = True, 113 | **kwargs) -> None: 114 | super(ConcatPooling, self).__init__() 115 | assert use_avg or use_max, 'Either max or avg pooling should be on' 116 | 117 | self.use_avg = use_avg 118 | self.use_max = use_max 119 | if use_avg: 120 | self.avg_pooling = nn.AdaptiveAvgPool2d(1) 121 | if use_max: 122 | self.max_pooling = nn.AdaptiveMaxPool2d(1) 123 | 124 | def extra_repr(self) -> str: 125 | msg = [f'Use average pooling: {self.use_avg}', 126 | f'Use max pooling: {self.use_max}'] 127 | return 
'\n'.join(msg) 128 | 129 | def get_out_feature_dim(self) -> int: 130 | return 5 * ( 131 | self.use_avg * FPN_FEATURE_DIM + self.use_max * FPN_FEATURE_DIM) 132 | 133 | def forward(self, features: Dict[str, torch.Tensor]) -> torch.Tensor: 134 | pooled_features = [] 135 | for key in features: 136 | batch_size, feat_dim = features[key].shape[:2] 137 | feats = [] 138 | if self.use_avg: 139 | avg_pooled_features = self.avg_pooling(features[key]).view( 140 | batch_size, feat_dim) 141 | feats.append(avg_pooled_features) 142 | if self.use_max: 143 | max_pooled_features = self.max_pooling(features[key]).view( 144 | batch_size, feat_dim) 145 | feats.append(max_pooled_features) 146 | pooled_features.append( 147 | torch.cat(feats, dim=-1)) 148 | return torch.cat(pooled_features, dim=-1) 149 | 150 | 151 | class BilinearPooling(nn.Module): 152 | def __init__(self, pooling_type='avg', **kwargs) -> None: 153 | super(BilinearPooling, self).__init__() 154 | raise NotImplementedError 155 | if pooling_type == 'avg': 156 | self.pooling = nn.AdaptiveAvgPool2d(1) 157 | elif pooling_type == 'max': 158 | self.pooling = nn.AdaptiveMaxPool2d(1) 159 | else: 160 | raise ValueError(f'Unknown pooling function: {pooling_type}') 161 | 162 | def forward(self, features: Dict[str, torch.Tensor]) -> torch.Tensor: 163 | pooled_features = {} 164 | # Pool each feature map 165 | for key in features: 166 | batch_size, feat_dim = features[key].shape[:2] 167 | pooled_features[key] = self.pooling(features[key]).view( 168 | batch_size, feat_dim) 169 | # Should be BxNxK 170 | stacked_features = torch.stack(pooled_features.values(), dim=1) 171 | pass 172 | 173 | 174 | # class RegressionFPN(nn.Module): 175 | class RegressionFPN(nn.Module): 176 | 177 | def __init__(self, backbone, fpn_cfg) -> None: 178 | super(RegressionFPN, self).__init__() 179 | self.feat_extractor = backbone 180 | 181 | pooling_type = fpn_cfg.get('pooling_type', 'sum_avg') 182 | self.avg_pooling = nn.AdaptiveAvgPool2d(1) 183 | if pooling_type == 'sum_avg': 184 | sum_avg_cfg = fpn_cfg.get('sum_avg', {}) 185 | self.pooling = SumAvgPooling(**sum_avg_cfg) 186 | elif pooling_type == 'concat': 187 | concat_cfg = fpn_cfg.get('concat', {}) 188 | self.pooling = ConcatPooling(**concat_cfg) 189 | elif pooling_type == 'none': 190 | self.pooling = None 191 | else: 192 | raise ValueError(f'Unknown pooling type {pooling_type}') 193 | 194 | def get_output_dim(self) -> int: 195 | output = { 196 | 'layer1': FPN_FEATURE_DIM, 197 | 'layer2': FPN_FEATURE_DIM, 198 | 'layer3': FPN_FEATURE_DIM, 199 | 'layer4': FPN_FEATURE_DIM, 200 | } 201 | 202 | for key in output: 203 | output[f'{key}_avg_pooling'] = FPN_FEATURE_DIM 204 | return output 205 | 206 | def forward(self, x: torch.Tensor) -> torch.Tensor: 207 | features = self.feat_extractor(x) 208 | 209 | if self.pooling is not None: 210 | pass 211 | features['avg_pooling'] = self.avg_pooling(features['body_layer4']) 212 | return features 213 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
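# --- Illustrative sketch, not from the original repository. It exercises the ConcatPooling
# module from fpn.py above on a dict of four FPN feature maps (FPN_FEATURE_DIM = 256): each
# map is average- and max-pooled to 512 values and the results are concatenated, giving
# 4 * 512 = 2048 features here. (get_out_feature_dim assumes five pyramid levels, hence
# 5 * 512 = 2560.)
import torch
# from TempCLR.models.backbone.fpn import ConcatPooling

pool = ConcatPooling(use_avg=True, use_max=True)
feats = {f'layer{i}': torch.randn(2, 256, 64 >> (i - 1), 64 >> (i - 1)) for i in range(1, 5)}
out = pool(feats)      # shape: (2, 2048)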
10 | """ 11 | from __future__ import absolute_import 12 | from __future__ import print_function 13 | from __future__ import division 14 | 15 | import sys 16 | from loguru import logger 17 | 18 | import torch 19 | import torch.nn as nn 20 | import torch.utils.model_zoo as model_zoo 21 | 22 | from torchvision.models.resnet import (ResNet, Bottleneck, BasicBlock, 23 | model_urls) 24 | 25 | 26 | class RegressionResNet(ResNet): 27 | 28 | def __init__(self, block, layers, forward_to=4, 29 | num_classes=1000, 30 | use_avgpool=True, 31 | replace_stride_with_dilation=None, 32 | zero_init_residual=False, **kwargs): 33 | super(RegressionResNet, self).__init__( 34 | block, layers, 35 | replace_stride_with_dilation=replace_stride_with_dilation) 36 | self.forward_to = forward_to 37 | msg = 'Forward to must be from 0 to 4' 38 | assert self.forward_to > 0 and self.forward_to <= 4, msg 39 | 40 | self.replace_stride_with_dilation = replace_stride_with_dilation 41 | 42 | self.expansion = block.expansion 43 | self.output_dim = block.expansion * 512 44 | self.use_avgpool = use_avgpool 45 | if not use_avgpool: 46 | del self.avgpool 47 | del self.fc 48 | 49 | def extra_repr(self): 50 | if self.replace_stride_with_dilation is None: 51 | msg = [ 52 | f'Layer 1: {64 * self.expansion}, H / 4, W / 4', 53 | f'Layer 2: {64 * self.expansion * 2}, H / 8, W / 8', 54 | f'Layer 3: {64 * self.expansion * 4}, H / 16, W / 16', 55 | f'Layer 4: {64 * self.expansion * 8}, H / 32, W / 32' 56 | ] 57 | else: 58 | if not any(self.replace_stride_with_dilation): 59 | msg = [ 60 | f'Layer 1: {64 * self.expansion}, H / 4, W / 4', 61 | f'Layer 2: {64 * self.expansion * 2}, H / 8, W / 8', 62 | f'Layer 3: {64 * self.expansion * 4}, H / 16, W / 16', 63 | f'Layer 4: {64 * self.expansion * 8}, H / 32, W / 32' 64 | ] 65 | else: 66 | layer2 = 4 * 2 ** (not self.replace_stride_with_dilation[0]) 67 | layer3 = (layer2 * 68 | 2 ** (not self.replace_stride_with_dilation[1])) 69 | layer4 = (layer3 * 70 | 2 ** (not self.replace_stride_with_dilation[2])) 71 | msg = [ 72 | f'Layer 1: {64 * self.expansion}, H / 4, W / 4', 73 | f'Layer 2: {64 * self.expansion * 2}, H / {layer2}, ' 74 | f'W / {layer2}', 75 | f'Layer 3: {64 * self.expansion * 4}, H / {layer3}, ' 76 | f'W / {layer3}', 77 | f'Layer 4: {64 * self.expansion * 8}, H / {layer4}, ' 78 | f'W / {layer4}' 79 | ] 80 | 81 | return '\n'.join(msg) 82 | 83 | def get_output_dim(self): 84 | return { 85 | 'layer1': 64 * self.expansion, 86 | 'layer2': 64 * self.expansion * 2, 87 | 'layer3': 64 * self.expansion * 4, 88 | 'layer4': 64 * self.expansion * 8, 89 | 'avg_pooling': 64 * self.expansion * 8, 90 | } 91 | 92 | def forward(self, x): 93 | x = self.conv1(x) 94 | x = self.bn1(x) 95 | x = self.relu(x) 96 | x = self.maxpool(x) 97 | output = {'maxpool': x} 98 | 99 | x = self.layer1(x) 100 | output['layer1'] = x 101 | x = self.layer2(x) 102 | output['layer2'] = x 103 | x = self.layer3(x) 104 | output['layer3'] = x 105 | x = self.layer4(x) 106 | output['layer4'] = x 107 | 108 | # Output size: BxC 109 | x = self.avgpool(x).view(x.size(0), -1) 110 | output['avg_pooling'] = x 111 | 112 | return output 113 | 114 | 115 | def resnet18(pretrained=False, freeze=False, **kwargs): 116 | """Constructs a ResNet-18 model. 
117 | 118 | Args: 119 | pretrained (bool): If True, returns a model pre-trained on ImageNet 120 | freeze (bool): If True, freeze the weights of the network 121 | """ 122 | model = RegressionResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 123 | if pretrained: 124 | logger.info('Loading pretrained ResNet-18') 125 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']), 126 | strict=False) 127 | if freeze: 128 | logger.info("Freeze ResNet-18 parameters") 129 | for param in model.parameters(): 130 | param.requires_grad = False 131 | 132 | return model 133 | 134 | 135 | def resnet34(pretrained=False, freeze=False, **kwargs): 136 | """Constructs a ResNet-34 model. 137 | 138 | Args: 139 | pretrained (bool): If True, returns a model pre-trained on ImageNet 140 | freeze (bool): If True, freeze the weights of the network 141 | """ 142 | model = RegressionResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 143 | if pretrained: 144 | logger.info('Loading pretrained ResNet-34') 145 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']), 146 | strict=False) 147 | if freeze: 148 | logger.info("Freeze ResNet-34 parameters") 149 | for param in model.parameters(): 150 | param.requires_grad = False 151 | 152 | return model 153 | 154 | 155 | def resnet50(pretrained=False, freeze=False, **kwargs): 156 | """Constructs a ResNet-50 model. 157 | 158 | Args: 159 | pretrained (bool): If True, returns a model pre-trained on ImageNet 160 | freeze (bool): If True, freeze the weights of the network 161 | """ 162 | model = RegressionResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 163 | if pretrained: 164 | logger.info('Loading pretrained ResNet-50') 165 | missing, unexpected = model.load_state_dict( 166 | model_zoo.load_url(model_urls['resnet50']), strict=False) 167 | if len(missing) > 0: 168 | logger.warning( 169 | f'The following keys were not found: {missing}') 170 | if len(unexpected): 171 | logger.warning( 172 | f'The following keys were not expected: {unexpected}') 173 | 174 | if freeze: 175 | logger.info("Freeze ResNet-50 parameters") 176 | for param in model.parameters(): 177 | param.requires_grad = False 178 | 179 | return model 180 | 181 | 182 | def resnet101(pretrained=False, freeze=False, **kwargs): 183 | """Constructs a ResNet-101 model. 184 | 185 | Args: 186 | pretrained (bool): If True, returns a model pre-trained on ImageNet 187 | freeze (bool): If True, freeze the weights of the network 188 | """ 189 | model = RegressionResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 190 | if pretrained: 191 | logger.info('Loading pretrained ResNet-101') 192 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']), 193 | strict=False) 194 | if freeze: 195 | logger.info("Freeze ResNet-101 parameters") 196 | for param in model.parameters(): 197 | param.requires_grad = False 198 | 199 | return model 200 | 201 | 202 | def resnet152(pretrained=False, freeze=False, **kwargs): 203 | """Constructs a ResNet-152 model. 
204 | 205 | Args: 206 | pretrained (bool): If True, returns a model pre-trained on ImageNet 207 | freeze (bool): If True, freeze the weights of the network 208 | """ 209 | model = RegressionResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 210 | if pretrained: 211 | logger.info('Loading pretrained ResNet-152') 212 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']), 213 | strict=False) 214 | 215 | if freeze: 216 | logger.info("Freeze ResNet-152 parameters") 217 | for param in model.parameters(): 218 | param.requires_grad = False 219 | 220 | return model 221 | 222 | 223 | resnets = {'resnet18': resnet18, 224 | 'resnet34': resnet34, 225 | 'resnet50': resnet50, 226 | 'resnet101': resnet101, 227 | 'resnet152': resnet152} 228 | 229 | 230 | if __name__ == '__main__': 231 | resnet = resnet50(pretrained=True, 232 | replace_stride_with_dilation=None) 233 | dilated_resnet = resnet50( 234 | pretrained=True, 235 | replace_stride_with_dilation=[False, True, True]) 236 | device = torch.device('cuda') 237 | resnet = resnet.to(device=device) 238 | dilated_resnet = dilated_resnet.to(device=device) 239 | 240 | B, C, H, W = 32, 3, 256, 256 241 | 242 | images = torch.zeros([B, C, H, W], dtype=torch.float32, device=device) 243 | 244 | output = resnet(images)['layer4'] 245 | logger.info(f'No dilation: {output.shape}') 246 | 247 | output = dilated_resnet(images)['layer4'] 248 | logger.info(f'With dilation: {output.shape}') 249 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/backbone/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
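# --- Illustrative sketch, not from the original repository. Direct use of the `resnets`
# factory dict from resnet.py above, mirroring the 'resnet' branch of build_backbone;
# the input size is arbitrary.
import torch

backbone = resnets['resnet18'](pretrained=False, freeze=False)
out = backbone(torch.randn(1, 3, 224, 224))
# out['layer4']      : (1, 512, 7, 7)   (BasicBlock expansion 1, overall stride 32)
# out['avg_pooling'] : (1, 512)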
10 | """ 11 | import torch 12 | import torch.nn as nn 13 | 14 | from torchvision.models import resnet 15 | 16 | class ProjectionHead(nn.Module): 17 | def __init__(self, projection_head_input_dim, projection_head_hidden_dim, output_dim): 18 | super(ProjectionHead, self).__init__() 19 | self.head = nn.Sequential( 20 | nn.Linear( 21 | projection_head_input_dim, 22 | projection_head_hidden_dim, 23 | bias=True, 24 | ), 25 | nn.BatchNorm1d(projection_head_hidden_dim), 26 | nn.ReLU(), 27 | nn.Linear( 28 | projection_head_hidden_dim, 29 | output_dim, 30 | bias=False, 31 | ), 32 | ) 33 | 34 | def forward(self, x): 35 | return self.head(x) 36 | 37 | def make_conv_layer(input_dim, cfg): 38 | num_layers = cfg.get('num_layers') 39 | num_filters = cfg.num_filters 40 | 41 | expansion = resnet.Bottleneck.expansion 42 | 43 | layers = [] 44 | for i in range(num_layers): 45 | downsample = nn.Conv2d(input_dim, num_filters, stride=1, 46 | kernel_size=1, bias=False) 47 | 48 | layers.append( 49 | resnet.Bottleneck(input_dim, num_filters // expansion, 50 | downsample=downsample) 51 | ) 52 | input_dim = num_filters 53 | return nn.Sequential(*layers) 54 | 55 | 56 | def make_subsample_layers(input_dim, cfg): 57 | num_filters = cfg.get('num_filters') 58 | strides = cfg.get('strides') 59 | kernel_sizes = cfg.get('kernel_sizes') 60 | 61 | param_desc = zip(num_filters, kernel_sizes, strides) 62 | layers = [] 63 | for out_dim, kernel_size, stride in param_desc: 64 | layers.append( 65 | ConvNormActiv( 66 | input_dim, 67 | out_dim, 68 | kernel_size=kernel_size, 69 | stride=stride, 70 | **cfg, 71 | ) 72 | ) 73 | input_dim = out_dim 74 | return nn.Sequential(*layers), out_dim 75 | 76 | 77 | def make_projection_head(projection_head_input_dim, projection_head_hidden_dim, output_dim) -> ProjectionHead: 78 | return ProjectionHead(projection_head_input_dim, projection_head_hidden_dim, output_dim) 79 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/backbone/vgg19.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/body_models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from .utils import transform_mat 12 | 13 | from .hand_models import MANO 14 | 15 | from .build import ( 16 | build_hand_model, 17 | build_hand_texture, 18 | ) 19 | from .utils import KeypointTensor, find_joint_kin_chain 20 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/body_models/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import Union 12 | import os.path as osp 13 | from omegaconf import DictConfig 14 | 15 | from loguru import logger 16 | from .hand_models import MANO, HTML 17 | 18 | 19 | def build_hand_model(body_model_cfg: DictConfig) -> Union[MANO]: 20 | model_type = body_model_cfg.get('type', 'mano') 21 | model_folder = osp.expandvars( 22 | body_model_cfg.get('model_folder', 'data/models')) 23 | 24 | is_right = body_model_cfg.get('is_right', True) 25 | vertex_ids_path = body_model_cfg.get('vertex_ids_path', '') 26 | curr_model_cfg = body_model_cfg.get(model_type, {}) 27 | logger.debug(f'Building {model_type.upper()} body model') 28 | if model_type.lower() == 'mano': 29 | model_key = 'mano' 30 | model = MANO 31 | else: 32 | raise ValueError(f'Unknown model type {model_type}, exiting!') 33 | 34 | model_path = osp.join(model_folder, model_key) 35 | return model(model_folder=model_path, 36 | vertex_ids_path=vertex_ids_path, 37 | is_right=is_right, 38 | **curr_model_cfg) 39 | 40 | 41 | def build_hand_texture(body_model_cfg: DictConfig): 42 | ''' Factory function for the head model 43 | ''' 44 | model_type = body_model_cfg.get('type', 'flame') 45 | model_folder = osp.expandvars( 46 | body_model_cfg.get('model_folder', 'data/models')) 47 | 48 | curr_model_cfg = body_model_cfg.get(model_type, {}) 49 | 50 | texture_cfg = curr_model_cfg.get('texture', {}) 51 | 52 | logger.debug(f'Building {model_type.upper()} body model') 53 | model_path = osp.join(model_folder, model_type) 54 | if model_type.lower() == 'mano': 55 | model_key = 'mano' 56 | model = HTML 57 | else: 58 | raise ValueError(f'Unknown model type {model_type}, exiting!') 59 | return model(**texture_cfg) 60 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/body_models/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from typing import List, Optional, Any 12 | import os.path as osp 13 | import yaml 14 | 15 | import numpy as np 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | 20 | from TempCLR.utils import Tensor, StringList 21 | 22 | 23 | def transform_mat(R: Tensor, t: Tensor) -> Tensor: 24 | ''' Creates a batch of transformation matrices 25 | Args: 26 | - R: Bx3x3 array of a batch of rotation matrices 27 | - t: Bx3x1 array of a batch of translation vectors 28 | Returns: 29 | - T: Bx4x4 Transformation matrix 30 | ''' 31 | # No padding left or right, only add an extra row 32 | return torch.cat([F.pad(R, [0, 0, 0, 1], value=0.0), 33 | F.pad(t, [0, 0, 0, 1], value=1.0)], dim=2) 34 | 35 | 36 | def find_joint_kin_chain(joint_id: int, kinematic_tree: List) -> List: 37 | kin_chain = [] 38 | curr_idx = joint_id 39 | while curr_idx != -1: 40 | kin_chain.append(curr_idx) 41 | curr_idx = kinematic_tree[curr_idx] 42 | return kin_chain 43 | 44 | 45 | def to_tensor(array, dtype=torch.float32) -> Tensor: 46 | if not torch.is_tensor(array): 47 | return torch.tensor(array, dtype=dtype) 48 | else: 49 | return array.to(dtype=dtype) 50 | 51 | 52 | class JointsFromVerticesSelector(nn.Module): 53 | 54 | def _forward_unimplemented(self, *input: Any) -> None: 55 | pass 56 | 57 | def __init__( 58 | self, 59 | face_ids: Optional[List] = None, 60 | bcs: Optional[List] = None, 61 | names: Optional[StringList] = None, 62 | fname: str = None, 63 | **kwargs 64 | ) -> None: 65 | ''' Selects extra joints from vertices 66 | ''' 67 | super(JointsFromVerticesSelector, self).__init__() 68 | 69 | err_msg = ( 70 | 'Either pass a filename or triangle face ids, names and' 71 | ' barycentrics') 72 | assert fname is not None or ( 73 | face_ids is not None and bcs is not None and names is not None 74 | ), err_msg 75 | if fname is not None: 76 | fname = osp.expanduser(osp.expandvars(fname)) 77 | with open(fname, 'r') as f: 78 | data = yaml.load(f, Loader=yaml.FullLoader) 79 | names = list(data.keys()) 80 | bcs = [] 81 | face_ids = [] 82 | for name, d in data.items(): 83 | face_ids.append(d['face']) 84 | bcs.append(d['bc']) 85 | bcs = np.array(bcs, dtype=np.float32) 86 | face_ids = np.array(face_ids, dtype=np.int32) 87 | assert len(bcs) == len(face_ids), ( 88 | 'The number of barycentric coordinates must be equal to the faces' 89 | ) 90 | assert len(names) == len(face_ids), ( 91 | 'The number of names must be equal to the number of ' 92 | ) 93 | 94 | self.names = names 95 | self.register_buffer('bcs', torch.tensor(bcs, dtype=torch.float32)) 96 | self.register_buffer( 97 | 'face_ids', torch.tensor(face_ids, dtype=torch.long)) 98 | 99 | def as_tensor( 100 | self, 101 | num_vertices: int, 102 | faces: Tensor 103 | ) -> Tensor: 104 | ''' Builds a linear regression matrix for the extra joints 105 | ''' 106 | # Get the number of extra joints 107 | num_extra_joints = len(self.names) 108 | output = torch.zeros([num_extra_joints, num_vertices]) 109 | # Get the indices of the vertices we use 110 | vertex_ids = faces[self.face_ids] 111 | for ii, vids in enumerate(vertex_ids): 112 | # Assign the barycentric weight of each point 113 | output[ii, vids] = self.bcs[ii] 114 | return output 115 | 116 | def extra_joint_names(self) -> StringList: 117 | ''' Returns the names of the extra joints 118 | ''' 119 | return self.names 120 | 121 | def forward( 122 | self, 123 | vertices: Tensor, 124 | faces: Tensor 125 | ) -> Tensor: 126 | if len(self.face_ids) < 1: 127 | return [] 128 | vertex_ids = 
faces[self.face_ids].reshape(-1) 129 | # Should be BxNx3x3 130 | triangles = torch.index_select(vertices, 1, vertex_ids).reshape( 131 | -1, len(self.bcs), 3, 3) 132 | return (triangles * self.bcs[None, :, :, None]).sum(dim=2) 133 | 134 | 135 | class KeypointTensor(object): 136 | def __init__(self, data, 137 | source='smplx', 138 | keypoint_names=None, 139 | connections=None, 140 | part_connections=None, 141 | part_indices=None, 142 | **kwargs): 143 | ''' A keypoint wrapper with keypoint_names 144 | ''' 145 | if isinstance(data, (KeypointTensor,)): 146 | data = data._t 147 | self._t = torch.as_tensor(data, **kwargs) 148 | self._source = source 149 | self._keypoint_names = keypoint_names 150 | self._connections = connections 151 | self._part_indices = part_indices 152 | self._part_connections = part_connections 153 | 154 | @staticmethod 155 | def from_obj(tensor, obj): 156 | return KeypointTensor(tensor, source=obj.source, 157 | keypoint_names=obj.keypoint_names, 158 | connections=obj.connections, 159 | part_indices=obj.part_indices, 160 | part_connections=obj.part_connections) 161 | 162 | @property 163 | def source(self): 164 | return self._source 165 | 166 | @property 167 | def keypoint_names(self): 168 | return self._keypoint_names 169 | 170 | @property 171 | def connections(self): 172 | return self._connections 173 | 174 | @property 175 | def part_indices(self): 176 | return self._part_indices 177 | 178 | @property 179 | def part_connections(self): 180 | return self._part_connections 181 | 182 | def __repr__(self) -> str: 183 | return f'KeypointTensor:\n{self._t}' 184 | 185 | def __getitem__(self, key): 186 | return self._t[key] 187 | return KeypointTensor( 188 | self._t[key], keypoint_names=self._keypoint_names, 189 | source=self.source, 190 | connections=self.connections, 191 | part_indices=self._part_indices, 192 | part_connections=self._part_connections, 193 | ) 194 | 195 | def __getattribute__(self, name): 196 | tensor = super(KeypointTensor, self).__getattribute__('_t') 197 | if hasattr(tensor, name): 198 | # If the tensor has a member function with name `name` then call it 199 | func = getattr(tensor, name) 200 | if 'numpy' in name: 201 | return lambda: self._t.numpy() 202 | elif callable(func): 203 | return lambda *args, **kwargs: KeypointTensor( 204 | func(*args, **kwargs), 205 | source=self.source, 206 | keypoint_names=self._keypoint_names, 207 | connections=self._connections, 208 | part_indices=self._part_indices, 209 | part_connections=self._part_connections, 210 | ) 211 | else: 212 | return getattr(self._t, name) 213 | else: 214 | output = super(KeypointTensor, self).__getattribute__(name) 215 | return output 216 | 217 | def __torch_function__(self, func, types, args=(), kwargs=None): 218 | if kwargs is None: 219 | kwargs = {} 220 | args = [a._t if hasattr(a, '_t') else a for a in args] 221 | ret = func(*args, **kwargs) 222 | if torch.is_tensor(ret): 223 | return KeypointTensor(ret, 224 | source=self.source, 225 | keypoint_names=self._keypoint_names, 226 | connections=self._connections, 227 | part_indices=self._part_indices, 228 | part_connections=self._part_connections, 229 | ) 230 | else: 231 | return ret 232 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import Dict 12 | 13 | import numpy as np 14 | import torch.nn as nn 15 | from loguru import logger 16 | 17 | from .hand_heads import build_hand_head, HAND_HEAD_REGISTRY 18 | 19 | 20 | def build_model(exp_cfg) -> Dict[str, nn.Module]: 21 | network_cfg = exp_cfg.get('network', {}) 22 | net_type = network_cfg.get('type', 'TempCLR') 23 | 24 | logger.info(f'Going to build a: {net_type}') 25 | if net_type in HAND_HEAD_REGISTRY: 26 | network = build_hand_head(exp_cfg) 27 | else: 28 | raise ValueError(f'Unknown network type: {net_type}') 29 | 30 | return { 31 | 'network': network 32 | } 33 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/camera/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .build import build_camera_head 12 | from .camera_projection import ( 13 | build_cam_proj, DEFAULT_FOCAL_LENGTH, CameraParams) 14 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/camera/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import numpy as np 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | # from .camera_head import CameraHead 18 | from .camera_projection import build_cam_proj 19 | 20 | 21 | def build_camera_head(cfg, feat_dim): 22 | return CameraHead(cfg, feat_dim) 23 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/camera/camera_projection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
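# --- Illustrative sketch, not from the original repository. Behaviour of the KeypointTensor
# wrapper defined in body_models/utils.py above: attribute access is forwarded to the wrapped
# tensor, tensor methods return a new KeypointTensor that keeps the keypoint metadata, and
# plain indexing returns the underlying tensor slice. The source label and joint names below
# are placeholders.
import torch

joints = KeypointTensor(
    torch.randn(2, 21, 3),
    source='mano',
    keypoint_names=[f'joint_{i}' for i in range(21)],
)
print(joints.shape)            # torch.Size([2, 21, 3]), read from the wrapped tensor
detached = joints.detach()     # still a KeypointTensor, metadata preserved
first = joints[0]              # a plain torch.Tensor of shape (21, 3)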
10 | """ 11 | import sys 12 | 13 | import numpy as np 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | from loguru import logger 20 | from TempCLR.models.body_models import KeypointTensor 21 | 22 | DEFAULT_FOCAL_LENGTH = 5000 23 | 24 | 25 | class CameraParams(object): 26 | attributes = ['translation', 'rotation', 'scale', 'focal_length'] 27 | 28 | KEYS = ['translation', 'rotation', 'scale', 'focal_length'] 29 | 30 | def __init__(self, translation=None, rotation=None, scale=None, 31 | focal_length=None): 32 | super(CameraParams, self).__init__() 33 | 34 | self.translation = translation 35 | self.rotation = rotation 36 | self.scale = scale 37 | self.focal_length = focal_length 38 | 39 | def keys(self): 40 | return [key for key in self.KEYS 41 | if getattr(self, key) is not None] 42 | 43 | def get(self, key, default=None): 44 | return getattr(self, key, default) 45 | 46 | def __getitem__(self, key): 47 | return getattr(self, key) 48 | 49 | 50 | def build_cam_proj(camera_cfg, dtype=torch.float32): 51 | camera_type = camera_cfg.get('type', 'weak-persp') 52 | camera_pos_scale = camera_cfg.get('pos_func') 53 | if camera_pos_scale == 'softplus': 54 | camera_scale_func = F.softplus 55 | elif camera_pos_scale == 'exp': 56 | camera_scale_func = torch.exp 57 | elif camera_pos_scale == 'none' or camera_pos_scale == 'None': 58 | def func(x): 59 | return x 60 | camera_scale_func = func 61 | else: 62 | raise ValueError( 63 | f'Unknown positive scaling function: {camera_pos_scale}') 64 | 65 | if camera_type.lower() == 'persp': 66 | if camera_pos_scale == 'softplus': 67 | mean_flength = np.log(np.exp(DEFAULT_FOCAL_LENGTH) - 1) 68 | elif camera_pos_scale == 'exp': 69 | mean_flength = np.log(DEFAULT_FOCAL_LENGTH) 70 | elif camera_pos_scale == 'none': 71 | mean_flength = DEFAULT_FOCAL_LENGTH 72 | camera = PerspectiveCamera(dtype=dtype) 73 | camera_mean = torch.tensor( 74 | [mean_flength, 0.0, 0.0], dtype=torch.float32) 75 | camera_param_dim = 4 76 | elif camera_type.lower() == 'weak-persp': 77 | weak_persp_cfg = camera_cfg.get('weak_persp', {}) 78 | mean_scale = weak_persp_cfg.get('mean_scale', 0.9) 79 | if camera_pos_scale == 'softplus': 80 | mean_scale = np.log(np.exp(mean_scale) - 1) 81 | elif camera_pos_scale == 'exp': 82 | mean_scale = np.log(mean_scale) 83 | camera_mean = torch.tensor([mean_scale, 0.0, 0.0], dtype=torch.float32) 84 | camera = WeakPerspectiveCamera(dtype=dtype) 85 | camera_param_dim = 3 86 | else: 87 | raise ValueError(f'Unknown camera type: {camera_type}') 88 | 89 | return { 90 | 'camera': camera, 91 | 'mean': camera_mean, 92 | 'scale_func': camera_scale_func, 93 | 'dim': camera_param_dim 94 | } 95 | 96 | 97 | class PerspectiveCamera(nn.Module): 98 | ''' Module that implements a perspective camera 99 | ''' 100 | 101 | FOCAL_LENGTH = DEFAULT_FOCAL_LENGTH 102 | 103 | def __init__(self, dtype=torch.float32, focal_length=None, **kwargs): 104 | super(PerspectiveCamera, self).__init__() 105 | self.dtype = dtype 106 | 107 | if focal_length is None: 108 | focal_length = self.FOCAL_LENGTH 109 | # Make a buffer so that PyTorch does not complain when creating 110 | # the camera matrix 111 | self.register_buffer('focal_length', 112 | torch.tensor(focal_length, dtype=dtype)) 113 | 114 | def forward(self, points, focal_length=None, translation=None, 115 | rotation=None, camera_center=None, **kwargs): 116 | ''' Forward pass for the perspective camera 117 | 118 | Parameters 119 | ---------- 120 | points: torch.tensor, BxNx3 121 | The tensor that contains 
the points that will be projected. 122 | If not in homogeneous coordinates, then 123 | focal_length: torch.tensor, BxNx3, optional 124 | The predicted focal length of the camera. If not given, 125 | then the default value of 5000 is assigned 126 | translation: torch.tensor, Bx3, optional 127 | The translation predicted for each element in the batch. If 128 | not given then a zero translation vector is assumed 129 | rotation: torch.tensor, Bx3x3, optional 130 | The rotation predicted for each element in the batch. If 131 | not given then an identity rotation matrix is assumed 132 | camera_center: torch.tensor, Bx2, optional 133 | The center of each image for the projection. If not given, 134 | then a zero vector is used 135 | Returns 136 | ------- 137 | Returns a torch.tensor object with size BxNx2 with the 138 | location of the projected points on the image plane 139 | ''' 140 | 141 | device = points.device 142 | batch_size = points.shape[0] 143 | 144 | if rotation is None: 145 | rotation = torch.eye( 146 | 3, dtype=points.dtype, device=device).unsqueeze(dim=0).expand( 147 | batch_size, -1, -1) 148 | if translation is None: 149 | translation = torch.zeros( 150 | [3], dtype=points.dtype, 151 | device=device).unsqueeze(dim=0).expand(batch_size, -11) 152 | 153 | if camera_center is None: 154 | camera_center = torch.zeros([batch_size, 2], dtype=points.dtype, 155 | device=device) 156 | 157 | with torch.no_grad(): 158 | camera_mat = torch.zeros([batch_size, 2, 2], 159 | dtype=self.dtype, device=points.device) 160 | if focal_length is None: 161 | focal_length = self.focal_length 162 | 163 | camera_mat[:, 0, 0] = focal_length 164 | camera_mat[:, 1, 1] = focal_length 165 | 166 | points_transf = torch.einsum( 167 | 'bji,bmi->bmj', 168 | rotation, points) + translation.unsqueeze(dim=1) 169 | 170 | img_points = torch.div(points_transf[:, :, :2], 171 | points_transf[:, :, 2].unsqueeze(dim=-1)) 172 | img_points = torch.einsum( 173 | 'bmi,bji->bjm', 174 | camera_mat, img_points) + camera_center.reshape(-1, 1, 2) 175 | return img_points 176 | 177 | 178 | class WeakPerspectiveCamera(nn.Module): 179 | ''' Scaled Orthographic / Weak-Perspective Camera 180 | ''' 181 | 182 | def __init__(self, **kwargs): 183 | super(WeakPerspectiveCamera, self).__init__() 184 | 185 | def forward(self, points, scale, translation, **kwargs): 186 | ''' Implements the forward pass for a Scaled Orthographic Camera 187 | 188 | Parameters 189 | ---------- 190 | points: torch.tensor, BxNx3 191 | The tensor that contains the points that will be projected. 
192 | If not in homogeneous coordinates, then 193 | scale: torch.tensor, Bx1 194 | The predicted scaling parameters 195 | translation: torch.tensor, Bx2 196 | The translation applied on the image plane to the points 197 | Returns 198 | ------- 199 | projected_points: torch.tensor, BxNx2 200 | The points projected on the image plane, according to the 201 | given scale and translation 202 | ''' 203 | assert translation.shape[-1] == 2, 'Translation shape must be -1x2' 204 | assert scale.shape[-1] == 1, 'Scale shape must be -1x1' 205 | 206 | projected_points = scale.view(-1, 1, 1) * ( 207 | points[:, :, :2] + translation.view(-1, 1, 2)) 208 | if (type(projected_points) != type(points) and isinstance( 209 | points, (KeypointTensor,))): 210 | projected_points = KeypointTensor.from_obj( 211 | projected_points, points) 212 | return projected_points 213 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/common/rigid_alignment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from typing import NewType 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | from loguru import logger 17 | 18 | 19 | Tensor = NewType('Tensor', torch.tensor) 20 | 21 | 22 | class RotationTranslationAlignment(nn.Module): 23 | def __init__(self) -> None: 24 | ''' Implements rotation and translation alignment with least squares 25 | 26 | For more information see: 27 | 28 | Least-Squares Rigid Motion Using SVD 29 | Olga Sorkine-Hornung and Michael Rabinovich 30 | 31 | ''' 32 | super(RotationTranslationAlignment, self).__init__() 33 | 34 | def forward( 35 | self, 36 | p: Tensor, 37 | q: Tensor) -> Tensor: 38 | ''' Aligns two point clouds using the optimal R, T 39 | 40 | Parameters 41 | ---------- 42 | p: BxNx3, torch.Tensor 43 | The first of points 44 | q: BxNx3, torch.Tensor 45 | 46 | Returns 47 | ------- 48 | p_hat: BxNx3, torch.Tensor 49 | The points p after least squares alignment to q 50 | ''' 51 | batch_size = p.shape[0] 52 | dtype = p.dtype 53 | device = p.device 54 | 55 | p_transpose = p.transpose(1, 2) 56 | q_transpose = q.transpose(1, 2) 57 | 58 | # 1. Remove mean. 59 | p_mean = torch.mean(p_transpose, dim=-1, keepdim=True) 60 | q_mean = torch.mean(q_transpose, dim=-1, keepdim=True) 61 | 62 | p_centered = p_transpose - p_mean 63 | q_centered = q_transpose - q_mean 64 | 65 | # 2. Compute variance of X1 used for scale. 
66 | var_p = torch.sum(p_centered.pow(2), dim=(1, 2), keepdim=True) 67 | # var_q = torch.sum(q_centered.pow(2), dim=(1, 2), keepdim=True) 68 | 69 | # Compute the outer product of the two point sets 70 | # Should be Bx3x3 71 | K = torch.bmm(p_centered, q_centered.transpose(1, 2)) 72 | # Apply SVD on the outer product matrix to recover the rotation 73 | U, S, V = torch.svd(K) 74 | 75 | # Make sure that the computed rotation does not contain a reflection 76 | Z = torch.eye(3, dtype=dtype, device=device).view( 77 | 1, 3, 3).expand(batch_size, -1, -1).contiguous() 78 | 79 | raw_product = torch.bmm(U, V.transpose(1, 2)) 80 | Z[:, -1, -1] *= torch.sign(torch.det(raw_product)) 81 | 82 | # Compute the final rotation matrix 83 | rotation = torch.bmm(V, torch.bmm(Z, U.transpose(1, 2))) 84 | 85 | scale = torch.einsum('bii->b', [torch.bmm(rotation, K)]) / var_p.view( 86 | -1) 87 | 88 | # Compute the translation vector 89 | translation = q_mean - scale.reshape(batch_size, 1, 1) * torch.bmm( 90 | rotation, p_mean) 91 | 92 | return ( 93 | scale.reshape(batch_size, 1, 1) * 94 | torch.bmm(rotation, p_transpose) + 95 | translation).transpose(1, 2) 96 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/hand_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .build import build as build_hand_head 12 | from .hand_heads import * 13 | from .registry import HAND_HEAD_REGISTRY 14 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/hand_heads/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
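# --- Illustrative sketch, not from the original repository. Typical use of
# RotationTranslationAlignment from rigid_alignment.py above: Procrustes-align predicted
# 3D joints to the ground truth before measuring the per-joint distance (the quantity
# usually reported as PA-MPJPE).
import torch

align = RotationTranslationAlignment()
pred_joints = torch.randn(8, 21, 3)        # B x N x 3 predictions
gt_joints = torch.randn(8, 21, 3)          # B x N x 3 ground truth
aligned = align(pred_joints, gt_joints)    # predictions after the optimal similarity transform
pa_error = (aligned - gt_joints).norm(dim=-1).mean()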
10 | """ 11 | from .hand_heads import HAND_HEAD_REGISTRY 12 | 13 | 14 | def build(exp_cfg): 15 | network_cfg = exp_cfg.get('network', {}) 16 | hand_cfg = exp_cfg.get('hand_model', {}) 17 | batch_size = exp_cfg.datasets.hand.batch_size 18 | network_type = network_cfg.get('type', 'MANORegressor') 19 | if network_type == 'MANORegressor': 20 | loss_cfg = exp_cfg.get('losses', {}).get('hand', {}) 21 | network_cfg = network_cfg.get('hand', {}) 22 | encoder_cfg = exp_cfg.get('losses', {}).get('encoder', {}) 23 | temporal_backbone_cfg = network_cfg.get('temporal_backbone', {}) 24 | 25 | elif network_type == 'MANOGroupRegressor': 26 | loss_cfg = exp_cfg.get('losses', {}).get('hand', {}) 27 | network_cfg = network_cfg.get('hand', {}) 28 | encoder_cfg = exp_cfg.get('losses', {}).get('encoder', {}) 29 | 30 | else: 31 | raise ValueError(f'Unknown network type: {network_type}') 32 | return HAND_HEAD_REGISTRY.get(network_type)( 33 | hand_cfg, network_cfg=network_cfg, loss_cfg=loss_cfg, encoder_cfg=encoder_cfg, 34 | temporal_backbone_cfg=temporal_backbone_cfg, batch_size=batch_size) 35 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/hand_heads/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from fvcore.common.registry import Registry 12 | 13 | 14 | HAND_HEAD_REGISTRY = Registry('HAND_HEAD_REGISTRY') 15 | HAND_HEAD_REGISTRY.__doc__ = """ 16 | Registry for the hand prediction heads, which predict a 3D head/face 17 | from a single image. 18 | """ 19 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/nnutils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .init_layer import init_weights 12 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/nnutils/init_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.init as nninit 14 | 15 | from loguru import logger 16 | 17 | 18 | def init_weights(layer, 19 | name='', 20 | init_type='xavier', distr='uniform', 21 | gain=1.0, 22 | activ_type='leaky-relu', lrelu_slope=0.01, **kwargs): 23 | if len(name) < 1: 24 | name = str(layer) 25 | logger.info( 26 | 'Initializing {} with {}_{}: gain={}', name, init_type, distr, gain) 27 | weights = layer.weight 28 | if init_type == 'xavier': 29 | if distr == 'uniform': 30 | nninit.xavier_uniform_(weights, gain=gain) 31 | elif distr == 'normal': 32 | nninit.xavier_normal_(weights, gain=gain) 33 | else: 34 | raise ValueError( 35 | 'Unknown distribution "{}" for Xavier init'.format(distr)) 36 | elif init_type == 'kaiming': 37 | 38 | activ_type = activ_type.replace('-', '_') 39 | if distr == 'uniform': 40 | nninit.kaiming_uniform_(weights, a=lrelu_slope, 41 | nonlinearity=activ_type) 42 | elif distr == 'normal': 43 | nninit.kaiming_normal_(weights, a=lrelu_slope, 44 | nonlinearity=activ_type) 45 | else: 46 | raise ValueError( 47 | 'Unknown distribution "{}" for Kaiming init'.format(distr)) 48 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/rendering/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .renderer import SRenderY 12 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/models/rendering/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import numpy as np 12 | import torch 13 | import torch.nn.functional as F 14 | 15 | from TempCLR.utils import Array, Tensor 16 | 17 | 18 | def face_vertices( 19 | vertices: Tensor, 20 | faces: Tensor 21 | ) -> Tensor: 22 | """ Returns the triangles formed by the input vertices and faces 23 | 24 | Source: https://github.com/daniilidis-group/neural_renderer/ 25 | 26 | :param vertices: [batch size, number of vertices, 3] 27 | :param faces: [batch size, number of faces, 3] 28 | :return: [batch size, number of faces, 3, 3] 29 | """ 30 | assert (vertices.ndimension() == 3) 31 | assert (faces.ndimension() == 3) 32 | assert (vertices.shape[0] == faces.shape[0]) 33 | assert (vertices.shape[2] == 3) 34 | assert (faces.shape[2] == 3) 35 | 36 | bs, nv = vertices.shape[:2] 37 | bs, nf = faces.shape[:2] 38 | device = vertices.device 39 | faces = faces + ( 40 | torch.arange(bs, dtype=torch.int32).to(device) * nv)[:, None, None] 41 | vertices = vertices.reshape((bs * nv, 3)) 42 | # pytorch only supports long and byte tensors for indexing 43 | return vertices[faces.long()] 44 | 45 | 46 | def dict2obj(d): 47 | # if isinstance(d, list): 48 | # d = [dict2obj(x) for x in d] 49 | if not isinstance(d, dict): 50 | return d 51 | 52 | class C(object): 53 | pass 54 | 55 | o = C() 56 | for k in d: 57 | o.__dict__[k] = dict2obj(d[k]) 58 | return o 59 | 60 | 61 | # ---------------------------- process/generate vertices, normals, faces 62 | def generate_triangles(h, w, mask=None): 63 | # quad layout: 64 | # 0 1 ... w-1 65 | # w w+1 66 | # . 67 | # w*h 68 | triangles = [] 69 | margin = 0 70 | for x in range(margin, w - 1 - margin): 71 | for y in range(margin, h - 1 - margin): 72 | triangle0 = [y * w + x, y * w + x + 1, (y + 1) * w + x] 73 | triangle1 = [y * w + x + 1, (y + 1) * w + x + 1, (y + 1) * w + x] 74 | triangles.append(triangle0) 75 | triangles.append(triangle1) 76 | triangles = np.array(triangles) 77 | triangles = triangles[:, [0, 2, 1]] 78 | return triangles 79 | 80 | 81 | def vertex_normals( 82 | vertices: Tensor, 83 | faces: Tensor 84 | ) -> Tensor: 85 | """ 86 | :param vertices: [batch size, number of vertices, 3] 87 | :param faces: [batch size, number of faces, 3] 88 | :return: [batch size, number of vertices, 3] 89 | """ 90 | assert (vertices.ndimension() == 3) 91 | assert (faces.ndimension() == 3) 92 | assert (vertices.shape[0] == faces.shape[0]) 93 | assert (vertices.shape[2] == 3) 94 | assert (faces.shape[2] == 3) 95 | bs, nv = vertices.shape[:2] 96 | bs, nf = faces.shape[:2] 97 | device = vertices.device 98 | normals = torch.zeros(bs * nv, 3, dtype=vertices.dtype, device=device) 99 | 100 | faces = faces + ( 101 | torch.arange(bs, dtype=torch.int32).to(device) * nv)[:, None, None] 102 | vertices_faces = vertices.reshape((bs * nv, 3))[faces.long()] 103 | 104 | faces = faces.reshape(-1, 3) 105 | vertices_faces = vertices_faces.reshape(-1, 3, 3) 106 | 107 | normals.index_add_( 108 | 0, faces[:, 1].long(), 109 | torch.cross( 110 | vertices_faces[:, 2] - vertices_faces[:, 1], 111 | vertices_faces[:, 0] - vertices_faces[:, 1]) 112 | ) 113 | normals.index_add_( 114 | 0, faces[:, 2].long(), 115 | torch.cross( 116 | vertices_faces[:, 0] - vertices_faces[:, 2], 117 | vertices_faces[:, 1] - vertices_faces[:, 2]) 118 | ) 119 | normals.index_add_( 120 | 0, faces[:, 0].long(), 121 | torch.cross( 122 | vertices_faces[:, 1] - vertices_faces[:, 0], 123 | vertices_faces[:, 2] - vertices_faces[:, 0]) 124 | ) 125 | 126 | normals = F.normalize(normals, eps=1e-6, dim=1) 127 | normals = normals.reshape((bs, nv, 
3)) 128 | # pytorch only supports long and byte tensors for indexing 129 | return normals 130 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from .typing import * 12 | from .np_utils import * 13 | from .timer import Timer 14 | from .bool_utils import nand 15 | from .img_utils import read_img 16 | from .cfg_utils import cfg_to_dict 17 | from .plot_utils import ( 18 | create_skel_img, 19 | keyp_target_to_image, 20 | create_bbox_img, 21 | COLORS, 22 | OverlayRenderer, 23 | HDRenderer, 24 | undo_img_normalization, 25 | GTRenderer) 26 | from .torch_utils import tensor_scalar_dict_to_float 27 | from .rotation_utils import batch_rodrigues, batch_rot2aa, rot_mat_to_euler 28 | from .data_structs import Struct 29 | from .metrics import build_alignment, point_error, PointError 30 | from .checkpointer import Checkpointer 31 | from .transf_utils import get_transform, crop 32 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/bool_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | def nand(x: bool, y: bool) -> bool: 12 | return not (x and y) 13 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/cfg_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
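# --- Illustrative sketch, not from the original repository. Using the mesh helpers from
# rendering/utils.py above on a batch of MANO-sized meshes (778 vertices, 1538 faces);
# random vertices and faces are used only to show the expected tensor shapes.
import torch

verts = torch.randn(2, 778, 3)                                # B x V x 3
faces = torch.randint(0, 778, (1538, 3), dtype=torch.int32)   # F x 3 vertex indices
faces = faces.unsqueeze(0).expand(2, -1, -1)                  # B x F x 3
normals = vertex_normals(verts, faces)                        # B x V x 3, unit-length normals
triangles = face_vertices(verts, faces)                       # B x F x 3 x 3 triangle corners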
10 | """ 11 | from loguru import logger 12 | from yacs.config import CfgNode as CN 13 | from omegaconf import OmegaConf 14 | 15 | BUILTINS = [list, dict, tuple, set, str, int, float, bool] 16 | 17 | 18 | def cfg_to_dict(cfg_node): 19 | if isinstance(cfg_node, (CN,)): 20 | return yacs_cfg_to_dict(cfg_node) 21 | elif OmegaConf.is_config(cfg_node): 22 | return OmegaConf.to_container(cfg_node) 23 | else: 24 | raise ValueError(f'Unknown object type: {type(cfg_node)}') 25 | 26 | 27 | def yacs_cfg_to_dict(cfg_node): 28 | if type(cfg_node) in BUILTINS: 29 | return cfg_node 30 | else: 31 | curr_dict = dict(cfg_node) 32 | for key, val in curr_dict.items(): 33 | curr_dict[key] = cfg_to_dict(val) 34 | return curr_dict 35 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/checkpointer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import sys 12 | 13 | import os 14 | import os.path as osp 15 | 16 | import torch 17 | 18 | from loguru import logger 19 | 20 | 21 | class Checkpointer(object): 22 | def __init__(self, model, optimizer=None, scheduler=None, 23 | adv_optimizer=None, 24 | pretrained='', 25 | distributed=False, 26 | rank=0, 27 | save_dir='/tmp/exp', 28 | backup_dir='/tmp/exp'): 29 | self.rank = rank 30 | self.distributed = distributed 31 | 32 | self.model = model 33 | self.optimizer = optimizer 34 | self.scheduler = scheduler 35 | self.adv_optimizer = adv_optimizer 36 | 37 | self.save_dir = save_dir 38 | self.backup_dir = backup_dir 39 | if self.rank == 0: 40 | logger.info(f'Creating directory {self.save_dir}') 41 | os.makedirs(self.save_dir, exist_ok=True) 42 | self.pretrained = pretrained 43 | 44 | def save_checkpoint(self, name, **kwargs): 45 | if self.rank > 0: 46 | return 47 | ckpt_data = {} 48 | ckpt_data['model'] = self.model.state_dict() 49 | 50 | if self.optimizer is not None: 51 | logger.info('Adding optimizer state ...') 52 | ckpt_data['optimizer'] = self.optimizer.state_dict() 53 | if self.scheduler is not None: 54 | logger.info('Adding scheduler state ...') 55 | ckpt_data['scheduler'] = self.scheduler.state_dict() 56 | 57 | ckpt_data.update(kwargs) 58 | 59 | curr_ckpt_fn = osp.join(self.save_dir, name) 60 | logger.info('Saving checkpoint to {}'.format(curr_ckpt_fn)) 61 | try: 62 | torch.save(ckpt_data, curr_ckpt_fn) 63 | with open(osp.join(self.save_dir, 'latest_checkpoint'), 'w') as f: 64 | f.write(curr_ckpt_fn) 65 | except OSError: 66 | curr_ckpt_fn = osp.join(self.backup_dir, name) 67 | logger.warning("Saving checkpoints on backup path ") 68 | with open(osp.join(self.backup_dir, 'latest_checkpoint'), 'w') as f: 69 | f.write(curr_ckpt_fn) 70 | ckpt_data.clear() 71 | 72 | def load_checkpoint(self): 73 | save_fn = osp.join(self.save_dir, 'latest_checkpoint') 74 | load_pretrained = False 75 | 76 | if not osp.exists(save_fn): 77 | # If no previous checkpoint exists, load from the pretrained model 78 | if len(self.pretrained) > 1: 79 | self.pretrained = osp.expandvars(self.pretrained) 80 | load_pretrained = True 81 | save_fn = osp.join( 82 | self.pretrained, 'checkpoints', 'latest_checkpoint') 
83 | # If neither the pretrained model exists nor there is a previous 84 | # checkpoint then initialize from scratch 85 | if not osp.exists(save_fn): 86 | logger.warning(f'No checkpoint found in {self.save_dir}!') 87 | return {} 88 | 89 | logger.info('Load pretrained: {}', load_pretrained) 90 | with open(save_fn, 'r') as f: 91 | latest_ckpt_fn = f.read().strip() 92 | logger.warning(f'Loading checkpoint from {latest_ckpt_fn}!') 93 | 94 | if self.distributed: 95 | map_location = torch.device(f'cuda:{self.rank}') 96 | else: 97 | map_location = torch.device('cpu') 98 | ckpt_data = torch.load(latest_ckpt_fn, map_location=map_location) 99 | 100 | if load_pretrained: 101 | logger.info("Loading backbone only") 102 | ckpt_data['model'] = {k: v for k, v in ckpt_data["model"].items() if "backbone" in k} 103 | 104 | missing, unexpected = self.model.load_state_dict( 105 | ckpt_data['model'], strict=False) 106 | if len(missing) > 0: 107 | logger.warning( 108 | f'The following keys were not found: {missing}') 109 | if len(unexpected): 110 | logger.warning( 111 | f'The following keys were not expected: {unexpected}') 112 | 113 | if self.optimizer is not None and 'optimizer' in ckpt_data: 114 | if not load_pretrained: 115 | logger.warning('Loading optimizer data from: {}'.format( 116 | self.save_dir)) 117 | self.optimizer.load_state_dict(ckpt_data['optimizer']) 118 | 119 | if self.scheduler is not None and 'scheduler' in ckpt_data: 120 | if not load_pretrained: 121 | logger.warning('Loading scheduler data from: {}'.format( 122 | self.save_dir)) 123 | self.scheduler.load_state_dict(ckpt_data['scheduler']) 124 | 125 | if load_pretrained: 126 | ckpt_data['iteration'] = 0 127 | ckpt_data['epoch_number'] = 0 128 | 129 | return ckpt_data 130 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/data_structs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | from dataclasses import make_dataclass, fields, field 12 | from loguru import logger 13 | 14 | 15 | class Struct(object): 16 | def __new__(cls, **kwargs): 17 | class_fields = [ 18 | [key, type(val), field(default=val)] 19 | for key, val in kwargs.items() 20 | ] 21 | 22 | object_type = make_dataclass( 23 | 'Struct', 24 | class_fields, 25 | namespace={ 26 | 'keys': lambda self: [f.name for f in fields(self)], 27 | }) 28 | return object_type() 29 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/img_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import numpy as np 12 | import jpeg4py as jpeg 13 | import PIL.Image as pil_img 14 | 15 | from loguru import logger 16 | 17 | import cv2 18 | import PIL.ExifTags 19 | 20 | 21 | def read_img(img_fn, dtype=np.float32): 22 | if img_fn.endswith('jpeg') or img_fn.endswith('jpg'): 23 | try: 24 | with open(img_fn, 'rb') as f: 25 | img = jpeg.JPEG(f).decode() 26 | except jpeg.JPEGRuntimeError: 27 | logger.warning('{} produced a JPEGRuntimeError', img_fn) 28 | img = cv2.cvtColor(cv2.imread(img_fn), cv2.COLOR_BGR2RGB) 29 | try: 30 | exif_raw_dict = pil_img.open(img_fn)._getexif() 31 | if exif_raw_dict is not None: 32 | exif_data = { 33 | PIL.ExifTags.TAGS[k]: v 34 | for k, v in exif_raw_dict.items() 35 | if k in PIL.ExifTags.TAGS 36 | } 37 | orientation = exif_data.get('Orientation', None) 38 | if orientation is not None: 39 | if orientation == 1 or orientation == 0: 40 | # Normal image - nothing to do! 41 | pass 42 | elif orientation == 2: 43 | # Mirrored left to right 44 | img = np.fliplr(img) 45 | elif orientation == 3: 46 | # Rotated 180 degrees 47 | img = np.rot90(img, k=2) 48 | elif orientation == 4: 49 | # Mirrored top to bottom 50 | img = np.fliplr(np.rot90(img, k=2)) 51 | elif orientation == 5: 52 | # Mirrored along top-left diagonal 53 | img = np.fliplr(np.rot90(img, axes=(1, 0))) 54 | elif orientation == 6: 55 | # Rotated 90 degrees 56 | img = np.rot90(img, axes=(1, 0)) 57 | elif orientation == 7: 58 | # Mirrored along top-right diagonal 59 | img = np.fliplr(np.rot90(img)) 60 | elif orientation == 8: 61 | # Rotated 270 degrees 62 | img = np.rot90(img) 63 | else: 64 | raise NotImplementedError 65 | except SyntaxError: 66 | pass 67 | else: 68 | # elif img_fn.endswith('png') or img_fn.endswith('JPG') or img_fn.endswith(''): 69 | img = cv2.cvtColor(cv2.imread(img_fn), cv2.COLOR_BGR2RGB) 70 | if dtype == np.float32: 71 | if img.dtype == np.uint8: 72 | img = img.astype(dtype) / 255.0 73 | img = np.clip(img, 0, 1) 74 | return img 75 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | import numpy as np 12 | import open3d as o3d 13 | from .typing import Array 14 | 15 | __all__ = [ 16 | 'rel_change', 17 | 'binarize', 18 | 'max_grad_change', 19 | 'to_np', 20 | 'np2o3d_pcl', 21 | ] 22 | 23 | 24 | def rel_change(prev_val, curr_val): 25 | return (prev_val - curr_val) / max([np.abs(prev_val), np.abs(curr_val), 1]) 26 | 27 | 28 | def binarize( 29 | array: Array, 30 | thresh: float = -1, 31 | dtype: type = np.float32 32 | ) -> Array: 33 | if thresh > 0: 34 | return (array >= thresh).astype(dtype) 35 | else: 36 | return (array > 0).astype(dtype) 37 | 38 | 39 | def max_grad_change(grad_arr): 40 | return grad_arr.abs().max() 41 | 42 | 43 | def to_np(array, dtype=np.float32): 44 | if 'scipy.sparse' in str(type(array)): 45 | array = array.todense() 46 | return np.array(array, dtype=dtype) 47 | 48 | 49 | def np2o3d_pcl(x: np.ndarray) -> o3d.geometry.PointCloud: 50 | pcl = o3d.geometry.PointCloud() 51 | pcl.points = o3d.utility.Vector3dVector(x) 52 | 53 | return pcl 54 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/rotation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import torch 12 | from .typing import Tensor 13 | 14 | 15 | def batch_rodrigues(rot_vecs: Tensor, epsilon: float = 1e-8) -> Tensor: 16 | ''' Calculates the rotation matrices for a batch of rotation vectors 17 | Parameters 18 | ---------- 19 | rot_vecs: torch.tensor Nx3 20 | array of N axis-angle vectors 21 | Returns 22 | ------- 23 | R: torch.tensor Nx3x3 24 | The rotation matrices for the given axis-angle parameters 25 | ''' 26 | 27 | batch_size = rot_vecs.shape[0] 28 | device = rot_vecs.device 29 | dtype = rot_vecs.dtype 30 | 31 | angle = torch.norm(rot_vecs + epsilon, dim=1, keepdim=True, p=2) 32 | rot_dir = rot_vecs / angle 33 | 34 | cos = torch.unsqueeze(torch.cos(angle), dim=1) 35 | sin = torch.unsqueeze(torch.sin(angle), dim=1) 36 | 37 | # Bx1 arrays 38 | rx, ry, rz = torch.split(rot_dir, 1, dim=1) 39 | K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device) 40 | 41 | zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device) 42 | K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1) \ 43 | .view((batch_size, 3, 3)) 44 | 45 | ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0) 46 | rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K) 47 | return rot_mat 48 | 49 | 50 | def batch_rot2aa(Rs: Tensor, epsilon=1e-7) -> Tensor: 51 | """ 52 | Rs is B x 3 x 3 53 | void cMathUtil::RotMatToAxisAngle(const tMatrix& mat, tVector& out_axis, 54 | double& out_theta) 55 | { 56 | double c = 0.5 * (mat(0, 0) + mat(1, 1) + mat(2, 2) - 1); 57 | c = cMathUtil::Clamp(c, -1.0, 1.0); 58 | 59 | out_theta = std::acos(c); 60 | 61 | if (std::abs(out_theta) < 0.00001) 62 | { 63 | out_axis = tVector(0, 0, 1, 0); 64 | } 65 | else 66 | { 67 | double m21 = mat(2, 1) - mat(1, 2); 68 | double m02 = mat(0, 2) - mat(2, 0); 69 | double m10 = mat(1, 0) - mat(0, 1); 70 | double denom = std::sqrt(m21 * m21 + m02 * m02 + m10 * m10); 71 | out_axis[0] = m21 / denom; 72 | out_axis[1] = m02 / 
denom; 73 | out_axis[2] = m10 / denom; 74 | out_axis[3] = 0; 75 | } 76 | } 77 | """ 78 | 79 | cos = 0.5 * (torch.einsum('bii->b', [Rs]) - 1) 80 | cos = torch.clamp(cos, -1 + epsilon, 1 - epsilon) 81 | 82 | theta = torch.acos(cos) 83 | 84 | m21 = Rs[:, 2, 1] - Rs[:, 1, 2] 85 | m02 = Rs[:, 0, 2] - Rs[:, 2, 0] 86 | m10 = Rs[:, 1, 0] - Rs[:, 0, 1] 87 | denom = torch.sqrt(m21 * m21 + m02 * m02 + m10 * m10 + epsilon) 88 | 89 | axis0 = torch.where(torch.abs(theta) < 0.00001, m21, m21 / denom) 90 | axis1 = torch.where(torch.abs(theta) < 0.00001, m02, m02 / denom) 91 | axis2 = torch.where(torch.abs(theta) < 0.00001, m10, m10 / denom) 92 | 93 | return theta.unsqueeze(1) * torch.stack([axis0, axis1, axis2], 1) 94 | 95 | 96 | def rot_mat_to_euler(rot_mats: Tensor) -> Tensor: 97 | # Calculates rotation matrix to euler angles 98 | # Careful for extreme cases of eular angles like [0.0, pi, 0.0] 99 | 100 | sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] + 101 | rot_mats[:, 1, 0] * rot_mats[:, 1, 0]) 102 | return torch.atan2(-rot_mats[:, 2, 0], sy) 103 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import time 12 | import numpy as np 13 | import torch 14 | 15 | from loguru import logger 16 | 17 | 18 | class Timer(object): 19 | def __init__(self, name='', sync=False, verbose=False): 20 | super(Timer, self).__init__() 21 | self.elapsed = [] 22 | self.name = name 23 | self.sync = sync 24 | self.verbose = verbose 25 | 26 | def __enter__(self): 27 | if self.sync: 28 | torch.cuda.synchronize() 29 | self.start = time.perf_counter() 30 | 31 | def print(self): 32 | logger.info(f'[{self.name}]: {np.mean(self.elapsed):.3f}') 33 | 34 | def __exit__(self, type, value, traceback): 35 | if self.sync: 36 | torch.cuda.synchronize() 37 | elapsed = time.perf_counter() - self.start 38 | self.elapsed.append(elapsed) 39 | if self.verbose: 40 | logger.info( 41 | f'[{self.name}]: {elapsed:.3f}, {np.mean(self.elapsed):.3f}') 42 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from typing import Dict 12 | import torch 13 | 14 | from .typing import Tensor 15 | 16 | 17 | def no_reduction(arg): 18 | return arg 19 | 20 | 21 | def tensor_scalar_dict_to_float(tensor_dict: Dict[str, Tensor]): 22 | return {key: val.detach() if torch.is_tensor(val) else val 23 | for key, val in tensor_dict.items()} 24 | 25 | 26 | def to_tensor(tensor, device=None, dtype=torch.float32): 27 | if isinstance(tensor, torch.Tensor): 28 | return tensor 29 | else: 30 | return torch.tensor(tensor, dtype=dtype, device=device) 31 | 32 | 33 | def get_reduction_method(reduction='mean'): 34 | if reduction == 'mean': 35 | reduction = torch.mean 36 | elif reduction == 'sum': 37 | reduction = torch.sum 38 | elif reduction == 'none': 39 | reduction = no_reduction 40 | else: 41 | raise ValueError('Unknown reduction type: {}'.format(reduction)) 42 | return reduction 43 | 44 | 45 | def tensor_to_numpy(tensor, default=None): 46 | if tensor is None: 47 | return default 48 | else: 49 | return tensor.detach().cpu().numpy() 50 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/transf_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 10 | """ 11 | import numpy as np 12 | 13 | import PIL.Image as pil_img 14 | import time 15 | from loguru import logger 16 | import cv2 17 | 18 | 19 | def get_transform(center, scale, res, rot=0): 20 | """ 21 | General image processing functions 22 | """ 23 | # Generate transformation matrix 24 | h = 200 * scale 25 | t = np.zeros((3, 3), dtype=np.float32) 26 | t[0, 0] = float(res[1]) / h 27 | t[1, 1] = float(res[0]) / h 28 | t[0, 2] = res[1] * (-float(center[0]) / h + .5) 29 | t[1, 2] = res[0] * (-float(center[1]) / h + .5) 30 | t[2, 2] = 1 31 | if not rot == 0: 32 | rot = -rot # To match direction of rotation from cropping 33 | rot_mat = np.zeros((3, 3), dtype=np.float32) 34 | rot_rad = rot * np.pi / 180 35 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 36 | rot_mat[0, :2] = [cs, -sn] 37 | rot_mat[1, :2] = [sn, cs] 38 | rot_mat[2, 2] = 1 39 | # Need to rotate around center 40 | t_mat = np.eye(3) 41 | t_mat[0, 2] = -res[1] / 2 42 | t_mat[1, 2] = -res[0] / 2 43 | t_inv = t_mat.copy() 44 | t_inv[:2, 2] *= -1 45 | t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) 46 | return t.astype(np.float32) 47 | 48 | 49 | # Consistent with https://github.com/bearpaw/pytorch-pose 50 | # and the lua version of https://github.com/anewell/pose-hg-train 51 | def transform(pt, center, scale, res, invert=0, rot=0): 52 | # Transform pixel location to different reference 53 | t = get_transform(center, scale, res, rot=rot) 54 | if invert: 55 | t = np.linalg.inv(t) 56 | new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.], dtype=np.float32).T 57 | new_pt = np.dot(t, new_pt) 58 | return new_pt[:2].astype(int) + 1 59 | 60 | 61 | def crop(img, center, scale, res, rot=0, dtype=np.float32): 62 | # Upper left point 63 | ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1 64 | # Bottom right point 65 | br = np.array(transform([res[0] + 1, res[1] + 1], 66 | center, scale, res, invert=1)) - 1 67 | 68 | # size of 
cropped image 69 | # crop_shape = [br[1] - ul[1], br[0] - ul[0]] 70 | # Padding so that when rotated proper amount of context is included 71 | pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2) 72 | 73 | if not rot == 0: 74 | ul -= pad 75 | br += pad 76 | 77 | new_shape = [br[1] - ul[1], br[0] - ul[0]] 78 | if len(img.shape) > 2: 79 | new_shape += [img.shape[2]] 80 | new_shape = list(map(int, new_shape)) 81 | new_img = np.zeros(new_shape, dtype=img.dtype) 82 | 83 | # Range to fill new array 84 | new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0] 85 | new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1] 86 | 87 | # Range to sample from original image 88 | old_x = max(0, ul[0]), min(len(img[0]), br[0]) 89 | old_y = max(0, ul[1]), min(len(img), br[1]) 90 | 91 | new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]] 92 | 93 | # pixel_scale = 1.0 if new_img.max() > 1.0 else 255 94 | # resample = pil_img.BILINEAR 95 | if not rot == 0: 96 | new_H, new_W, _ = new_img.shape 97 | 98 | rotn_center = (new_W / 2.0, new_H / 2.0) 99 | M = cv2.getRotationMatrix2D(rotn_center, rot, 1.0).astype(np.float32) 100 | 101 | new_img = cv2.warpAffine(new_img, M, tuple(new_shape[:2]), 102 | cv2.INTER_LINEAR_EXACT) 103 | new_img = new_img[pad:new_H - pad, pad:new_W - pad] 104 | 105 | output = cv2.resize(new_img, tuple(res), interpolation=cv2.INTER_LINEAR) 106 | 107 | return output.astype(np.float32) 108 | 109 | 110 | def is_invalid_image(img, center, scale, res, rot=0, dtype=np.float32): 111 | # Upper left point 112 | ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1 113 | # Bottom right point 114 | br = np.array(transform([res[0] + 1, res[1] + 1], 115 | center, scale, res, invert=1)) - 1 116 | # size of cropped image 117 | # crop_shape = [br[1] - ul[1], br[0] - ul[0]] 118 | # Padding so that when rotated proper amount of context is included 119 | pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2) 120 | 121 | if not rot == 0: 122 | ul -= pad 123 | br += pad 124 | 125 | new_shape = [br[1] - ul[1], br[0] - ul[0]] 126 | if len(img.shape) > 2: 127 | new_shape += [img.shape[2]] 128 | new_shape = list(map(int, new_shape)) 129 | new_img = np.zeros(new_shape, dtype=img.dtype) 130 | 131 | # Range to fill new array 132 | new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0] 133 | new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1] 134 | 135 | # Range to sample from original image 136 | old_x = max(0, ul[0]), min(len(img[0]), br[0]) 137 | old_y = max(0, ul[1]), min(len(img), br[1]) 138 | 139 | try: 140 | new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]] 141 | except ValueError: 142 | return True 143 | 144 | return False 145 | -------------------------------------------------------------------------------- /TempCLR/TempCLR/utils/typing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | from typing import NewType, List, Union, Tuple, Optional 12 | from dataclasses import dataclass, fields 13 | import numpy as np 14 | import torch 15 | from yacs.config import CfgNode as CN 16 | 17 | 18 | __all__ = [ 19 | 'CN', 20 | 'Tensor', 21 | 'Array', 22 | 'IntList', 23 | 'IntTuple', 24 | 'IntPair', 25 | 'FloatList', 26 | 'FloatTuple', 27 | 'StringTuple', 28 | 'StringList', 29 | 'TensorTuple', 30 | 'TensorList', 31 | 'DataLoader', 32 | 'BlendShapeDescription', 33 | 'AppearanceDescription', 34 | ] 35 | 36 | 37 | Tensor = NewType('Tensor', torch.Tensor) 38 | Array = NewType('Array', np.ndarray) 39 | IntList = NewType('IntList', List[int]) 40 | IntTuple = NewType('IntTuple', Tuple[int]) 41 | IntPair = NewType('IntPair', Tuple[int, int]) 42 | FloatList = NewType('FloatList', List[float]) 43 | FloatTuple = NewType('FloatTuple', Tuple[float]) 44 | StringTuple = NewType('StringTuple', Tuple[str]) 45 | StringList = NewType('StringList', List[str]) 46 | 47 | TensorTuple = NewType('TensorTuple', Tuple[Tensor]) 48 | TensorList = NewType('TensorList', List[Tensor]) 49 | 50 | DataLoader = torch.utils.data.DataLoader 51 | 52 | 53 | @dataclass 54 | class BlendShapeDescription: 55 | dim: int 56 | mean: Optional[Tensor] = None 57 | 58 | def keys(self): 59 | return [f.name for f in fields(self)] 60 | 61 | def __getitem__(self, key): 62 | for f in fields(self): 63 | if f.name == key: 64 | return getattr(self, key) 65 | 66 | 67 | @dataclass 68 | class AppearanceDescription: 69 | dim: int 70 | mean: Optional[Tensor] = None 71 | 72 | def keys(self): 73 | return [f.name for f in fields(self)] 74 | 75 | def __getitem__(self, key): 76 | for f in fields(self): 77 | if f.name == key: 78 | return getattr(self, key) 79 | -------------------------------------------------------------------------------- /TempCLR/configs/evaluation_freihand.yaml: -------------------------------------------------------------------------------- 1 | checkpoint_folder: 'checkpoints' 2 | output_folder: '$INFERENCE/FreiHAND_model' 3 | summary_folder: 'summary' 4 | 5 | # Set this two flags True if you want to save re-projection images. 
6 | save_reproj_images: False 7 | create_image_summaries: False 8 | 9 | # Flag for using the GPU 10 | use_cuda: True 11 | float_dtype: "float32" 12 | is_training: False 13 | part_key: 'hand' 14 | 15 | datasets: 16 | hand: 17 | batch_size: 16 18 | splits: 19 | test: [ 'freihand' ] 20 | num_workers: 21 | test: 0 22 | 23 | transforms: 24 | max_size: 800 25 | 26 | freihand: 27 | data_folder: '$DATASETS/freihand/' 28 | metrics: [ 'mpjpe', 'v2v' ] 29 | file_format: 'json' 30 | return_vertices: True 31 | 32 | ho3d: 33 | data_folder: '$DATASETS/HO3D_v2/' 34 | metrics: [ 'mpjpe' ] 35 | file_format: 'pkl' 36 | 37 | 38 | network: 39 | hand_randomize_global_rot: False 40 | hand_global_rot_min: -90.0 41 | hand_global_rot_max: 90.0 42 | hand_global_rot_noise_prob: 0.3 43 | 44 | hand_add_shape_noise: False 45 | hand_shape_std: 1.0 46 | hand_shape_prob: 0.3 47 | 48 | add_hand_pose_noise: False 49 | hand_pose_std: 1.0 50 | num_hand_components: 3 51 | hand_noise_prob: 0.3 52 | 53 | type: 'MANORegressor' 54 | hand: 55 | type: 'iterative-mlp' 56 | num_stages: 3 57 | pose_last_stage: True 58 | backbone: 59 | type: 'resnet50' 60 | pretrained: True 61 | mlp: 62 | layers: [ 1024, 1024 ] 63 | dropout: 0.0 64 | activation: 65 | type: 'relu' 66 | normalization: 67 | type: 'none' 68 | camera: 69 | weak_persp: 70 | regress_translation: True 71 | regress_scale: True 72 | mean_scale: 0.9 73 | hand_model: 74 | type: 'mano' 75 | model_folder: '$MODELS/' 76 | 77 | mano: 78 | betas: 79 | num: 10 80 | extra_joint_path: 'mano_right_extra_joints.yaml' 81 | wrist_pose: 82 | type: 'cont_rot_repr' 83 | hand_pose: 84 | type: 'cont_rot_repr' 85 | -------------------------------------------------------------------------------- /TempCLR/configs/evaluation_ho3d.yaml: -------------------------------------------------------------------------------- 1 | checkpoint_folder: 'checkpoints' 2 | output_folder: '$INFERENCE/HO3D_model' 3 | summary_folder: 'summary' 4 | 5 | # Set these two flags to True if you want to save re-projection images.
6 | save_reproj_images: True 7 | create_image_summaries: True 8 | 9 | # Flag for using the GPU 10 | use_cuda: True 11 | float_dtype: "float32" 12 | is_training: False 13 | part_key: 'hand' 14 | 15 | datasets: 16 | hand: 17 | batch_size: 16 18 | splits: 19 | test: [ 'ho3d' ] 20 | num_workers: 21 | test: 8 22 | 23 | transforms: 24 | max_size: 800 25 | 26 | freihand: 27 | data_folder: '$DATASETS/freihand/' 28 | metrics: [ 'mpjpe', 'v2v' ] 29 | file_format: 'json' 30 | return_vertices: True 31 | 32 | ho3d: 33 | data_folder: '$DATASETS/HO3D_v2/' 34 | metrics: [ 'mpjpe' ] 35 | file_format: 'pkl' 36 | 37 | 38 | network: 39 | hand_randomize_global_rot: False 40 | hand_global_rot_min: -90.0 41 | hand_global_rot_max: 90.0 42 | hand_global_rot_noise_prob: 0.3 43 | 44 | hand_add_shape_noise: False 45 | hand_shape_std: 1.0 46 | hand_shape_prob: 0.3 47 | 48 | add_hand_pose_noise: False 49 | hand_pose_std: 1.0 50 | num_hand_components: 3 51 | hand_noise_prob: 0.3 52 | 53 | type: 'MANORegressor' 54 | hand: 55 | type: 'iterative-mlp' 56 | num_stages: 3 57 | pose_last_stage: True 58 | backbone: 59 | type: 'resnet18' 60 | pretrained: True 61 | mlp: 62 | layers: [ 1024, 1024 ] 63 | dropout: 0.0 64 | activation: 65 | type: 'relu' 66 | normalization: 67 | type: 'none' 68 | camera: 69 | weak_persp: 70 | regress_translation: True 71 | regress_scale: True 72 | mean_scale: 0.9 73 | hand_model: 74 | type: 'mano' 75 | model_folder: '$MODELS/' 76 | 77 | mano: 78 | betas: 79 | num: 10 80 | extra_joint_path: 'mano_right_extra_joints.yaml' 81 | wrist_pose: 82 | type: 'cont_rot_repr' 83 | hand_pose: 84 | type: 'cont_rot_repr' 85 | -------------------------------------------------------------------------------- /TempCLR/main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ETH Zurich and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | """ 9 | Our code started from the original code in https://github.com/vchoutas/expose; We have made significant modification to the original code in developing TempCLR. 
10 | """ 11 | 12 | import OpenGL 13 | import os 14 | 15 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 16 | if os.environ.get('TEST_NO_ACCELERATE'): 17 | OpenGL.USE_ACCELERATE = False 18 | 19 | import sys 20 | import os.path as osp 21 | from tqdm import tqdm 22 | import torch 23 | 24 | from threadpoolctl import threadpool_limits 25 | from loguru import logger 26 | 27 | from TempCLR.utils.checkpointer import Checkpointer 28 | from TempCLR.data import make_all_data_loaders 29 | from TempCLR.models.build import build_model 30 | from TempCLR.config import parse_args 31 | from TempCLR.evaluation import build as build_evaluator 32 | 33 | DEFAULT_FORMAT = ('{time:YYYY-MM-DD HH:mm:ss.SSS} |' 34 | ' {level: <8} |' 35 | ' {name}:{function}:' 36 | '{line} - {message}') 37 | 38 | DIST_FORMAT = ('{{time:YYYY-MM-DD HH:mm:ss.SSS}} |' 39 | ' {{level: <8}} |' 40 | ' Rank {rank: <3} |' 41 | ' {{name}}:{{function}}:' 42 | '{{line}} - {{message}}') 43 | 44 | 45 | def main(): 46 | exp_cfg = parse_args() 47 | 48 | device = torch.device(f'cuda') 49 | 50 | if not torch.cuda.is_available(): 51 | logger.error('CUDA is not available!') 52 | sys.exit(3) 53 | 54 | local_rank = 0 55 | distributed = False 56 | output_folder = osp.expandvars(exp_cfg.output_folder) 57 | save_images = exp_cfg.save_reproj_images 58 | logger_format = DEFAULT_FORMAT 59 | logger.remove() 60 | logger.add(lambda x: tqdm.write(x, end=''), 61 | level=exp_cfg.logger_level.upper(), 62 | format=logger_format, 63 | colorize=True) 64 | 65 | logger.info(f'Rank = {local_rank}: device = {device}') 66 | 67 | model_dict = build_model(exp_cfg) 68 | model = model_dict['network'] 69 | 70 | for name, param in model.named_parameters(): 71 | if not param.requires_grad: 72 | continue 73 | logger.opt(ansi=True).info( 74 | f'{name} :' 75 | f' {str(param.requires_grad)}' 76 | f', {str(tuple(param.shape))}') 77 | 78 | # Copy the model to the correct device 79 | model = model.to(device=device) 80 | 81 | checkpoint_folder = osp.join(output_folder, exp_cfg.checkpoint_folder) 82 | os.makedirs(checkpoint_folder, exist_ok=True) 83 | 84 | checkpointer = Checkpointer( 85 | model, save_dir=checkpoint_folder, pretrained=exp_cfg.pretrained, 86 | distributed=distributed, rank=local_rank) 87 | 88 | code_folder = osp.join(output_folder, exp_cfg.code_folder) 89 | os.makedirs(code_folder, exist_ok=True) 90 | 91 | # Set the model to evaluation mode 92 | data_loaders = make_all_data_loaders(exp_cfg, split='test') 93 | 94 | arguments = {'iteration': 0} 95 | extra_checkpoint_data = checkpointer.load_checkpoint() 96 | for key in arguments: 97 | if key in extra_checkpoint_data: 98 | arguments[key] = extra_checkpoint_data[key] 99 | 100 | model.eval() 101 | 102 | evaluator = build_evaluator( 103 | exp_cfg, rank=local_rank, distributed=distributed, save_imgs=save_images) 104 | 105 | with evaluator: 106 | evaluator.run(model, data_loaders, exp_cfg, device, 107 | step=arguments['iteration']) 108 | 109 | 110 | if __name__ == '__main__': 111 | with threadpool_limits(limits=1): 112 | main() 113 | -------------------------------------------------------------------------------- /TempCLR/mano_left_extra_joints.yaml: -------------------------------------------------------------------------------- 1 | left_index: 2 | bc: 3 | - 0.0 4 | - 1.0 5 | - 0.0 6 | face: 411 7 | left_middle: 8 | bc: 9 | - 0.0 10 | - 0.0 11 | - 1.0 12 | face: 674 13 | left_pinky: 14 | bc: 15 | - 0.0 16 | - 1.0 17 | - 0.0 18 | face: 1141 19 | left_ring: 20 | bc: 21 | - 0.0 22 | - 0.0 23 | - 1.0 24 | face: 910 25 | left_thumb: 26 | bc: 
27 | - 0.0 28 | - 0.0 29 | - 1.0 30 | face: 1312 -------------------------------------------------------------------------------- /TempCLR/mano_right_extra_joints.yaml: -------------------------------------------------------------------------------- 1 | index: 2 | bc: 3 | - 0.0 4 | - 1.0 5 | - 0.0 6 | face: 411 7 | middle: 8 | bc: 9 | - 0.0 10 | - 0.0 11 | - 1.0 12 | face: 674 13 | pinky: 14 | bc: 15 | - 0.0 16 | - 1.0 17 | - 0.0 18 | face: 1141 19 | ring: 20 | bc: 21 | - 0.0 22 | - 0.0 23 | - 1.0 24 | face: 910 25 | thumb: 26 | bc: 27 | - 1.0 28 | - 0.0 29 | - 0.0 30 | face: 1277 -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-ait/tempclr/c62d0e17e451a952ad65e0a1321cb13a5467f6cc/assets/.DS_Store -------------------------------------------------------------------------------- /assets/digit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-ait/tempclr/c62d0e17e451a952ad65e0a1321cb13a5467f6cc/assets/digit.gif -------------------------------------------------------------------------------- /assets/tempclr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-ait/tempclr/c62d0e17e451a952ad65e0a1321cb13a5467f6cc/assets/tempclr.gif -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: tempclr-env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _openmp_mutex=4.5=1_gnu 7 | - ca-certificates=2021.7.5=h06a4308_1 8 | - certifi=2021.5.30=py37h06a4308_0 9 | - cudatoolkit=10.1.243=h6bb024c_0 10 | - ld_impl_linux-64=2.35.1=h7274673_9 11 | - libffi=3.3=he6710b0_2 12 | - libgcc-ng=9.3.0=h5101ec6_17 13 | - libgomp=9.3.0=h5101ec6_17 14 | - libstdcxx-ng=9.3.0=hd4cf53a_17 15 | - ncurses=6.2=he6710b0_1 16 | - openssl=1.1.1l=h7f8727e_0 17 | - pip=21.0.1=py37h06a4308_0 18 | - python=3.7.11=h12debd9_0 19 | - readline=8.1=h27cfd23_0 20 | - setuptools=58.0.4=py37h06a4308_0 21 | - sqlite=3.36.0=hc218d9a_0 22 | - tk=8.6.10=hbc83047_0 23 | - wheel=0.37.0=pyhd3eb1b0_1 24 | - xz=5.2.5=h7b6447c_0 25 | - zlib=1.2.11=h7b6447c_3 26 | - pip: 27 | - absl-py==0.14.0 28 | - adabelief-pytorch==0.2.1 29 | - addict==2.4.0 30 | - antlr4-python3-runtime==4.8 31 | - anyio==3.3.1 32 | - argcomplete==1.12.3 33 | - argon2-cffi==21.1.0 34 | - attrs==21.2.0 35 | - babel==2.9.1 36 | - backcall==0.2.0 37 | - bleach==4.1.0 38 | - blessings==1.7 39 | - cachetools==4.2.2 40 | - cffi==1.14.6 41 | - charset-normalizer==2.0.6 42 | - chumpy==0.70 43 | - colorama==0.4.4 44 | - comet-ml==3.17.0 45 | - configobj==5.0.6 46 | - cycler==0.10.0 47 | - cython==0.29.24 48 | - debugpy==1.4.3 49 | - decorator==5.1.0 50 | - defusedxml==0.7.1 51 | - deprecation==2.1.0 52 | - dulwich==0.20.25 53 | - entrypoints==0.3 54 | - everett==2.0.1 55 | - freetype-py==2.2.0 56 | - future==0.18.2 57 | - fvcore==0.1.5.post20210924 58 | - google-auth==1.35.0 59 | - google-auth-oauthlib==0.4.6 60 | - gpustat==0.6.0 61 | - grpcio==1.40.0 62 | - idna==3.2 63 | - imageio==2.9.0 64 | - importlib-metadata==4.8.1 65 | - iopath==0.1.9 66 | - ipykernel==6.4.1 67 | - ipython==7.28.0 68 | - ipython-genutils==0.2.0 69 | - ipywidgets==7.6.5 70 | - jedi==0.18.0 71 | - jinja2==3.0.1 72 | - joblib==1.0.1 73 | - 
jpeg4py==0.1.4 74 | - json5==0.9.6 75 | - jsonschema==3.2.0 76 | - jupyter-client==7.0.3 77 | - jupyter-core==4.8.1 78 | - jupyter-packaging==0.10.5 79 | - jupyter-server==1.11.0 80 | - jupyterlab==3.1.13 81 | - jupyterlab-pygments==0.1.2 82 | - jupyterlab-server==2.8.2 83 | - jupyterlab-widgets==1.0.2 84 | - kiwisolver==1.3.2 85 | - kornia==0.5.0 86 | - loguru==0.5.3 87 | - markdown==3.3.4 88 | - markupsafe==2.0.1 89 | - matplotlib==3.4.3 90 | - matplotlib-inline==0.1.3 91 | - mistune==0.8.4 92 | - nbclassic==0.3.2 93 | - nbclient==0.5.4 94 | - nbconvert==6.2.0 95 | - nbformat==5.1.3 96 | - nest-asyncio==1.5.1 97 | - networkx==2.6.3 98 | - notebook==6.4.4 99 | - numpy==1.21.2 100 | - nvidia-ml-py3==7.352.0 101 | - oauthlib==3.1.1 102 | - omegaconf==2.1.1 103 | - open3d==0.13.0 104 | - opencv-python==4.5.3.56 105 | - packaging==21.0 106 | - pandas==1.3.3 107 | - pandocfilters==1.5.0 108 | - parso==0.8.2 109 | - pexpect==4.8.0 110 | - pickleshare==0.7.5 111 | - pillow==8.3.2 112 | - portalocker==2.3.2 113 | - prometheus-client==0.11.0 114 | - prompt-toolkit==3.0.20 115 | - protobuf==3.18.0 116 | - psutil==5.8.0 117 | - ptyprocess==0.7.0 118 | - pyasn1==0.4.8 119 | - pyasn1-modules==0.2.8 120 | - pycocotools==2.0.2 121 | - pycparser==2.20 122 | - pyglet==1.5.21 123 | - pygments==2.10.0 124 | - pyopengl==3.1.0 125 | - pyopengl-accelerate==3.1.5 126 | - pyparsing==2.4.7 127 | - pyrender==0.1.45 128 | - pyrsistent==0.18.0 129 | - python-dateutil==2.8.2 130 | - python-fcl==0.0.12 131 | - pytorch3d==0.3.0 132 | - pytz==2021.1 133 | - pywavelets==1.1.1 134 | - pyyaml==5.4.1 135 | - pyzmq==22.3.0 136 | - requests==2.26.0 137 | - requests-oauthlib==1.3.0 138 | - requests-toolbelt==0.9.1 139 | - requests-unixsocket==0.2.0 140 | - rsa==4.7.2 141 | - scikit-image==0.18.3 142 | - scikit-learn==1.0 143 | - scipy==1.7.1 144 | - semantic-version==2.8.5 145 | - send2trash==1.8.0 146 | - six==1.16.0 147 | - smplx==0.1.28 148 | - sniffio==1.2.0 149 | - tabulate==0.8.9 150 | - tensorboard==2.6.0 151 | - tensorboard-data-server==0.6.1 152 | - tensorboard-plugin-wit==1.8.0 153 | - termcolor==1.1.0 154 | - terminado==0.12.1 155 | - testpath==0.5.0 156 | - threadpoolctl==2.2.0 157 | - tifffile==2021.8.30 158 | - tomlkit==0.7.2 159 | - torch==1.6.0 160 | - torchvision==0.7.0 161 | - tornado==6.1 162 | - tqdm==4.62.3 163 | - traitlets==5.1.0 164 | - trimesh==3.9.32 165 | - typing-extensions==3.10.0.2 166 | - urllib3==1.26.7 167 | - wcwidth==0.2.5 168 | - webencodings==0.5.1 169 | - websocket-client==1.2.1 170 | - werkzeug==2.0.1 171 | - widgetsnbextension==3.5.1 172 | - wrapt==1.12.1 173 | - wurlitzer==3.0.2 174 | - yacs==0.1.8 175 | - zipp==3.5.0 176 | -------------------------------------------------------------------------------- /mano_left_extra_joints.yaml: -------------------------------------------------------------------------------- 1 | left_index: 2 | bc: 3 | - 0.0 4 | - 1.0 5 | - 0.0 6 | face: 411 7 | left_middle: 8 | bc: 9 | - 0.0 10 | - 0.0 11 | - 1.0 12 | face: 674 13 | left_pinky: 14 | bc: 15 | - 0.0 16 | - 1.0 17 | - 0.0 18 | face: 1141 19 | left_ring: 20 | bc: 21 | - 0.0 22 | - 0.0 23 | - 1.0 24 | face: 910 25 | left_thumb: 26 | bc: 27 | - 0.0 28 | - 0.0 29 | - 1.0 30 | face: 1312 31 | -------------------------------------------------------------------------------- /mano_right_extra_joints.yaml: -------------------------------------------------------------------------------- 1 | right_index: 2 | bc: 3 | - 0.0 4 | - 1.0 5 | - 0.0 6 | face: 411 7 | right_middle: 8 | bc: 9 | - 0.0 10 | - 0.0 11 | - 1.0 
12 | face: 674 13 | right_pinky: 14 | bc: 15 | - 0.0 16 | - 1.0 17 | - 0.0 18 | face: 1141 19 | right_ring: 20 | bc: 21 | - 0.0 22 | - 0.0 23 | - 1.0 24 | face: 910 25 | right_thumb: 26 | bc: 27 | - 1.0 28 | - 0.0 29 | - 0.0 30 | face: 1277 31 | --------------------------------------------------------------------------------
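Note on the extra-joints files: each entry in mano_left_extra_joints.yaml / mano_right_extra_joints.yaml maps a fingertip name to a mesh triangle index ('face') and three barycentric weights ('bc'); the fingertip joint is typically obtained as the barycentric combination of that triangle's vertices. A minimal sketch of this computation, assuming MANO vertices of shape [778, 3] and a faces array of shape [1538, 3]; the helper name extra_joints_from_mesh is hypothetical and the repository's own implementation may differ:

    import numpy as np
    import yaml

    def extra_joints_from_mesh(vertices, faces, extra_joints_path):
        # vertices: [num_vertices, 3] MANO mesh vertices
        # faces:    [num_faces, 3] vertex indices per triangle
        # Illustrative sketch only, not the repository's own code.
        with open(extra_joints_path) as f:
            entries = yaml.safe_load(f)
        joints = {}
        for name, entry in entries.items():
            corners = vertices[faces[entry['face']]]            # [3, 3] triangle corner positions
            bc = np.asarray(entry['bc'], dtype=np.float32)      # barycentric weights, sum to 1
            joints[name] = (bc[:, None] * corners).sum(axis=0)  # weighted combination of corners
        return joints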