├── .gitignore ├── LICENSE ├── README.md ├── assets └── img │ └── overview.png ├── conf ├── democap │ └── democap_HRNET_2_views_fp │ │ ├── democap_data.yaml │ │ ├── democap_losses.yaml │ │ ├── democap_metrics.yaml │ │ ├── democap_model.yaml │ │ ├── democap_official.yaml │ │ └── democap_options.yaml └── src │ ├── data │ ├── test │ │ └── dataset │ │ │ └── human_pose │ │ │ └── H4DIR.yaml │ ├── train │ │ └── dataset │ │ │ └── human_pose │ │ │ └── H4DIR.yaml │ └── val │ │ └── dataset │ │ └── human_pose │ │ └── H4DIR.yaml │ └── model │ ├── modules │ └── models │ │ ├── cmpm.yaml │ │ ├── cpm.yaml │ │ ├── highres_standard_1.yaml │ │ ├── highres_standard_2.yaml │ │ ├── hopenet.yaml │ │ ├── hourglass_1.yaml │ │ ├── hourglass_2.yaml │ │ ├── hrnet_1.yaml │ │ ├── hrnet_2.yaml │ │ ├── hrnet_e2e_1.yaml │ │ ├── hrnet_e2e_2.yaml │ │ ├── hrnet_ps_1.yaml │ │ ├── hrnet_ps_2.yaml │ │ ├── oml_dual.yaml │ │ ├── stacked_hourglass_1.yaml │ │ ├── stacked_hourglass_2.yaml │ │ ├── stacked_hourglass_e2e_1.yaml │ │ ├── stacked_hourglass_e2e_2.yaml │ │ ├── stage_transition_standard_1.yaml │ │ ├── stage_transition_standard_2.yaml │ │ ├── start_transition_standard_1.yaml │ │ ├── start_transition_standard_2.yaml │ │ ├── top_branch_1.yaml │ │ └── top_branch_2.yaml │ ├── monads │ ├── distribution │ │ └── zmean.yaml │ └── keypoints │ │ └── fuse_coords.yaml │ └── validation │ └── metric │ └── human_pose │ ├── MAE.yaml │ └── RMSE.yaml └── src ├── data └── datasets │ └── human_pose │ └── H4DIR │ ├── h4dir.py │ └── importers │ ├── __init__.py │ ├── enums.py │ ├── image.py │ ├── loader.py │ ├── markermap.py │ └── projections.py ├── modules └── lightning │ └── models │ ├── __init__.py │ ├── cmpm.py │ ├── cpm.py │ ├── dsntnn.py │ ├── graphunet.py │ ├── hopenet.py │ ├── hrnet_e2e.py │ ├── hrnet_mod.py │ ├── hrnet_ps.py │ ├── oml_dual.py │ ├── resnet.py │ ├── stacked_hourglass.py │ └── stacked_hourglass_e2e.py ├── monads ├── __init__.py ├── distribution │ └── zmean.py └── keypoints │ └── fuse_coords.py └── validation └── metrics └── human_pose ├── mae.py └── rmse.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # other 132 | prive/ 133 | actions/ 134 | .vscode/ 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution 4.0 International 2 | 3 | Creative Commons Corporation ("Creative Commons") is not a law firm and 4 | does not provide legal services or legal advice. Distribution of 5 | Creative Commons public licenses does not create a lawyer-client or 6 | other relationship. Creative Commons makes its licenses and related 7 | information available on an "as-is" basis. Creative Commons gives no 8 | warranties regarding its licenses, any material licensed under their 9 | terms and conditions, or any related information. Creative Commons 10 | disclaims all liability for damages resulting from their use to the 11 | fullest extent possible. 12 | 13 | Using Creative Commons Public Licenses 14 | 15 | Creative Commons public licenses provide a standard set of terms and 16 | conditions that creators and other rights holders may use to share 17 | original works of authorship and other material subject to copyright and 18 | certain other rights specified in the public license below. The 19 | following considerations are for informational purposes only, are not 20 | exhaustive, and do not form part of our licenses. 21 | 22 | Considerations for licensors: Our public licenses are intended for use 23 | by those authorized to give the public permission to use material in 24 | ways otherwise restricted by copyright and certain other rights. Our 25 | licenses are irrevocable. 
Licensors should read and understand the terms 26 | and conditions of the license they choose before applying it. Licensors 27 | should also secure all rights necessary before applying our licenses so 28 | that the public can reuse the material as expected. Licensors should 29 | clearly mark any material not subject to the license. This includes 30 | other CC-licensed material, or material used under an exception or 31 | limitation to copyright. More considerations for licensors : 32 | wiki.creativecommons.org/Considerations_for_licensors 33 | 34 | Considerations for the public: By using one of our public licenses, a 35 | licensor grants the public permission to use the licensed material under 36 | specified terms and conditions. If the licensor's permission is not 37 | necessary for any reason–for example, because of any applicable 38 | exception or limitation to copyright–then that use is not regulated by 39 | the license. Our licenses grant only permissions under copyright and 40 | certain other rights that a licensor has authority to grant. Use of the 41 | licensed material may still be restricted for other reasons, including 42 | because others have copyright or other rights in the material. A 43 | licensor may make special requests, such as asking that all changes be 44 | marked or described. Although not required by our licenses, you are 45 | encouraged to respect those requests where reasonable. More 46 | considerations for the public : 47 | wiki.creativecommons.org/Considerations_for_licensees 48 | 49 | Creative Commons Attribution 4.0 International Public License 50 | 51 | By exercising the Licensed Rights (defined below), You accept and agree 52 | to be bound by the terms and conditions of this Creative Commons 53 | Attribution 4.0 International Public License ("Public License"). To the 54 | extent this Public License may be interpreted as a contract, You are 55 | granted the Licensed Rights in consideration of Your acceptance of these 56 | terms and conditions, and the Licensor grants You such rights in 57 | consideration of benefits the Licensor receives from making the Licensed 58 | Material available under these terms and conditions. 59 | 60 | Section 1 – Definitions. 61 | 62 | - a. Adapted Material means material subject to Copyright and Similar 63 | Rights that is derived from or based upon the Licensed Material and 64 | in which the Licensed Material is translated, altered, arranged, 65 | transformed, or otherwise modified in a manner requiring permission 66 | under the Copyright and Similar Rights held by the Licensor. For 67 | purposes of this Public License, where the Licensed Material is a 68 | musical work, performance, or sound recording, Adapted Material is 69 | always produced where the Licensed Material is synched in timed 70 | relation with a moving image. 71 | - b. Adapter's License means the license You apply to Your Copyright 72 | and Similar Rights in Your contributions to Adapted Material in 73 | accordance with the terms and conditions of this Public License. 74 | - c. Copyright and Similar Rights means copyright and/or similar 75 | rights closely related to copyright including, without limitation, 76 | performance, broadcast, sound recording, and Sui Generis Database 77 | Rights, without regard to how the rights are labeled or categorized. 78 | For purposes of this Public License, the rights specified in Section 79 | 2(b)(1)-(2) are not Copyright and Similar Rights. 80 | - d. 
Effective Technological Measures means those measures that, in 81 | the absence of proper authority, may not be circumvented under laws 82 | fulfilling obligations under Article 11 of the WIPO Copyright Treaty 83 | adopted on December 20, 1996, and/or similar international 84 | agreements. 85 | - e. Exceptions and Limitations means fair use, fair dealing, and/or 86 | any other exception or limitation to Copyright and Similar Rights 87 | that applies to Your use of the Licensed Material. 88 | - f. Licensed Material means the artistic or literary work, database, 89 | or other material to which the Licensor applied this Public License. 90 | - g. Licensed Rights means the rights granted to You subject to the 91 | terms and conditions of this Public License, which are limited to 92 | all Copyright and Similar Rights that apply to Your use of the 93 | Licensed Material and that the Licensor has authority to license. 94 | - h. Licensor means the individual(s) or entity(ies) granting rights 95 | under this Public License. 96 | - i. Share means to provide material to the public by any means or 97 | process that requires permission under the Licensed Rights, such as 98 | reproduction, public display, public performance, distribution, 99 | dissemination, communication, or importation, and to make material 100 | available to the public including in ways that members of the public 101 | may access the material from a place and at a time individually 102 | chosen by them. 103 | - j. Sui Generis Database Rights means rights other than copyright 104 | resulting from Directive 96/9/EC of the European Parliament and of 105 | the Council of 11 March 1996 on the legal protection of databases, 106 | as amended and/or succeeded, as well as other essentially equivalent 107 | rights anywhere in the world. 108 | - k. You means the individual or entity exercising the Licensed Rights 109 | under this Public License. Your has a corresponding meaning. 110 | 111 | Section 2 – Scope. 112 | 113 | - a. License grant. 114 | - 1. Subject to the terms and conditions of this Public License, 115 | the Licensor hereby grants You a worldwide, royalty-free, 116 | non-sublicensable, non-exclusive, irrevocable license to 117 | exercise the Licensed Rights in the Licensed Material to: 118 | - A. reproduce and Share the Licensed Material, in whole or in 119 | part; and 120 | - B. produce, reproduce, and Share Adapted Material. 121 | - 2. Exceptions and Limitations. For the avoidance of doubt, where 122 | Exceptions and Limitations apply to Your use, this Public 123 | License does not apply, and You do not need to comply with its 124 | terms and conditions. 125 | - 3. Term. The term of this Public License is specified in Section 126 | 6(a). 127 | - 4. Media and formats; technical modifications allowed. The 128 | Licensor authorizes You to exercise the Licensed Rights in all 129 | media and formats whether now known or hereafter created, and to 130 | make technical modifications necessary to do so. The Licensor 131 | waives and/or agrees not to assert any right or authority to 132 | forbid You from making technical modifications necessary to 133 | exercise the Licensed Rights, including technical modifications 134 | necessary to circumvent Effective Technological Measures. For 135 | purposes of this Public License, simply making modifications 136 | authorized by this Section 2(a)(4) never produces Adapted 137 | Material. 138 | - 5. Downstream recipients. 139 | - A. Offer from the Licensor – Licensed Material. 
Every 140 | recipient of the Licensed Material automatically receives an 141 | offer from the Licensor to exercise the Licensed Rights 142 | under the terms and conditions of this Public License. 143 | - B. No downstream restrictions. You may not offer or impose 144 | any additional or different terms or conditions on, or apply 145 | any Effective Technological Measures to, the Licensed 146 | Material if doing so restricts exercise of the Licensed 147 | Rights by any recipient of the Licensed Material. 148 | - 6. No endorsement. Nothing in this Public License constitutes or 149 | may be construed as permission to assert or imply that You are, 150 | or that Your use of the Licensed Material is, connected with, or 151 | sponsored, endorsed, or granted official status by, the Licensor 152 | or others designated to receive attribution as provided in 153 | Section 3(a)(1)(A)(i). 154 | - b. Other rights. 155 | - 1. Moral rights, such as the right of integrity, are not 156 | licensed under this Public License, nor are publicity, privacy, 157 | and/or other similar personality rights; however, to the extent 158 | possible, the Licensor waives and/or agrees not to assert any 159 | such rights held by the Licensor to the limited extent necessary 160 | to allow You to exercise the Licensed Rights, but not otherwise. 161 | - 2. Patent and trademark rights are not licensed under this 162 | Public License. 163 | - 3. To the extent possible, the Licensor waives any right to 164 | collect royalties from You for the exercise of the Licensed 165 | Rights, whether directly or through a collecting society under 166 | any voluntary or waivable statutory or compulsory licensing 167 | scheme. In all other cases the Licensor expressly reserves any 168 | right to collect such royalties. 169 | 170 | Section 3 – License Conditions. 171 | 172 | Your exercise of the Licensed Rights is expressly made subject to the 173 | following conditions. 174 | 175 | - a. Attribution. 176 | - 1. If You Share the Licensed Material (including in modified 177 | form), You must: 178 | - A. retain the following if it is supplied by the Licensor 179 | with the Licensed Material: 180 | - i. identification of the creator(s) of the Licensed 181 | Material and any others designated to receive 182 | attribution, in any reasonable manner requested by the 183 | Licensor (including by pseudonym if designated); 184 | - ii. a copyright notice; 185 | - iii. a notice that refers to this Public License; 186 | - iv. a notice that refers to the disclaimer of 187 | warranties; 188 | - v. a URI or hyperlink to the Licensed Material to the 189 | extent reasonably practicable; 190 | - B. indicate if You modified the Licensed Material and retain 191 | an indication of any previous modifications; and 192 | - C. indicate the Licensed Material is licensed under this 193 | Public License, and include the text of, or the URI or 194 | hyperlink to, this Public License. 195 | - 2. You may satisfy the conditions in Section 3(a)(1) in any 196 | reasonable manner based on the medium, means, and context in 197 | which You Share the Licensed Material. For example, it may be 198 | reasonable to satisfy the conditions by providing a URI or 199 | hyperlink to a resource that includes the required information. 200 | - 3. If requested by the Licensor, You must remove any of the 201 | information required by Section 3(a)(1)(A) to the extent 202 | reasonably practicable. 203 | - 4. 
If You Share Adapted Material You produce, the Adapter's 204 | License You apply must not prevent recipients of the Adapted 205 | Material from complying with this Public License. 206 | 207 | Section 4 – Sui Generis Database Rights. 208 | 209 | Where the Licensed Rights include Sui Generis Database Rights that apply 210 | to Your use of the Licensed Material: 211 | 212 | - a. for the avoidance of doubt, Section 2(a)(1) grants You the right 213 | to extract, reuse, reproduce, and Share all or a substantial portion 214 | of the contents of the database; 215 | - b. if You include all or a substantial portion of the database 216 | contents in a database in which You have Sui Generis Database 217 | Rights, then the database in which You have Sui Generis Database 218 | Rights (but not its individual contents) is Adapted Material; and 219 | - c. You must comply with the conditions in Section 3(a) if You Share 220 | all or a substantial portion of the contents of the database. 221 | 222 | For the avoidance of doubt, this Section 4 supplements and does not 223 | replace Your obligations under this Public License where the Licensed 224 | Rights include other Copyright and Similar Rights. 225 | 226 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 227 | 228 | - a. Unless otherwise separately undertaken by the Licensor, to the 229 | extent possible, the Licensor offers the Licensed Material as-is and 230 | as-available, and makes no representations or warranties of any kind 231 | concerning the Licensed Material, whether express, implied, 232 | statutory, or other. This includes, without limitation, warranties 233 | of title, merchantability, fitness for a particular purpose, 234 | non-infringement, absence of latent or other defects, accuracy, or 235 | the presence or absence of errors, whether or not known or 236 | discoverable. Where disclaimers of warranties are not allowed in 237 | full or in part, this disclaimer may not apply to You. 238 | - b. To the extent possible, in no event will the Licensor be liable 239 | to You on any legal theory (including, without limitation, 240 | negligence) or otherwise for any direct, special, indirect, 241 | incidental, consequential, punitive, exemplary, or other losses, 242 | costs, expenses, or damages arising out of this Public License or 243 | use of the Licensed Material, even if the Licensor has been advised 244 | of the possibility of such losses, costs, expenses, or damages. 245 | Where a limitation of liability is not allowed in full or in part, 246 | this limitation may not apply to You. 247 | - c. The disclaimer of warranties and limitation of liability provided 248 | above shall be interpreted in a manner that, to the extent possible, 249 | most closely approximates an absolute disclaimer and waiver of all 250 | liability. 251 | 252 | Section 6 – Term and Termination. 253 | 254 | - a. This Public License applies for the term of the Copyright and 255 | Similar Rights licensed here. However, if You fail to comply with 256 | this Public License, then Your rights under this Public License 257 | terminate automatically. 258 | - b. Where Your right to use the Licensed Material has terminated 259 | under Section 6(a), it reinstates: 260 | - 1. automatically as of the date the violation is cured, provided 261 | it is cured within 30 days of Your discovery of the violation; 262 | or 263 | - 2. upon express reinstatement by the Licensor. 264 | - c. 
For the avoidance of doubt, this Section 6(b) does not affect any 265 | right the Licensor may have to seek remedies for Your violations of 266 | this Public License. 267 | - d. For the avoidance of doubt, the Licensor may also offer the 268 | Licensed Material under separate terms or conditions or stop 269 | distributing the Licensed Material at any time; however, doing so 270 | will not terminate this Public License. 271 | - e. Sections 1, 5, 6, 7, and 8 survive termination of this Public 272 | License. 273 | 274 | Section 7 – Other Terms and Conditions. 275 | 276 | - a. The Licensor shall not be bound by any additional or different 277 | terms or conditions communicated by You unless expressly agreed. 278 | - b. Any arrangements, understandings, or agreements regarding the 279 | Licensed Material not stated herein are separate from and 280 | independent of the terms and conditions of this Public License. 281 | 282 | Section 8 – Interpretation. 283 | 284 | - a. For the avoidance of doubt, this Public License does not, and 285 | shall not be interpreted to, reduce, limit, restrict, or impose 286 | conditions on any use of the Licensed Material that could lawfully 287 | be made without permission under this Public License. 288 | - b. To the extent possible, if any provision of this Public License 289 | is deemed unenforceable, it shall be automatically reformed to the 290 | minimum extent necessary to make it enforceable. If the provision 291 | cannot be reformed, it shall be severed from this Public License 292 | without affecting the enforceability of the remaining terms and 293 | conditions. 294 | - c. No term or condition of this Public License will be waived and no 295 | failure to comply consented to unless expressly agreed to by the 296 | Licensor. 297 | - d. Nothing in this Public License constitutes or may be interpreted 298 | as a limitation upon, or waiver of, any privileges and immunities 299 | that apply to the Licensor or You, including from the legal 300 | processes of any jurisdiction or authority. 301 | 302 | Creative Commons is not a party to its public licenses. Notwithstanding, 303 | Creative Commons may elect to apply one of its public licenses to 304 | material it publishes and in those instances will be considered the 305 | "Licensor." The text of the Creative Commons public licenses is 306 | dedicated to the public domain under the CC0 Public Domain Dedication. 307 | Except for the limited purpose of indicating that material is shared 308 | under a Creative Commons public license or as otherwise permitted by the 309 | Creative Commons policies published at creativecommons.org/policies, 310 | Creative Commons does not authorize the use of the trademark "Creative 311 | Commons" or any other trademark or logo of Creative Commons without its 312 | prior written consent including, without limitation, in connection with 313 | any unauthorized modifications to any of its public licenses or any 314 | other arrangements, understandings, or agreements concerning use of 315 | licensed material. For the avoidance of doubt, this paragraph does not 316 | form part of the public licenses. 317 | 318 | Creative Commons may be contacted at creativecommons.org. 319 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeMoCap: Low-cost Marker-based Motion Capture 2 | Official implementation of "DeMoCap: Low-cost Marker-based Motion Capture" method. 
3 | 4 | [![](https://img.shields.io/badge/PDF-DeMoCap-red)](https://rdcu.be/czAXF) 5 | 6 | [![](https://img.shields.io/badge/IJCV-DeMoCap-blueviolet)](https://link.springer.com/article/10.1007/s11263-021-01526-z) 7 | [![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)](https://www.python.org/) 8 | [![Maintainer](https://img.shields.io/badge/maintainer-Anargyros_Chatzitofis-blue)](http://tofis.github.io) 9 | 10 | ![DeMoCap Concept](./assets/img/overview.png) 11 | ### Paper Abstract 12 | Optical marker-based motion capture (MoCap) remains the predominant way to acquire high-fidelity articulated body motions. We introduce DeMoCap, the first data-driven approach for end-to-end marker-based MoCap, using only a sparse setup of spatio-temporally aligned, consumer-grade infrared-depth cameras. Trading off some of their typical features, our approach is the sole robust option for far lower-cost marker-based MoCap than high-end solutions. We introduce an end-to-end differentiable markers-to-pose model to solve a set of challenges such as under-constrained position estimates, noisy input data and spatial configuration invariance. We simultaneously handle depth and marker detection noise, label and localize the markers, and estimate the 3D pose by introducing a novel spatial 3D coordinate regression technique under a multi-view rendering and supervision concept. DeMoCap is driven by a special dataset captured with 4 spatio-temporally aligned low-cost Intel RealSense D415 sensors and a 24 MXT40S camera professional MoCap system, used as input and ground truth, respectively. 13 | 14 | 15 | ## Requirements 16 | This code has been tested with: 17 | - [PyTorch 1.10.2](https://pytorch.org/get-started/previous-versions/) 18 | - [Python 3.8.11](https://www.python.org/downloads/release/python-3811/) 19 | - [CUDA 11.3](https://developer.nvidia.com/cuda-11-3-1-download-archive) 20 | 21 | Besides PyTorch, the following Python packages are needed: 22 | - [moai](https://pypi.org/project/moai-mdk/) 23 | - [opencv-python](https://pypi.org/project/opencv-python/) 24 | - [numpy](https://numpy.org/) 25 | - [matplotlib](https://matplotlib.org/) 26 | - [torchvision](https://pypi.org/project/torchvision/) 27 | - [visdom](https://github.com/facebookresearch/visdom) 28 | 29 | ## Dataset 30 | 31 | The data used for training, validation, and testing of DeMoCap can be downloaded [here](https://drive.google.com/file/d/1R0nqyBaKPp5wfJ0LH4hekUNrq4e3kodt/view?usp=sharing). 32 | To access the data, a Google Drive request with a message confirming consent to the data license (CC-BY-4.0) is mandatory; the decryption password is then sent via email. 33 | 34 | ## Installation 35 | 36 | The code is powered by the [moai-mdk](https://pypi.org/project/moai-mdk/) framework, which enables building the model from a configuration-based implementation along with custom extra modules. 37 | For further details on the framework and how to install it to run this code, please visit its [documentation](https://moai.readthedocs.io/en/latest/).
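For reference, a minimal setup sketch is given below. It assumes a clean Python 3.8 environment and the CUDA 11.3 builds listed above; the PyTorch/torchvision pinning follows the linked previous-versions page, and no specific moai-mdk version is prescribed here, so adjust versions to your system and consult the framework documentation first:

```
# Hypothetical setup sketch -- versions and index URL are assumptions, adjust to your system.
pip install torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
pip install moai-mdk opencv-python numpy matplotlib visdom
```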
38 | 39 | After the successful installation of the requirements, from the root folder of the project, run the command below: 40 | 41 | ``` 42 | moai train democap\democap_HRNET_2_views_fp\democap_official.yaml --config-dir conf H4DIR_train_split={your_train_folder} H4DIR_test_split={your_test_folder} H4DIR_val_split={your_val_folder} +project_path=[{project_path_root}] 43 | ``` 44 | ------ 45 | 46 | If you use the method or find this work useful, please cite: 47 | ``` 48 | @article{chatzitofis2021democap, 49 | title={DeMoCap: Low-Cost Marker-Based Motion Capture}, 50 | author={Chatzitofis, Anargyros and Zarpalas, Dimitrios and Daras, Petros and Kollias, Stefanos}, 51 | journal={International Journal of Computer Vision}, 52 | volume={129}, 53 | number={12}, 54 | pages={3338--3366}, 55 | year={2021}, 56 | publisher={Springer} 57 | } 58 | ``` 59 | -------------------------------------------------------------------------------- /assets/img/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tofis/democap/bc7f8cd27163085f78e164ac464df2336f0c6ad9/assets/img/overview.png -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_data.yaml: -------------------------------------------------------------------------------- 1 | H4DIR_train_split: ??? 2 | H4DIR_test_split: ??? 3 | H4DIR_val_split: ??? 4 | 5 | data: 6 | train: 7 | iterator: 8 | datasets: 9 | H4DIR: 10 | markers_out: 53 11 | joints_out: 19 12 | views: ["f","b", "3d"] 13 | resolution: 160 14 | augment: true 15 | rs: true 16 | test: 17 | iterator: 18 | datasets: 19 | H4DIR: 20 | markers_out: 53 21 | joints_out: 19 22 | views: ["f","b", "3d"] 23 | resolution: 160 24 | augment: false 25 | rs: true 26 | val: 27 | iterator: 28 | datasets: 29 | H4DIR: 30 | markers_out: 53 31 | joints_out: 19 32 | views: ["f","b","3d"] 33 | resolution: 160 34 | augment: true 35 | rs: true -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_losses.yaml: -------------------------------------------------------------------------------- 1 | all_heatmaps_gt: [f_hms_m_gt, b_hms_m_gt, f_hms_j_gt, b_hms_j_gt] 2 | all_heatmaps_pred: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 3 | all_kpts_gt: [f_gt_markers_3d, f_gt_joints_3d] 4 | all_kpts: [pred_markers_3d, pred_joints_3d] 5 | 6 | model: 7 | supervision: 8 | losses: 9 | wing: 10 | omega: 10.0 11 | epsilon: 2.0 12 | wing: 13 | gt: ${all_kpts_gt} 14 | pred: ${all_kpts} 15 | weight: [1, 1] 16 | JS: 17 | gt: ${all_heatmaps_gt} 18 | pred: ${all_heatmaps_pred} 19 | weight: [400, 400, 400, 400] -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_metrics.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | model: 4 | validation: 5 | human_pose_rmse: 6 | gt: [f_gt_markers_3d_original, f_gt_joints_3d_original] 7 | pred: [pred_markers_3d_original, pred_joints_3d_original] 8 | out: [rmse_m, rmse_j] 9 | human_pose_mae: 10 | gt: [f_gt_markers_3d_original, f_gt_joints_3d_original] 11 | pred: [pred_markers_3d_original, pred_joints_3d_original] 12 | out: [mae_m, mae_j] 13 | 14 | indicators: 15 | rmse_X_mae: '[rmse_m] * [rmse_j] * [mae_m] * [mae_j] / 100000000.0' --------------------------------------------------------------------------------
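Note on the metrics configuration above: human_pose_rmse and human_pose_mae each emit per-marker and per-joint values (rmse_m, rmse_j, mae_m, mae_j), and the rmse_X_mae indicator collapses them into a single scalar as their product divided by 100000000.0; this is the quantity monitored (with mode: min) for checkpointing in democap_options.yaml. As a worked example, if all four metrics were equal to 100 (in whatever units the original 3D coordinates use), the indicator would evaluate to 100 * 100 * 100 * 100 / 100000000.0 = 1.0.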
/conf/democap/democap_HRNET_2_views_fp/democap_model.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | conv_type: conv2d 4 | downscale_type: maxpool2d 5 | model: 6 | configuration: 7 | branch1: 8 | in_features: 1 9 | data: [f_depth, b_depth] 10 | out: 11 | - [f_hm_markers, f_features] 12 | - [b_hm_markers, b_features] 13 | branch2: 14 | in_features: 309 15 | data: [f_features, b_features] 16 | out: 17 | - [f_hm_joints, ""] 18 | - [b_hm_joints, ""] 19 | 20 | modules: 21 | branch1: 22 | modules: 23 | highres_standard_1: 24 | residual: 25 | type: bottleneck 26 | bottleneck_features: 128 27 | convolution: conv2d 28 | activation: relu 29 | fuse: 30 | convolution: conv2d 31 | activation: 32 | intermediate: bn2d_relu 33 | prefusion: bn2d 34 | final: relu 35 | upscale: 36 | type: upsample2d 37 | conv_up: true 38 | start_transition_standard_1: 39 | identity: 40 | convolution: conv2d 41 | kernel_size: 3 42 | stride: 1 43 | padding: 1 44 | branched: 45 | convolution: conv2d 46 | downscale: none 47 | kernel_size: 3 48 | stride: 2 49 | padding: 1 50 | stage_transition_standard_1: 51 | branched: 52 | convolution: conv2d 53 | downscale: none 54 | kernel_size: 3 55 | stride: 2 56 | padding: 1 57 | top_branch_1: 58 | convolution: conv2d 59 | activation: relu 60 | kernel_size: 1 61 | padding: 0 62 | inplace: false 63 | branch2: 64 | modules: 65 | highres_standard_2: 66 | residual: 67 | type: bottleneck 68 | bottleneck_features: 128 69 | convolution: conv2d 70 | activation: relu 71 | fuse: 72 | convolution: conv2d 73 | activation: 74 | intermediate: bn2d_relu 75 | prefusion: bn2d 76 | final: relu 77 | upscale: 78 | type: upsample2d 79 | conv_up: true 80 | start_transition_standard_2: 81 | identity: 82 | convolution: conv2d 83 | kernel_size: 3 84 | stride: 1 85 | padding: 1 86 | branched: 87 | convolution: conv2d 88 | downscale: none 89 | kernel_size: 3 90 | stride: 2 91 | padding: 1 92 | stage_transition_standard_2: 93 | branched: 94 | convolution: conv2d 95 | downscale: none 96 | kernel_size: 3 97 | stride: 2 98 | padding: 1 99 | top_branch_2: 100 | convolution: conv2d 101 | activation: relu 102 | kernel_size: 1 103 | padding: 0 104 | inplace: false 105 | 106 | parameters: 107 | optimization: 108 | optimizers: 109 | adam: 110 | lr: 3.0e-4 111 | schedule: 112 | schedulers: 113 | step: 114 | step_size: 4 115 | gamma: 0.95 116 | 117 | monads: 118 | grid: 119 | width: 40 # x 120 | height: 40 # y 121 | inclusive: true 122 | order: xy 123 | mode: norm 124 | 125 | isotropic_gaussian: 126 | std: 3.0 127 | normalize: true 128 | scale: false 129 | grid_type: norm 130 | 131 | center_of_mass: 132 | flip: false 133 | 134 | concat: 135 | dim: 2 136 | 137 | transform: 138 | xyz_in_at: channel 139 | xyz_out_at: channel 140 | transpose: false 141 | 142 | feedforward: 143 | preprocess: 144 | grid: 145 | tensor: [f_depth] 146 | out: [grid] 147 | 148 | isotropic_gaussian: 149 | keypoints: [f_gt_markers_2d, b_gt_markers_2d, f_gt_joints_2d, b_gt_joints_2d] 150 | grid: [grid, grid, grid, grid] 151 | out: [f_hms_m_gt, b_hms_m_gt, f_hms_j_gt, b_hms_j_gt] 152 | 153 | postprocess: 154 | zmean: 155 | heatmaps: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 156 | out: [f_m_z_coords, b_m_z_coords, f_j_z_coords, b_j_z_coords] 157 | 158 | spatial_softmax: 159 | tensor: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 160 | out: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 161 | 162 | center_of_mass: 163 | grid: [grid, grid, grid, grid] 164 | 
heatmaps: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 165 | out: [f_pred_markers_2d, b_pred_markers_2d, f_pred_joints_2d, b_pred_joints_2d] 166 | 167 | concat: 168 | tensors: 169 | - [f_pred_markers_2d, f_m_z_coords] 170 | - [b_pred_markers_2d, b_m_z_coords] 171 | - [f_pred_joints_2d, f_j_z_coords] 172 | - [b_pred_joints_2d, b_j_z_coords] 173 | out: [f_pred_markers_3d, b_pred_markers_3d, f_pred_joints_3d, b_pred_joints_3d] 174 | 175 | fuse_coords: 176 | coords: 177 | - [f_pred_markers_3d, b_pred_markers_3d] 178 | - [f_pred_joints_3d, b_pred_joints_3d] 179 | out: [pred_markers_3d, pred_joints_3d] 180 | 181 | scale_coords: 182 | coords: [f_gt_markers_3d, f_gt_joints_3d, pred_markers_3d, pred_joints_3d] 183 | scales: [f_scale, f_scale, f_scale, f_scale] 184 | out: [f_gt_markers_3d_original, f_gt_joints_3d_original, pred_markers_3d_original, pred_joints_3d_original] -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_official.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | ###### HYDRA 3 | - hydra/job_logging: colorlog 4 | - hydra/hydra_logging: colorlog 5 | 6 | ###### ENGINE 7 | - engine: engine 8 | - engine/train: lightning 9 | - engine/modules: manual_seed 10 | - engine/modules: import 11 | 12 | - engine/visualization: collection 13 | - engine/visualization/visdom: image2d 14 | # - engine/visualization/visdom: feature2d 15 | - engine/visualization/visdom: pose2d 16 | 17 | - engine/log/lightning: collection 18 | - engine/log/lightning/logger: visdom 19 | - engine/log/lightning/logger: tabular 20 | 21 | - engine/checkpoint/lightning: default 22 | 23 | ###### DATA 24 | - data/train/loader: torch 25 | - data/train/iterator: indexed 26 | - data/train/augmentation: none 27 | - src/data/train/dataset/human_pose: H4DIR 28 | 29 | - data/test/loader: torch 30 | - data/test/iterator: indexed 31 | - data/test/augmentation: none 32 | - src/data/test/dataset/human_pose: H4DIR 33 | 34 | - data/val/loader: torch 35 | - data/val/iterator: indexed 36 | - data/val/augmentation: none 37 | - src/data/val/dataset/human_pose: H4DIR 38 | 39 | # ###### MODEL 40 | - model/networks/lightning/factory: cascade 41 | - src/model/modules/models: hrnet_1 42 | - src/model/modules/models: hrnet_2 43 | - src/model/modules/models: highres_standard_1 44 | - src/model/modules/models: highres_standard_2 45 | - src/model/modules/models: top_branch_1 46 | - src/model/modules/models: top_branch_2 47 | - src/model/modules/models: start_transition_standard_1 48 | - src/model/modules/models: stage_transition_standard_1 49 | - src/model/modules/models: start_transition_standard_2 50 | - src/model/modules/models: stage_transition_standard_2 51 | 52 | - model/feedforward: preprocess 53 | - model/monads/generation: grid 54 | - model/monads/distribution/reconstruction: isotropic_gaussian 55 | 56 | - model/feedforward: postprocess 57 | - src/model/monads/distribution: zmean 58 | - model/monads/distribution/prior: spatial_softmax 59 | - model/monads/distribution: center_of_mass 60 | - model/monads/tensor: concat 61 | - src/model/monads/keypoints: fuse_coords 62 | - model/monads/keypoints: scale_coords 63 | 64 | - model/parameters/initialization: default 65 | - model/parameters/optimization: single 66 | - model/parameters/optimization/optimizer: adam 67 | - model/parameters/optimization/scheduling: single 68 | - model/parameters/optimization/scheduling/scheduler: step 69 | - 
model/parameters/regularization: none 70 | 71 | - model/supervision: weighted 72 | - model/supervision/loss/regression/robust: wing 73 | - model/supervision/loss/distribution: JS 74 | 75 | - model/validation: indicators 76 | - src/model/validation/metric/human_pose: rmse 77 | - src/model/validation/metric/human_pose: mae 78 | 79 | - democap/democap_HRNET_2_views_fp/democap_options 80 | - democap/democap_HRNET_2_views_fp/democap_data 81 | - democap/democap_HRNET_2_views_fp/democap_model 82 | - democap/democap_HRNET_2_views_fp/democap_losses 83 | - democap/democap_HRNET_2_views_fp/democap_metrics -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_options.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | experiment: 4 | visdom_ip: localhost 5 | name: democap_hrnet_2_views 6 | batch_size: 16 7 | workers: 4 8 | 9 | engine: 10 | modules: 11 | import: 12 | run_path: false 13 | other_paths: ${project_path} 14 | manual_seed: 15 | seed: 1337 16 | deterministic: true 17 | 18 | trainer: 19 | gpus: [1] 20 | check_val_every_n_epoch: 1 21 | accumulate_grad_batches: 1 22 | max_epochs: 200 23 | deterministic: false 24 | checkpoint: 25 | monitor: rmse_X_mae 26 | mode: min 27 | filename: '{epoch}_{rmse_X_mae:.2f}' 28 | 29 | logging: 30 | name: ${experiment.name} 31 | loggers: 32 | visdom: 33 | name: ${experiment.name} 34 | ip: ${experiment.visdom_ip} 35 | tabular: 36 | name: ${experiment.name} 37 | 38 | visualization: 39 | batch_interval: 100 40 | visualizers: 41 | # feature2d: 42 | # name: ${experiment.name} 43 | # ip: ${experiment.visdom_ip} 44 | # image: [b_hms_j_gt, b_hm_joints] 45 | # type: [color, color] 46 | # colormap: [turbo, turbo] 47 | # transform: [minmax, minmax] 48 | 49 | image2d: 50 | name: ${experiment.name} 51 | ip: ${experiment.visdom_ip} 52 | image: [b_depth] 53 | type: [color] 54 | colormap: [turbo] 55 | transform: [minmax] 56 | 57 | pose2d: 58 | name: ${experiment.name} 59 | ip: ${experiment.visdom_ip} 60 | images: [f_depth] 61 | poses: [human_pose2d] 62 | gt: [f_gt_joints_2d] 63 | pred: [f_pred_joints_2d] 64 | pose_structure: 65 | - [0, 1, 2, 3, 4] 66 | - [5, 6, 7] 67 | - [8, 9, 10] 68 | - [11, 12, 13, 14] 69 | - [15, 16, 17, 18] 70 | coords: [norm] 71 | color_gt: [cyan] 72 | color_pred: [red] 73 | reverse_coords: true 74 | rotate_image: false 75 | use_mask: false -------------------------------------------------------------------------------- /conf/src/data/test/dataset/human_pose/H4DIR.yaml: -------------------------------------------------------------------------------- 1 | # @package data.test.iterator.datasets._name_ 2 | 3 | _target_: src.data.datasets.human_pose.H4DIR.h4dir.H4DIR 4 | root_path: ${H4DIR_test_split} 5 | markers_out: 53 6 | joints_out: 19 7 | views: ["f","b","3d"] 8 | augment: False 9 | rs: True 10 | -------------------------------------------------------------------------------- /conf/src/data/train/dataset/human_pose/H4DIR.yaml: -------------------------------------------------------------------------------- 1 | # @package data.train.iterator.datasets._name_ 2 | 3 | _target_: src.data.datasets.human_pose.H4DIR.h4dir.H4DIR 4 | root_path: ${H4DIR_train_split} 5 | markers_out: 53 6 | joints_out: 19 7 | resolution: 160 8 | views: ["f","b","3d"] 9 | augment: True 10 | rs: True 11 | -------------------------------------------------------------------------------- /conf/src/data/val/dataset/human_pose/H4DIR.yaml: 
-------------------------------------------------------------------------------- 1 | # @package data.val.iterator.datasets._name_ 2 | 3 | _target_: src.data.datasets.human_pose.H4DIR.h4dir.H4DIR 4 | root_path: ${H4DIR_val_split} 5 | markers_out: 53 6 | joints_out: 19 7 | resolution: 160 8 | views: ["f","b","3d"] 9 | augment: False 10 | rs: True 11 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/cmpm.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.CMPM 4 | # configuration: 5 | # in_features: 1 6 | # out_features: 53 7 | # output: _markers_hms_pred -------------------------------------------------------------------------------- /conf/src/model/modules/models/cpm.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.CPM 4 | num_markers: 53 5 | num_joints: 19 -------------------------------------------------------------------------------- /conf/src/model/modules/models/highres_standard_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.highres_standard_1 2 | 3 | _target_: moai.modules.lightning.highres.HighResolution 4 | residual: 5 | type: preactiv_bottleneck 6 | bottleneck_features: 128 7 | convolution: conv2d 8 | activation: relu 9 | fuse: 10 | convolution: conv2d 11 | activation: 12 | intermediate: relu_bn2d 13 | prefusion: bn2d 14 | final: relu 15 | upscale: 16 | type: upsample2d 17 | conv_up: true -------------------------------------------------------------------------------- /conf/src/model/modules/models/highres_standard_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.highres_standard_2 2 | 3 | _target_: moai.modules.lightning.highres.HighResolution 4 | residual: 5 | type: preactiv_bottleneck 6 | bottleneck_features: 128 7 | convolution: conv2d 8 | activation: relu 9 | fuse: 10 | convolution: conv2d 11 | activation: 12 | intermediate: relu_bn2d 13 | prefusion: bn2d 14 | final: relu 15 | upscale: 16 | type: upsample2d 17 | conv_up: true -------------------------------------------------------------------------------- /conf/src/model/modules/models/hopenet.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.HopeNet -------------------------------------------------------------------------------- /conf/src/model/modules/models/hourglass_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.hourglass_1 2 | 3 | _target_: moai.modules.lightning.Hourglass 4 | features: 256 5 | depth: 1 6 | convolution: conv2d 7 | activation: relu_bn2d 8 | residual: bottleneck 9 | # residual: preactiv_bottleneck 10 | downscale: maxpool2d_aa 11 | upscale: upsample2d -------------------------------------------------------------------------------- /conf/src/model/modules/models/hourglass_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.hourglass_2 2 | 3 | _target_: moai.modules.lightning.Hourglass 4 | features: 309 5 | depth: 1 6 | convolution: conv2d 7 | activation: 
relu_bn2d 8 | # residual: preactiv_bottleneck 9 | residual: bottleneck 10 | downscale: maxpool2d_aa 11 | upscale: upsample2d -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: src.modules.lightning.models.hrnet_mod.HRNetMod 4 | configuration: 5 | stages: 4 6 | in_features: 1 7 | out_features: 53 8 | input: ??? 9 | output: _markers_hms_pred 10 | preproc: 11 | stem: 12 | blocks: [conv2d, conv2d] 13 | convolutions: [conv2d, conv2d] 14 | activations: [relu_bn2d, relu_bn2d] 15 | kernel_sizes: [3, 3] 16 | features: [32, 64] 17 | strides: [2, 2] 18 | paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | convolution: conv2d 22 | activation: bn2d_relu 23 | features: 24 | in_features: [64, 256] 25 | out_features: [256, 256] 26 | bottleneck_features: [64, 64] 27 | branches: 28 | block: conv2d 29 | convolution: conv2d 30 | activation: relu 31 | kernel_size: 3 32 | stride: 1 33 | padding: 1 34 | start_features: 32 35 | modules: [1, 4, 3] 36 | depths: 37 | - [4] 38 | - [4, 4, 4, 4] 39 | - [4, 4, 4] 40 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: src.modules.lightning.models.hrnet_mod.HRNetMod 4 | configuration: 5 | stages: 4 6 | in_features: 309 7 | out_features: 19 8 | input: ??? 9 | output: _joints_hms_pred 10 | # preproc: 11 | # stem: 12 | # blocks: [conv2d, conv2d] 13 | # convolutions: [conv2d, conv2d] 14 | # activations: [relu_bn2d, relu_bn2d] 15 | # kernel_sizes: [3, 3] 16 | # features: [32, 64] 17 | # strides: [2, 2] 18 | # paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | # block: preactiv_bottleneck 22 | convolution: conv2d 23 | activation: relu_bn2d 24 | features: 25 | in_features: [64, 309] 26 | out_features: [309, 309] 27 | bottleneck_features: [64, 64] 28 | branches: 29 | block: conv2d 30 | convolution: conv2d 31 | activation: relu 32 | kernel_size: 3 33 | stride: 1 34 | padding: 1 35 | start_features: 32 36 | modules: [1, 4, 3] 37 | depths: 38 | - [4] 39 | - [4, 4, 4, 4] 40 | - [4, 4, 4] -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_e2e_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.HRNetMod_e2e 4 | configuration: 5 | stages: 4 6 | in_features: 1 7 | out_features: 72 8 | input: ??? 
9 | output: _markers_hms_pred 10 | preproc: 11 | stem: 12 | blocks: [conv2d, conv2d] 13 | convolutions: [conv2d, conv2d] 14 | activations: [relu_bn2d, relu_bn2d] 15 | kernel_sizes: [3, 3] 16 | features: [32, 64] 17 | strides: [2, 2] 18 | paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | convolution: conv2d 22 | activation: bn2d_relu 23 | features: 24 | in_features: [64, 256] 25 | out_features: [256, 256] 26 | bottleneck_features: [64, 64] 27 | branches: 28 | block: conv2d 29 | convolution: conv2d 30 | activation: relu 31 | kernel_size: 3 32 | stride: 1 33 | padding: 1 34 | start_features: 32 35 | modules: [1, 4, 3] 36 | depths: 37 | - [4] 38 | - [4, 4, 4, 4] 39 | - [4, 4, 4] 40 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_e2e_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.HRNetMod_e2e 4 | configuration: 5 | stages: 4 6 | in_features: 328 7 | out_features: 72 8 | input: ??? 9 | output: _joints_hms_pred 10 | # preproc: 11 | # stem: 12 | # blocks: [conv2d, conv2d] 13 | # convolutions: [conv2d, conv2d] 14 | # activations: [relu_bn2d, relu_bn2d] 15 | # kernel_sizes: [3, 3] 16 | # features: [32, 64] 17 | # strides: [2, 2] 18 | # paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | # block: preactiv_bottleneck 22 | convolution: conv2d 23 | activation: relu_bn2d 24 | features: 25 | in_features: [64, 328] 26 | out_features: [328, 328] 27 | bottleneck_features: [64, 64] 28 | branches: 29 | block: conv2d 30 | convolution: conv2d 31 | activation: relu 32 | kernel_size: 3 33 | stride: 1 34 | padding: 1 35 | start_features: 32 36 | modules: [1, 4, 3] 37 | depths: 38 | - [4] 39 | - [4, 4, 4, 4] 40 | - [4, 4, 4] -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_ps_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.HRNetModPS 4 | configuration: 5 | stages: 4 6 | in_features: 1 7 | out_features: 53 8 | input: ??? 9 | output: _markers_hms_pred 10 | preproc: 11 | stem: 12 | blocks: [conv2d, conv2d] 13 | convolutions: [conv2d, conv2d] 14 | activations: [relu_bn2d, relu_bn2d] 15 | kernel_sizes: [3, 3] 16 | features: [32, 64] 17 | strides: [2, 2] 18 | paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | convolution: conv2d 22 | activation: bn2d_relu 23 | features: 24 | in_features: [64, 256] 25 | out_features: [256, 256] 26 | bottleneck_features: [64, 64] 27 | branches: 28 | block: conv2d 29 | convolution: conv2d 30 | activation: relu 31 | kernel_size: 3 32 | stride: 1 33 | padding: 1 34 | start_features: 32 35 | modules: [1, 4, 3] 36 | depths: 37 | - [4] 38 | - [4, 4, 4, 4] 39 | - [4, 4, 4] 40 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_ps_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.HRNetModPS 4 | configuration: 5 | stages: 4 6 | in_features: 309 7 | out_features: 19 8 | input: ??? 
9 | output: _joints_hms_pred 10 | # preproc: 11 | # stem: 12 | # blocks: [conv2d, conv2d] 13 | # convolutions: [conv2d, conv2d] 14 | # activations: [relu_bn2d, relu_bn2d] 15 | # kernel_sizes: [3, 3] 16 | # features: [32, 64] 17 | # strides: [2, 2] 18 | # paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | # block: preactiv_bottleneck 22 | convolution: conv2d 23 | activation: relu_bn2d 24 | features: 25 | in_features: [64, 309] 26 | out_features: [309, 309] 27 | bottleneck_features: [64, 64] 28 | branches: 29 | block: conv2d 30 | convolution: conv2d 31 | activation: relu 32 | kernel_size: 3 33 | stride: 1 34 | padding: 1 35 | start_features: 32 36 | modules: [1, 4, 3] 37 | depths: 38 | - [4] 39 | - [4, 4, 4, 4] 40 | - [4, 4, 4] -------------------------------------------------------------------------------- /conf/src/model/modules/models/oml_dual.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.OmlDual 4 | num_markers: 53 5 | num_joints: 19 -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod 4 | configuration: 5 | stacks: 4 6 | in_features: 1 7 | out_features: 53 8 | output: _markers_hms_pred 9 | preproc: 10 | block: conv2d 11 | convolution: conv2d 12 | activation: relu_bn2d 13 | residual: bottleneck 14 | # residual: preactiv_bottleneck 15 | downscale: maxpool2d_aa 16 | stem: 17 | kernel_size: 7 18 | stride: 2 19 | padding: 3 20 | projection: 21 | block: conv2d 22 | convolution: conv2d 23 | activation: relu 24 | prediction: 25 | block: conv2d 26 | convolution: conv2d 27 | activation: relu 28 | dropout: 0.0 29 | merge: 30 | dropout: 0.0 31 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod 4 | configuration: 5 | stacks: 4 6 | in_features: 309 7 | out_features: 19 8 | output: _joints_hms_pred 9 | # preproc: 10 | # block: identity 11 | # convolution: identity 12 | # activation: identity 13 | # residual: identity 14 | # downscale: identity 15 | # stem: 16 | # kernel_size: 7 17 | # stride: 2 18 | # padding: 3 19 | projection: 20 | block: conv2d 21 | convolution: conv2d 22 | activation: relu 23 | prediction: 24 | block: conv2d 25 | convolution: conv2d 26 | activation: relu 27 | dropout: 0.0 28 | merge: 29 | dropout: 0.0 -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_e2e_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod_e2e 4 | configuration: 5 | stacks: 4 6 | in_features: 1 7 | out_features: 72 8 | output: _markers_hms_pred 9 | preproc: 10 | block: conv2d 11 | convolution: conv2d 12 | activation: relu_bn2d 13 | residual: bottleneck 14 | # residual: preactiv_bottleneck 15 | downscale: maxpool2d_aa 16 | stem: 17 | kernel_size: 7 18 | stride: 2 19 | padding: 3 20 | projection: 21 | block: conv2d 22 | 
convolution: conv2d 23 | activation: relu 24 | prediction: 25 | block: conv2d 26 | convolution: conv2d 27 | activation: relu 28 | dropout: 0.0 29 | merge: 30 | dropout: 0.0 31 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_e2e_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod_e2e 4 | configuration: 5 | stacks: 4 6 | in_features: 328 7 | out_features: 72 8 | output: _markers_hms_pred 9 | # preproc: 10 | # block: conv2d 11 | # convolution: conv2d 12 | # activation: relu_bn2d 13 | # residual: bottleneck 14 | # # residual: preactiv_bottleneck 15 | # downscale: maxpool2d_aa 16 | # stem: 17 | # kernel_size: 7 18 | # stride: 2 19 | # padding: 3 20 | projection: 21 | block: conv2d 22 | convolution: conv2d 23 | activation: relu 24 | prediction: 25 | block: conv2d 26 | convolution: conv2d 27 | activation: relu 28 | dropout: 0.0 29 | merge: 30 | dropout: 0.0 31 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/stage_transition_standard_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.stage_transition_standard_1 2 | 3 | _target_: moai.modules.lightning.highres.StageTransition 4 | branched: 5 | convolution: conv2d 6 | activation: relu 7 | downscale: none # maxpool2d 8 | kernel_size: 3 9 | stride: 2 10 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/stage_transition_standard_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.stage_transition_standard_2 2 | 3 | _target_: moai.modules.lightning.highres.StageTransition 4 | branched: 5 | convolution: conv2d 6 | activation: relu 7 | downscale: none # maxpool2d 8 | kernel_size: 3 9 | stride: 2 10 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/start_transition_standard_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.start_transition_standard_1 2 | 3 | _target_: moai.modules.lightning.highres.StartTransition 4 | identity: 5 | convolution: conv2d 6 | activation: relu 7 | kernel_size: 3 8 | stride: 1 9 | padding: 1 10 | branched: 11 | convolution: conv2d 12 | activation: relu 13 | downscale: none # maxpool2d 14 | kernel_size: 3 15 | stride: 2 16 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/start_transition_standard_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.start_transition_standard_2 2 | 3 | _target_: moai.modules.lightning.highres.StartTransition 4 | identity: 5 | convolution: conv2d 6 | activation: relu 7 | kernel_size: 3 8 | stride: 1 9 | padding: 1 10 | branched: 11 | convolution: conv2d 12 | activation: relu 13 | downscale: none # maxpool2d 14 | kernel_size: 3 15 | stride: 2 16 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/top_branch_1.yaml: -------------------------------------------------------------------------------- 1 
| # @package model.modules.branch1.modules.top_branch_1 2 | 3 | _target_: moai.modules.lightning.highres.TopBranchHead 4 | convolution: conv2d 5 | activation: none 6 | kernel_size: 1 7 | padding: 0 8 | inplace: True -------------------------------------------------------------------------------- /conf/src/model/modules/models/top_branch_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.top_branch_2 2 | 3 | _target_: moai.modules.lightning.highres.TopBranchHead 4 | convolution: conv2d 5 | activation: none 6 | kernel_size: 1 7 | padding: 0 8 | inplace: True -------------------------------------------------------------------------------- /conf/src/model/monads/distribution/zmean.yaml: -------------------------------------------------------------------------------- 1 | # @package model.monads._name_ 2 | 3 | _target_: src.monads.distribution.zmean.zMean -------------------------------------------------------------------------------- /conf/src/model/monads/keypoints/fuse_coords.yaml: -------------------------------------------------------------------------------- 1 | # @package model.monads._name_ 2 | 3 | _target_: src.monads.keypoints.fuse_coords.FuseCoords 4 | mode: two -------------------------------------------------------------------------------- /conf/src/model/validation/metric/human_pose/MAE.yaml: -------------------------------------------------------------------------------- 1 | # @package model.validation.metrics.human_pose__name_ 2 | 3 | _target_: src.validation.metrics.human_pose.mae.MAE -------------------------------------------------------------------------------- /conf/src/model/validation/metric/human_pose/RMSE.yaml: -------------------------------------------------------------------------------- 1 | # @package model.validation.metrics.human_pose__name_ 2 | 3 | _target_: src.validation.metrics.human_pose.rmse.RMSE -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/h4dir.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from scipy.spatial.transform import Rotation as R 4 | import logging 5 | from src.data.datasets.human_pose.H4DIR.importers import ( 6 | load_3d_data 7 | ) 8 | from src.data.datasets.human_pose.H4DIR.importers import ( 9 | get_depth_image_from_points, 10 | f_rotate_back, 11 | f_rotate_left, 12 | f_rotate_right 13 | ) 14 | 15 | from torch.utils.data.dataset import Dataset 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | __all__ = ["H4DIR"] 20 | 21 | class H4DIR(Dataset): 22 | def __init__(self, 23 | root_path, 24 | markers_out, 25 | joints_out, 26 | resolution, 27 | views, 28 | augment, 29 | rs, 30 | scale_res=5.0, 31 | x_range = 20.0, 32 | y_range = 360.0, 33 | z_range = 20.0 34 | ): 35 | super(H4DIR,self).__init__() 36 | self.root_path = root_path 37 | self.markers_out = markers_out 38 | self.joints_out = joints_out 39 | self.resolution = resolution 40 | self.views = views 41 | self.augment = augment 42 | self.rs = rs 43 | self.scale_res = scale_res 44 | self.x_range = x_range 45 | self.y_range = y_range 46 | self.z_range = z_range 47 | 48 | if not os.path.exists(root_path): 49 | raise ValueError("{} does not exist, exiting.".format(root_path)) 50 | 51 | self.data = {} 52 | # Iterate over each recorded folder 53 | for recording in os.listdir(root_path): 54 | data_path = os.path.join(root_path, recording) 55 | if not os.path.isdir(data_path): 
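                # skip stray files at the dataset root: only per-recording sub-directories
                # (which appear to hold the per-view *.txt files parsed below) are indexed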
56 | continue 57 | for file in os.listdir(data_path): 58 | full_filename = os.path.join(data_path, file) 59 | filename, ext = os.path.splitext(full_filename) 60 | if ext != ".txt" or "_rs" in filename: # TODO: refactor the data loading and labeling 61 | continue 62 | splits = file.split("_") 63 | if len(splits) == 3 or len(splits) == 4: 64 | _type = splits[0] 65 | _id = splits[1] 66 | _view = splits[2].split('.')[0] 67 | else: 68 | continue 69 | unique_name = recording + "-" + str(_id) 70 | if _view not in self.views: 71 | continue 72 | if unique_name not in self.data: 73 | self.data[unique_name] = {} 74 | if _view not in self.data[unique_name]: 75 | self.data[unique_name][_view] = {} 76 | self.data[unique_name][_view][_type] = full_filename 77 | 78 | def __len__(self): 79 | return len(self.data) 80 | 81 | def __getitem__(self, idx): 82 | key = list(self.data.keys())[idx] 83 | datum = self.data[key] 84 | datum_out = {} 85 | random_rot = R.from_euler('xyz', [random.random() * self.x_range - self.x_range // 2, random.random() * self.y_range - self.y_range // 2, random.random() * self.z_range - self.z_range // 2], degrees=True) 86 | 87 | if "3d" in self.views: 88 | rs_markers_f, gt_markers_f, gt_joints_f, scale, com = load_3d_data( 89 | datum["3d"]["txt"], 90 | datum["3d"]["txt"].replace('gt', 'rs'), 91 | self.markers_out, 92 | self.joints_out, 93 | rs=self.rs, 94 | S1S4=True if "_S1_" in datum["3d"]["txt"] or "_S4_" in datum["3d"]["txt"] else False, # TODO: to discuss the marker placement type 95 | random_rot=random_rot, 96 | augment=self.augment 97 | ) 98 | 99 | for view in self.views: 100 | if (view == "3d"): 101 | continue 102 | else: 103 | if (view == 'f'): 104 | gt_markers = gt_markers_f.clone() 105 | gt_joints = gt_joints_f.clone() 106 | rs_markers = rs_markers_f.clone() 107 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers.clone()) 108 | elif (view == 'b'): 109 | gt_markers = f_rotate_back(gt_markers_f.clone()) 110 | gt_joints = f_rotate_back(gt_joints_f.clone()) 111 | rs_markers = f_rotate_back(rs_markers_f.clone()) 112 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers) 113 | elif (view == 'l'): 114 | gt_markers = f_rotate_left(gt_markers_f.clone()) 115 | gt_joints = f_rotate_left(gt_joints_f.clone()) 116 | rs_markers = f_rotate_left(rs_markers_f.clone()) 117 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers) 118 | elif (view == 'r'): 119 | gt_markers = f_rotate_right(gt_markers_f.clone()) 120 | gt_joints = f_rotate_right(gt_joints_f.clone()) 121 | rs_markers = f_rotate_right(rs_markers_f.clone()) 122 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers) 123 | else: 124 | raise ValueError("Error.
View {} is not supported.", view) 125 | 126 | datum_out.update({ 127 | view + "_depth" : depth_img.squeeze(0), 128 | view + "_gt_markers_3d" : gt_markers, 129 | view + "_gt_markers_2d" : gt_markers[..., :2], 130 | view + "_gt_joints_3d" : gt_joints, 131 | view + "_gt_joints_2d" : gt_joints[..., :2], 132 | view + "_scale" : scale, 133 | view + "_com" : com.unsqueeze(0) 134 | }) 135 | 136 | return datum_out 137 | 138 | def get_data(self): 139 | return self.data -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/__init__.py: -------------------------------------------------------------------------------- 1 | from src.data.datasets.human_pose.H4DIR.importers.loader import ( 2 | load_3d_data 3 | ) 4 | from src.data.datasets.human_pose.H4DIR.importers.enums import ( 5 | joint_selection, 6 | ) 7 | from src.data.datasets.human_pose.H4DIR.importers.markermap import ( 8 | MARKER_S1S4, 9 | MARKER_S1S4_new, 10 | MARKER_S2S3, 11 | MARKER_S2S3_new, 12 | S1S4_Mapping, 13 | S2S3_Mapping, 14 | ) 15 | from src.data.datasets.human_pose.H4DIR.importers.image import ( 16 | get_depth_image_from_points 17 | ) 18 | from src.data.datasets.human_pose.H4DIR.importers.projections import ( 19 | f_rotate_back, 20 | f_rotate_left, 21 | f_rotate_right 22 | ) -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/enums.py: -------------------------------------------------------------------------------- 1 | def get_markers(): 2 | MARKERS = {} 3 | 4 | MARKERS["01"] = (4, 106, 1+ 100) # 00 spinebase 5 | MARKERS["02"] = (8, 104, 2+ 100) # 01 left chest 6 | MARKERS["03"] = (12, 102, 3+ 100) # 02 right chest 7 | MARKERS["04"] = (16, 100, 4+ 100) # 03 left head 8 | MARKERS["05"] = (20, 98, 5+ 100) # 04 right head 9 | MARKERS["06"] = (24, 96, 6+ 100) # 05 back_head 10 | MARKERS["07"] = (28, 94, 7+ 100) # 06 back_high 11 | MARKERS["08"] = (32, 92, 8+ 100) # 07 back_low 12 | MARKERS["09"] = (36, 90, 9+ 100) # 08 left b_shoulder 13 | MARKERS["10"] = (40, 88, 10+ 100) # 09 left f_shoulder 14 | MARKERS["11"] = (44, 86, 11+ 100) # 10 left upperarm 15 | MARKERS["12"] = (48, 84, 12+ 100) # 11 left forearm 16 | MARKERS["13"] = (52, 82, 13+ 100) # 12 right b_shoulder 17 | MARKERS["14"] = (56, 80, 14+ 100) # 13 right f_shoulder 18 | MARKERS["15"] = (60, 78, 15+ 100) # 14 right upperarm 19 | MARKERS["16"] = (64, 76, 16+ 100) # 15 right forearm 20 | MARKERS["17"] = (68, 74, 17+ 100) # 16 left pelvis 21 | MARKERS["18"] = (72, 72, 18+ 100) # 17 left thigh 22 | MARKERS["19"] = (76, 70, 19+ 100) # 18 left calf 23 | MARKERS["20"] = (80, 68, 20+ 100) # 19 right pelvis 24 | MARKERS["21"] = (84, 66, 21+ 100) # 20 right thigh 25 | MARKERS["22"] = (88, 64, 22+ 100) # 21 right calf 26 | 27 | MARKERS["23"] = (92, 62, 23+ 100) # 22 left hand 28 | MARKERS["24"] = (96, 60, 24+ 100) # 23 left foot 29 | 30 | MARKERS["25"] = (100, 58, 25+ 100) # 24 right hand 31 | MARKERS["26"] = (104, 56, 26+ 100) # 25 right foot 32 | 33 | MARKERS["27"] = (108, 54, 27+ 100) # 22 left hand 34 | MARKERS["28"] = (112, 52, 28+ 100) # 23 left foot 35 | 36 | MARKERS["29"] = (116, 50, 29+ 100) # 24 right hand 37 | MARKERS["30"] = (120, 48, 30+ 100) # 25 right foot 38 | 39 | MARKERS["31"] = (124, 46, 31+ 100) # 00 spinebase 40 | MARKERS["32"] = (128, 44, 32+ 100) # 01 left chest 41 | MARKERS["33"] = (132, 42, 33+ 100) # 02 right chest 42 | MARKERS["34"] = (136, 40, 34+ 100) # 03 left head 43 | MARKERS["35"] = (140, 38, 35+ 100) # 04 right 
head 44 | MARKERS["36"] = (144, 36, 36+ 100) # 05 back_head 45 | MARKERS["37"] = (148, 34, 37+ 100) # 06 back_high 46 | MARKERS["38"] = (152, 32, 38+ 100) # 07 back_low 47 | MARKERS["39"] = (156, 30, 39+ 100) # 08 left b_shoulder 48 | MARKERS["40"] = (160, 28, 40+ 100) # 09 left f_shoulder 49 | MARKERS["41"] = (164, 26, 41+ 100) # 10 left upperarm 50 | MARKERS["42"] = (168, 24, 42+ 100) # 11 left forearm 51 | MARKERS["43"] = (172, 22, 43+ 100) # 12 right b_shoulder 52 | MARKERS["44"] = (176, 20, 44+ 100) # 13 right f_shoulder 53 | MARKERS["45"] = (180, 18, 45+ 100) # 14 right upperarm 54 | MARKERS["46"] = (184, 16, 46+ 100) # 15 right forearm 55 | MARKERS["47"] = (188, 14, 47+ 100) # 16 left pelvis 56 | MARKERS["48"] = (192, 12, 48+ 100) # 17 left thigh 57 | MARKERS["49"] = (196, 10, 49+ 100) # 18 left calf 58 | MARKERS["50"] = (200, 8, 50+ 100) # 19 right pelvis 59 | MARKERS["51"] = (204, 6, 51+ 100) # 20 right thigh 60 | MARKERS["52"] = (208, 4, 52+ 100) # 21 right calf 61 | MARKERS["53"] = (216, 2, 53+ 100) # 21 right calf 62 | 63 | 64 | 65 | 66 | return MARKERS 67 | 68 | def get_markers_deepmocap(): 69 | MARKERS = {} 70 | 71 | MARKERS["01"] = (0, 255, 0+ 100) # 00 spinebase 72 | MARKERS["02"] = (255, 0, 0+ 100) # 01 left chest 73 | MARKERS["03"] = (255, 255, 0+ 100) # 02 right chest 74 | MARKERS["04"] = (0, 255, 255+ 100) # 03 left head 75 | MARKERS["05"] = (255, 0, 255+ 100) # 04 right head 76 | MARKERS["06"] = (185, 255, 0+ 100) # 05 back_head 77 | MARKERS["07"] = (0, 185, 255+ 100) # 06 back_high 78 | MARKERS["08"] = (255, 0, 185+ 100) # 07 back_low 79 | MARKERS["09"] = (185, 0, 255+ 100) # 08 left b_shoulder 80 | MARKERS["10"] = (0, 255, 185+ 100) # 09 left f_shoulder 81 | MARKERS["11"] = (255, 185, 0+ 100) # 10 left upperarm 82 | MARKERS["12"] = (132, 0, 255+ 100) # 11 left forearm 83 | MARKERS["13"] = (0, 255, 132+ 100) # 22 left hand 84 | MARKERS["14"] = (255, 132, 0+ 100) # 12 right b_shoulder 85 | MARKERS["15"] = (224, 255, 0+ 100) # 13 right f_shoulder 86 | MARKERS["16"] = (0, 225, 255+ 100) # 14 right upperarm 87 | MARKERS["17"] = (255, 0, 225+ 100) # 15 right forearm 88 | MARKERS["18"] = (138, 255, 0+ 100) # 24 right hand 89 | MARKERS["19"] = (0, 138, 255+ 100) # 16 left pelvis 90 | MARKERS["20"] = (255, 0, 138+ 100) # 17 left thigh 91 | MARKERS["21"] = (222, 0, 255+ 100) # 18 left calf 92 | MARKERS["22"] = (0, 255, 222+ 100) # 23 left foot 93 | MARKERS["23"] = (255, 222, 0+ 100) # 19 right pelvis 94 | MARKERS["24"] = (97, 0, 255+ 100) # 20 right thigh 95 | MARKERS["25"] = (0, 255, 97+ 100) # 21 right calf 96 | MARKERS["26"] = (255, 95, 0+ 100) # 25 right foot 97 | 98 | 99 | return MARKERS 100 | 101 | joint_selection = [ 102 | 0, # Hips 103 | 2, # Spine1 104 | 3, # Spine2 105 | 5, # Neck 106 | 8, # Head 107 | 10, # RightArm 108 | 11, # RightForeArm 109 | 12, # RightHand 110 | 16, # LeftArm 111 | 17, # LeftForeArm 112 | 18, # LeftHand 113 | 21, # RightUpLeg 114 | 22, # RightLeg 115 | 23, # RightFoot 116 | 25, # RightToeBase 117 | # 25, # LeftUpLeg 118 | # 27, # LeftLeg 119 | # 28, # LeftFoot 120 | # 30, # LeftForeFoot 121 | # 31, # LeftToeBase 122 | 27, # LeftUpLeg 123 | 28, # LeftLeg 124 | 29, # LeftFoot 125 | 31, # LeftToeBase 126 | ] 127 | 128 | # 0 Hips 129 | # 1 Spine 130 | # 2 Spine1 131 | # 3 Spine2 132 | # 4 Spine3 133 | # 5 Neck 134 | # 6 Neck1 135 | # 7 Head 136 | # 8 HeadEnd 137 | # 9 RightShoulder 138 | # 10 RightArm 139 | # 11 RightForeArm 140 | # 12 RightHand 141 | # 13 RightHandThumb1 142 | # 14 RightHandMiddle1 143 | # 15 LeftShoulder 144 | # 16 LeftArm 145 | 
# 17 LeftForeArm 146 | # 18 LeftHand 147 | # 19 LeftHandThumb1 148 | # 20 LeftHandMiddle1 149 | # 21 RightUpLeg 150 | # 22 RightLeg 151 | # 23 RightFoot 152 | # 24 RightForeFoot 153 | # 25 RightToeBase 154 | # 26 RightToeBaseEnd 155 | # 27 LeftUpLeg 156 | # 28 LeftLeg 157 | # 29 LeftFoot 158 | # 30 LeftForeFoot 159 | # 31 LeftToeBase 160 | # 32 LeftToeBaseEnd 161 | 162 | joint_selection2 = [ 163 | 0, # Hips 164 | 11, # Spine1 165 | 12, # Spine2 166 | 20, # Neck 167 | 21, # Head 168 | 24, # RightArm 169 | 25, # RightForeArm 170 | 26, # RightHand 171 | 14, # LeftArm 172 | 15, # LeftForeArm 173 | 16, # LeftHand 174 | 6, # RightUpLeg 175 | 7, # RightLeg 176 | 8, # RightFoot 177 | 9, # RightToeBase 178 | # 25, # LeftUpLeg 179 | # 27, # LeftLeg 180 | # 28, # LeftFoot 181 | # 30, # LeftForeFoot 182 | # 31, # LeftToeBase 183 | 1, # LeftUpLeg 184 | 2, # LeftLeg 185 | 3, # LeftFoot 186 | 4, # LeftToeBase 187 | ] 188 | 189 | marker_mapping_sfu_2_h4d = [ 190 | 0, 191 | 1, 192 | 2, 193 | 3, 194 | 4, 195 | 5, 196 | 6, 197 | 7, 198 | 8, 199 | 10, 200 | 201 | ] 202 | 203 | -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | 5 | # TODO: avoid the back and forth between torch and numpy 6 | def get_depth_image_from_points(resolution, res_scale, points): 7 | depth_tensor = torch.zeros([1, 1, int(resolution), int(resolution)], dtype=torch.float32) 8 | width = resolution * res_scale 9 | height = resolution * res_scale 10 | depth_img = np.zeros([int(height), int(width), 1]) 11 | points_np = points.cpu().numpy() 12 | points_np = np.asarray(sorted(points_np.squeeze(), key=lambda x: x[..., 2]))[::-1] 13 | 14 | for i in range(points_np.shape[0]): 15 | norm_x_value = points_np[i, 0] 16 | norm_y_value = points_np[i, 1] 17 | norm_depth_value = points_np[i, 2] 18 | y = int(norm_y_value * height) - 1 19 | x = int(norm_x_value * width) - 1 20 | offset = 0 21 | if (x > offset-1 and x < width-offset and y > offset-1 and y < height-offset): 22 | depth_img = cv2.circle(depth_img, (x, y), 2 * int(res_scale), float(norm_depth_value), -1) 23 | else: 24 | print("error") 25 | 26 | depth_img = cv2.resize(depth_img, (int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR_EXACT) 27 | depth_tensor[0, 0, ...] 
= torch.from_numpy(depth_img) 28 | return depth_tensor 29 | -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from src.data.datasets.human_pose.H4DIR.importers.enums import ( 4 | joint_selection 5 | ) 6 | from src.data.datasets.human_pose.H4DIR.importers.markermap import ( 7 | S1S4_Mapping, 8 | S2S3_Mapping 9 | ) 10 | 11 | __all__ = [ 12 | 'load_3d_data', 13 | ] 14 | 15 | def load_3d_data(filename, filename_rs, markers_out, joints_out, \ 16 | rs=True, random_rot=None, augment=False, S1S4=True, \ 17 | scale_on_image=1.25, data_type=torch.float32): 18 | # ground truth data 19 | file = open(filename, "r") 20 | lines = file.readlines() 21 | raw_points = np.zeros([markers_out + joints_out, 3], dtype=float) 22 | # num in the original ground truth data 23 | NUM_OF_MARKERS_IN_GT = 53 24 | NUM_OF_JOINTS_IN_GT = 33 25 | NUM_OF_LINES = NUM_OF_MARKERS_IN_GT + NUM_OF_JOINTS_IN_GT 26 | line_counter = 0 27 | j_counter = 0 28 | assert(len(lines) == NUM_OF_LINES) 29 | for line in lines: 30 | index = line_counter % NUM_OF_LINES 31 | if (joints_out == 0 and index == markers_out): 32 | break 33 | values = line.split(' ') 34 | x_gt = float(values[2]) 35 | y_gt = float(values[3]) 36 | z_gt = float(values[1]) 37 | if (index < NUM_OF_MARKERS_IN_GT and index < markers_out): 38 | raw_points[j_counter, 0] = x_gt 39 | raw_points[j_counter, 1] = -y_gt # (-) for imaging 40 | raw_points[j_counter, 2] = z_gt 41 | j_counter += 1 42 | elif (index - NUM_OF_MARKERS_IN_GT) < NUM_OF_JOINTS_IN_GT and (index - NUM_OF_MARKERS_IN_GT) in joint_selection: 43 | raw_points[j_counter, 0] = x_gt 44 | raw_points[j_counter, 1] = -y_gt # (-) for imaging 45 | raw_points[j_counter, 2] = z_gt 46 | j_counter += 1 47 | line_counter += 1 48 | # raw points list to be filled either with raw or noisy vicon data 49 | raw_rs_points_list = [] 50 | if (rs): 51 | file_rs = open(filename_rs, "r") 52 | lines_rs = file_rs.readlines() 53 | for line in lines_rs: 54 | values = line.split(' ') 55 | x_rs = float(values[1]) 56 | y_rs = float(values[2]) 57 | z_rs = float(values[0]) 58 | # this is thresholding the floor and the top. 
TODO: to be better investigated 59 | if y_rs > -830 and y_rs < 1170: 60 | raw_rs_points_list.append([x_rs, -y_rs, z_rs]) # (-) for imaging 61 | rs_raw_points = np.asarray(raw_rs_points_list) 62 | else: 63 | raw_rs_points_list = raw_points 64 | # subtraction of CoM and rotational augmentation 65 | com = np.mean(rs_raw_points, axis=0) 66 | raw_points -= com 67 | rs_raw_points -= com 68 | if augment: 69 | raw_points = random_rot.apply(raw_points) 70 | rs_raw_points = random_rot.apply(rs_raw_points) 71 | 72 | points = np.zeros([(markers_out + joints_out), 3], dtype=float) 73 | rs_points = rs_raw_points 74 | 75 | minval_z = 100000 76 | maxval_z = 0 77 | minval_y = minval_z 78 | maxval_y = maxval_z 79 | minval_x = minval_z 80 | maxval_x = maxval_z 81 | counter = 0 82 | 83 | for index in range(raw_points.shape[0]): 84 | if (index < markers_out): 85 | if (S1S4): 86 | points[index] = raw_points[S1S4_Mapping[index]] 87 | else: 88 | points[index] = raw_points[S2S3_Mapping[index]] 89 | else: 90 | points[index] = raw_points[index] 91 | counter += 1 92 | 93 | minval_x = np.minimum(np.min(rs_points[..., 0]), np.min(points[..., 0])) 94 | maxval_x = np.maximum(np.max(rs_points[..., 0]), np.max(points[..., 0])) 95 | minval_y = np.minimum(np.min(rs_points[..., 1]), np.min(points[..., 1])) 96 | maxval_y = np.maximum(np.max(rs_points[..., 1]), np.max(points[..., 1])) 97 | minval_z = np.minimum(np.min(rs_points[..., 2]), np.min(points[..., 2])) 98 | maxval_z = np.maximum(np.max(rs_points[..., 2]), np.max(points[..., 2])) 99 | 100 | tcom = torch.from_numpy(com).type(torch.FloatTensor) 101 | scale = torch.zeros([3], dtype=torch.float32) 102 | gt_markers = torch.zeros([markers_out, 3]) 103 | gt_joints = torch.zeros([joints_out, 3]) 104 | scale_on_image_offset = (1.0 - 1.0 / scale_on_image) / 2.0 105 | for i in range(0, markers_out + joints_out): 106 | scale[0] = scale_on_image * float(maxval_x - minval_x) 107 | scale[1] = scale_on_image * float(maxval_y - minval_y) 108 | scale[2] = scale_on_image * float(maxval_z - minval_z) 109 | 110 | norm_depth_value = (float(points[i][2]) - float(minval_z)) \ 111 | / scale[2] \ 112 | + scale_on_image_offset 113 | norm_x_value = (float(points[i][0]) - float(minval_x)) \ 114 | / scale[0] \ 115 | + scale_on_image_offset 116 | norm_y_value = (float(points[i][1]) - float(minval_y)) \ 117 | / scale[1] \ 118 | + scale_on_image_offset 119 | 120 | if (i < markers_out): 121 | gt_markers[i][0] = norm_x_value 122 | gt_markers[i][1] = norm_y_value 123 | gt_markers[i][2] = norm_depth_value 124 | else: 125 | gt_joints[i-markers_out][0] = norm_x_value 126 | gt_joints[i-markers_out][1] = norm_y_value 127 | gt_joints[i-markers_out][2] = norm_depth_value 128 | 129 | rs_markers_ori = torch.zeros([rs_raw_points.shape[0], rs_raw_points.shape[1]]) 130 | 131 | tcom[0] += - scale[0] / 2 + scale[0] / scale_on_image / 2 + float(minval_x) 132 | tcom[1] += - scale[1] / 2 + scale[1] / scale_on_image / 2 + float(minval_y) 133 | tcom[2] += - scale[2] / 2 + scale[2] / scale_on_image / 2 + float(minval_z) 134 | 135 | for i in range(rs_raw_points.shape[0]): 136 | norm_depth_value = (float(rs_points[i][2]) - float(minval_z)) \ 137 | / scale[2] \ 138 | + scale_on_image_offset 139 | norm_x_value = (float(rs_points[i][0]) - float(minval_x)) \ 140 | / scale[0] \ 141 | + scale_on_image_offset 142 | norm_y_value = (float(rs_points[i][1]) - float(minval_y)) \ 143 | / scale[1] \ 144 | + scale_on_image_offset 145 | 146 | rs_markers_ori[i][0] = norm_x_value 147 | rs_markers_ori[i][1] = norm_y_value 148 | 
rs_markers_ori[i][2] = norm_depth_value 149 | 150 | return rs_markers_ori, gt_markers, gt_joints, scale, tcom 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/markermap.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | class MARKER_S1S4(Enum): 3 | ARIEL=0, 4 | C7=1, 5 | CLAV=2, 6 | LANK=3, 7 | LBHD=4, 8 | LBSH=5, 9 | LBWT=6, 10 | LELB=7, 11 | LFHD=8, 12 | LFRM=9, 13 | LFSH=10, 14 | LFWT=11, 15 | LHEL=12, 16 | LIEL=13, 17 | LIHAND=14, 18 | LIWR=15, 19 | LKNE=16, 20 | LKNI=17, 21 | LMT1=18, 22 | LMT5=19, 23 | LOHAND=20, 24 | LOWR=21, 25 | LSHN=22, 26 | LTHI=23, 27 | LTOE=24, 28 | LUPA=25, 29 | MBWT=26, 30 | MFWT=27, 31 | RANK=28, 32 | RBHD=29, 33 | RBSH=30, 34 | RBWT=31, 35 | RELB=32, 36 | RFHD=33, 37 | RFRM=34, 38 | RFSH=35, 39 | RFWT=36, 40 | RHEL=37, 41 | RIEL=38, 42 | RIHAND=39, 43 | RIWR=40, 44 | RKNE=41, 45 | RKNI=42, 46 | RMT1=43, 47 | RMT5=44, 48 | ROHAND=45, 49 | ROWR=46, 50 | RSHN=47, 51 | RTHI=48, 52 | RTOE=49, 53 | RUPA=50, 54 | STRN=51, 55 | T10=52, 56 | 57 | class MARKER_S2S3(Enum): 58 | ARIEL=0, 59 | C7=1, 60 | CLAV=2, 61 | LANK=3, 62 | LBHD=4, 63 | LBSH=5, 64 | LBWT=6, 65 | LELB=7, 66 | LFHD=8, 67 | LFRM=9, 68 | LFSH=10, 69 | LFWT=11, 70 | LHEL=12, 71 | LIEL=13, 72 | LIHAND=14, 73 | LIWR=15, 74 | LKNE=16, 75 | LKNI=17, 76 | LMT1=18, 77 | LMT5=19, 78 | LMWT=20, 79 | LOHAND=21, 80 | LOWR=22, 81 | LSHN=23, 82 | LTHI=24, 83 | LTOE=25, 84 | LUPA=26, 85 | RANK=27, 86 | RBHD=28, 87 | RBSH=29, 88 | RBWT=30, 89 | RELB=31, 90 | RFHD=32, 91 | RFRM=33, 92 | RFSH=34, 93 | RFWT=35, 94 | RHEL=36, 95 | RIEL=37, 96 | RIHAND=38, 97 | RIWR=39, 98 | RKNE=40, 99 | RKNI=41, 100 | RMT1=42, 101 | RMT5=43, 102 | RMWT=44, 103 | ROHAND=45, 104 | ROWR=46, 105 | RSHN=47, 106 | RTHI=48, 107 | RTOE=49, 108 | RUPA=50, 109 | STRN=51, 110 | T10=52, 111 | 112 | S1S4_Mapping = [ 113 | 0, 114 | 1, 115 | 2, 116 | 3, 117 | 4, 118 | 5, 119 | 6, 120 | 7, 121 | 8, 122 | 9, 123 | 10, 124 | 11, 125 | 12, 126 | 13, 127 | 14, 128 | 15, 129 | 16, 130 | 17, 131 | 18, 132 | 19, 133 | 20, 134 | 21, 135 | 22, 136 | 23, 137 | 24, 138 | 25, 139 | 28, 140 | 29, 141 | 30, 142 | 31, 143 | 32, 144 | 33, 145 | 34, 146 | 35, 147 | 36, 148 | 37, 149 | 38, 150 | 39, 151 | 40, 152 | 41, 153 | 42, 154 | 43, 155 | 44, 156 | 45, 157 | 46, 158 | 47, 159 | 48, 160 | 49, 161 | 50, 162 | 51, 163 | 52, 164 | 26, 165 | 27 166 | ] 167 | 168 | 169 | S2S3_Mapping = [ 170 | 0, 171 | 1, 172 | 2, 173 | 3, 174 | 4, 175 | 5, 176 | 6, 177 | 7, 178 | 8, 179 | 9, 180 | 10, 181 | 11, 182 | 12, 183 | 13, 184 | 14, 185 | 15, 186 | 16, 187 | 17, 188 | 18, 189 | 19, 190 | 21, 191 | 22, 192 | 23, 193 | 24, 194 | 25, 195 | 26, 196 | 27, 197 | 28, 198 | 29, 199 | 30, 200 | 31, 201 | 32, 202 | 33, 203 | 34, 204 | 35, 205 | 36, 206 | 37, 207 | 38, 208 | 39, 209 | 40, 210 | 41, 211 | 42, 212 | 43, 213 | 45, 214 | 46, 215 | 47, 216 | 48, 217 | 49, 218 | 50, 219 | 51, 220 | 52, 221 | 20, 222 | 44 223 | ] 224 | 225 | class MARKER_S1S4_new(Enum): 226 | ARIEL=0, 227 | C7=1, 228 | CLAV=2, 229 | LANK=3, 230 | LBHD=4, 231 | LBSH=5, 232 | LBWT=6, 233 | LELB=7, 234 | LFHD=8, 235 | LFRM=9, 236 | LFSH=10, 237 | LFWT=11, 238 | LHEL=12, 239 | LIEL=13, 240 | LIHAND=14, 241 | LIWR=15, 242 | LKNE=16, 243 | LKNI=17, 244 | LMT1=18, 245 | LMT5=19, 246 | LOHAND=20, 247 | LOWR=21, 248 | LSHN=22, 249 | LTHI=23, 250 | LTOE=24, 251 | LUPA=25, 252 | RANK=26, 253 | RBHD=27, 254 | RBSH=28, 255 | RBWT=29, 256 | RELB=30, 257 | 
RFHD=31, 258 | RFRM=32, 259 | RFSH=33, 260 | RFWT=34, 261 | RHEL=35, 262 | RIEL=36, 263 | RIHAND=37, 264 | RIWR=38, 265 | RKNE=39, 266 | RKNI=40, 267 | RMT1=41, 268 | RMT5=42, 269 | ROHAND=43, 270 | ROWR=44, 271 | RSHN=45, 272 | RTHI=46, 273 | RTOE=47, 274 | RUPA=48, 275 | STRN=49, 276 | T10=50, 277 | MBWT=51, 278 | MFWT=52, 279 | 280 | class MARKER_S2S3_new(Enum): 281 | ARIEL=0, 282 | C7=1, 283 | CLAV=2, 284 | LANK=3, 285 | LBHD=4, 286 | LBSH=5, 287 | LBWT=6, 288 | LELB=7, 289 | LFHD=8, 290 | LFRM=9, 291 | LFSH=10, 292 | LFWT=11, 293 | LHEL=12, 294 | LIEL=13, 295 | LIHAND=14, 296 | LIWR=15, 297 | LKNE=16, 298 | LKNI=17, 299 | LMT1=18, 300 | LMT5=19, 301 | LOHAND=20, 302 | LOWR=21, 303 | LSHN=22, 304 | LTHI=23, 305 | LTOE=24, 306 | LUPA=25, 307 | RANK=26, 308 | RBHD=27, 309 | RBSH=28, 310 | RBWT=29, 311 | RELB=30, 312 | RFHD=31, 313 | RFRM=32, 314 | RFSH=33, 315 | RFWT=34, 316 | RHEL=35, 317 | RIEL=36, 318 | RIHAND=37, 319 | RIWR=38, 320 | RKNE=39, 321 | RKNI=40, 322 | RMT1=41, 323 | RMT5=42, 324 | ROHAND=43, 325 | ROWR=44, 326 | RSHN=45, 327 | RTHI=46, 328 | RTOE=47, 329 | RUPA=48, 330 | STRN=49, 331 | T10=50, 332 | LMWT=51, 333 | RMWT=52, -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/projections.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy 3 | from scipy.spatial.transform import Rotation as R 4 | 5 | 6 | def f_rotate_back(coords): 7 | coords = coords.detach().clone() 8 | coords[..., 0] = 1.0 - coords[..., 0] 9 | coords[..., 2] = 1.0 - coords[..., 2] 10 | return coords 11 | 12 | def f_rotate_right(coords): 13 | rot = torch.tensor([[ 14 | [0.0, 0.0, -1.0], 15 | [0.0, 1.0, 0.0], 16 | [1.0, 0.0, 0.0], 17 | ]]).float() 18 | xformed_t = coords.detach().clone().float() 19 | xformed_t = rot @ xformed_t.permute(0, 2, 1) 20 | xformed_t += torch.tensor([1.0, 0.0, 0.0]).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 21 | return xformed_t.permute(0, 2, 1) 22 | # return torch.from_numpy(numpy.expand_dims(xformed, axis=0)) 23 | 24 | 25 | def f_rotate_left(coords): 26 | rot = torch.tensor([[ 27 | [0.0, 0.0, 1.0], 28 | [0.0, 1.0, 0.0], 29 | [-1.0, 0.0, 0.0], 30 | ]]).float() 31 | xformed_t = coords.detach().clone().float() 32 | xformed_t = rot @ xformed_t.permute(0, 2, 1) 33 | xformed_t += torch.tensor([0.0, 0.0, 1.0]).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 34 | return xformed_t.permute(0, 2, 1) 35 | 36 | def rotate_back(tensor, centered = True, masked=False): 37 | t = tensor.clone().detach() 38 | 39 | if (centered): 40 | if (masked): 41 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 42 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 43 | t[..., 0] = mask_0 * (- t[..., 0]) 44 | t[..., 2] = mask_2 * (1 - t[..., 2]) 45 | else: 46 | t[..., 0] = - t[..., 0] 47 | t[..., 2] = (1 - t[..., 2]) 48 | else: 49 | if (masked): 50 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 51 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 52 | t[..., 0] = mask_0 * (1 - t[..., 0]) 53 | t[..., 2] = mask_2 * (1 - t[..., 2]) 54 | else: 55 | t[..., 0] = 1 - t[..., 0] 56 | t[..., 2] = 1 - t[..., 2] 57 | 58 | return t 59 | 60 | def rotate_back_(tensor, centered = True, masked=False): 61 | t = tensor 62 | 63 | if (centered): 64 | if (masked): 65 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 66 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 67 | 
t[..., 0] = mask_0 * (- t[..., 0]) 68 | t[..., 2] = mask_2 * (1 - t[..., 2]) 69 | else: 70 | t[..., 0] = - t[..., 0] 71 | t[..., 2] = (1 - t[..., 2]) 72 | else: 73 | if (masked): 74 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 75 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 76 | t[..., 0] = mask_0 * (1 - t[..., 0]) 77 | t[..., 2] = mask_2 * (1 - t[..., 2]) 78 | else: 79 | t[..., 0] = 1 - t[..., 0] 80 | t[..., 2] = 1 - t[..., 2] 81 | 82 | return t 83 | 84 | def create_image_domain_grid(width, height, data_type=torch.float32): 85 | v_range = ( 86 | torch.arange(0, height) # [0 - h] 87 | .view(1, height, 1) # [1, [0 - h], 1] 88 | .expand(1, height, width) # [1, [0 - h], W] 89 | .type(data_type) # [1, H, W] 90 | ) 91 | u_range = ( 92 | torch.arange(0, width) # [0 - w] 93 | .view(1, 1, width) # [1, 1, [0 - w]] 94 | .expand(1, height, width) # [1, H, [0 - w]] 95 | .type(data_type) # [1, H, W] 96 | ) 97 | ones = ( 98 | torch.ones(1, height, width) # [1, H, W] := 1 99 | .type(data_type) 100 | ) 101 | return torch.stack((u_range, v_range, ones), dim=1) # [1, 3, H, W] 102 | 103 | def project_points_to_uvs(points, intrinsics): 104 | b, _, h, w = points.size() # [B, 3, H, W] 105 | x_coordinate3d = points[:, 0] #TODO: check if adding small value makes sense to avoid zeros? 106 | y_coordinate3d = points[:, 1] 107 | z_coordinate3d = points[:, 2].clamp(min=1e-3) 108 | x_homogeneous = x_coordinate3d / z_coordinate3d 109 | y_homogeneous = y_coordinate3d / z_coordinate3d 110 | ones = z_coordinate3d.new_ones(z_coordinate3d.size()) 111 | homogeneous_coordinates = ( # (x/z, y/z, 1.0) 112 | torch.stack([x_homogeneous, y_homogeneous, ones], dim=1) # [B, 3, H, W] 113 | .reshape(b, 3, -1) # [B, 3, H*W] 114 | ) 115 | uv_coordinates = intrinsics @ homogeneous_coordinates # [B, 3, H*W] 116 | return ( # image domain coordinates 117 | uv_coordinates[:, :2, :] # [B, 2, H*W] 118 | .reshape(b, 2, h, w) # [B, 2, H, W] 119 | ) # [B, 2, H, W] 120 | 121 | 122 | def project_single_point_to_uv(point, intrinsics): 123 | x_coordinate3d = point[0] #TODO: check if adding small value makes sense to avoid zeros? 
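    # Pinhole projection for a single point, mirroring project_points_to_uvs above:
    # divide x and y by the depth z, then map the homogeneous point through the intrinsics.
    # NOTE: unlike project_points_to_uvs, z is not clamped here, so a zero depth would divide by zero.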
124 | y_coordinate3d = point[1] 125 | z_coordinate3d = point[2] 126 | x_homogeneous = x_coordinate3d / z_coordinate3d 127 | y_homogeneous = y_coordinate3d / z_coordinate3d 128 | 129 | homogeneous_coordinates = (x_homogeneous, y_homogeneous, 1) 130 | uv_coordinates = intrinsics.numpy() @ homogeneous_coordinates # [B, 3, H*W] 131 | return uv_coordinates[:2] 132 | 133 | def normalize_uvs(uvs): 134 | _, __, h, w = uvs.size() 135 | normalized_u = 2 * uvs[:, 0, :, :] / (w - 1) - 1 136 | normalized_v = 2 * uvs[:, 1, :, :] / (h - 1) - 1 137 | return torch.stack([normalized_u, normalized_v], dim=1)\ 138 | .clamp(min=-1, max=1) #TODO: check clamping or masking /w 2s 139 | 140 | def deproject_depth_to_points(depth, grid, intrinsics_inv): 141 | b, _, h, w = depth.size() 142 | # check https://pytorch.org/docs/stable/torch.html#torch.matmul 143 | # need to return a one-dimensional tensor to use the matrix-vector product 144 | # as a result we reshape to [B, 3, H*W] in order to multiply the intrinsics matrix 145 | # with a 3x1 vector (u, v, 1) 146 | current_pixel_coords = ( # convert grid to appropriate dims for matrix multiplication 147 | grid # [1, 3, H, W] #grid[:,:,:h,:w] 148 | .expand(b, 3, h, w) # [B, 3, H, W] 149 | .reshape(b, 3, -1) # [B, 3, H*W] := [B, 3, UV1] 150 | ) 151 | # return ( # K_inv * [UV1] * depth 152 | # (intrinsics_inv @ current_pixel_coords) # [B, 3, 3] * [B, 3, UV1] 153 | # .reshape(b, 3, h, w) * # [B, 3, H, W] 154 | # depth 155 | # #.unsqueeze(1) # unsqueeze to tri-channel for element wise product 156 | # ) # [B, 3, H, W] 157 | p3d = ( # K_inv * [UV1] * depth 158 | (intrinsics_inv @ current_pixel_coords) # [B, 3, 3] * [B, 3, UV1] 159 | .reshape(b, 3, h, w) * # [B, 3, H, W] 160 | depth 161 | #.unsqueeze(1) # unsqueeze to tri-channel for element wise product 162 | ) # [B, 3, H, W] 163 | #p3d[:, 0, :, :] += 0.055 # magic3 number fixes all ! 164 | # p3d[:, 0, :, :] += 0.05 # magic number fixes all ! 165 | #p3d[:, 0, :, :] += 0.0275 # magic2 number 2 fixes all ! 
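    # At this point p3d[b, :, v, u] = depth[b, 0, v, u] * K^-1 @ [u, v, 1]^T,
    # i.e. every pixel has been back-projected along its viewing ray to its observed depth.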
166 | return p3d -------------------------------------------------------------------------------- /src/modules/lightning/models/__init__.py: -------------------------------------------------------------------------------- 1 | from src.modules.lightning.models.stacked_hourglass import StackedHourglassMod 2 | from src.modules.lightning.models.stacked_hourglass_e2e import StackedHourglassMod_e2e 3 | from src.modules.lightning.models.cmpm import CMPM 4 | from src.modules.lightning.models.cpm import CPM 5 | from src.modules.lightning.models.hrnet_mod import HRNetMod 6 | from src.modules.lightning.models.hrnet_e2e import HRNetMod_e2e 7 | from src.modules.lightning.models.hrnet_ps import HRNetModPS 8 | from src.modules.lightning.models.hopenet import HopeNet 9 | from src.modules.lightning.models.oml_dual import OmlDual 10 | 11 | __all__ = [ 12 | "StackedHourglassMod", 13 | "StackedHourglassMod_e2e", 14 | "CMPM", 15 | "CPM", 16 | "HRNetMod", 17 | "HRNetMod_e2e", 18 | "HopeNet", 19 | "HRNetModPS" 20 | "OmlDual" 21 | ] -------------------------------------------------------------------------------- /src/modules/lightning/models/cmpm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | from .dsntnn import ( 5 | flat_softmax, 6 | dsnt 7 | ) 8 | 9 | import typing 10 | 11 | 12 | class Interpolate(nn.Module): 13 | def __init__(self, size, mode): 14 | super(Interpolate, self).__init__() 15 | # size: expected size after interpolation 16 | # mode: interpolation type (e.g. bilinear, nearest) 17 | 18 | self.interp = nn.functional.interpolate 19 | self.size = size 20 | self.mode = mode 21 | 22 | def forward(self, x): 23 | out = self.interp(x, size=self.size, mode=self.mode) #, align_corners=False 24 | 25 | return out 26 | 27 | class CMPM(nn.Module): 28 | def __init__(self, 29 | ): 30 | super(CMPM, self).__init__() 31 | num_markers = 53 32 | num_joints = 19 33 | self.num_stages = 6 34 | self.num_joints = num_joints#configer.get('network', 'heatmap_out') 35 | self.num_markers = num_markers#configer.get('network', 'heatmap_out') 36 | self.out_c = num_markers + num_joints#configer.get('network', 'heatmap_out') 37 | self.pool_center_lower = nn.AvgPool2d(kernel_size=9, stride=8) 38 | self.conv1_stage1 = nn.Conv2d(1, 128, kernel_size=9, padding=4) #change input to one channel 39 | self.pool1_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 40 | self.conv2_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 41 | self.pool2_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 42 | self.conv3_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 43 | # self.pool3_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 44 | self.conv4_stage1 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 45 | self.conv5_stage1 = nn.Conv2d(32, 512, kernel_size=9, padding=4) 46 | self.conv6_stage1 = nn.Conv2d(512, 512, kernel_size=1) 47 | self.conv7_stage1 = nn.Conv2d(512, self.num_markers, kernel_size=1) 48 | 49 | self.conv1_stage2 = nn.Conv2d(1, 128, kernel_size=9, padding=4) 50 | self.pool1_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 51 | self.conv2_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 52 | self.pool2_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 53 | self.conv3_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 54 | self.pool3_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 55 | self.conv4_stage2 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 56 
| 57 | self.Mconv1_stage2 = nn.Conv2d(32 + self.num_markers, 128, kernel_size=11, padding=5) 58 | self.Mconv2_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 59 | self.Mconv3_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 60 | self.Mconv4_stage2 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 61 | self.Mconv5_stage2 = nn.Conv2d(128, self.num_markers, kernel_size=1, padding=0) 62 | 63 | self.conv1_stage3 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 64 | 65 | self.Mconv1_stage3 = nn.Conv2d(32 + self.num_markers, 128, kernel_size=11, padding=5) 66 | self.Mconv2_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 67 | self.Mconv3_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 68 | self.Mconv4_stage3 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 69 | self.Mconv5_stage3 = nn.Conv2d(128, self.num_markers, kernel_size=1, padding=0) 70 | 71 | self.conv1_stage4 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 72 | 73 | self.Mconv1_stage4 = nn.Conv2d(32 + self.num_markers, 128, kernel_size=11, padding=5) 74 | self.Mconv2_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 75 | self.Mconv3_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 76 | self.Mconv4_stage4 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 77 | self.Mconv5_stage4 = nn.Conv2d(128, self.num_joints, kernel_size=1, padding=0) 78 | 79 | self.conv1_stage5 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 80 | 81 | self.Mconv1_stage5 = nn.Conv2d(32 + self.num_joints, 128, kernel_size=11, padding=5) 82 | self.Mconv2_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 83 | self.Mconv3_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 84 | self.Mconv4_stage5 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 85 | self.Mconv5_stage5 = nn.Conv2d(128, self.num_joints, kernel_size=1, padding=0) 86 | 87 | self.conv1_stage6 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 88 | 89 | self.Mconv1_stage6 = nn.Conv2d(32 + self.num_joints, 128, kernel_size=11, padding=5) 90 | self.Mconv2_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 91 | self.Mconv3_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 92 | self.Mconv4_stage6 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 93 | self.Mconv5_stage6 = nn.Conv2d(128, self.num_joints, kernel_size=1, padding=0) 94 | 95 | # self.upsample_heatmaps_block = Interpolate((136,136), mode = "bicubic") 96 | 97 | 98 | def _stage1(self, image): 99 | """ 100 | Output result of stage 1 101 | :param image: source image with (368, 368) 102 | :return: conv7_stage1_map 103 | """ 104 | x = self.pool1_stage1(F.relu(self.conv1_stage1(image))) 105 | x = self.pool2_stage1(F.relu(self.conv2_stage1(x))) 106 | # x = self.pool3_stage1(F.relu(self.conv3_stage1(x))) 107 | x = F.relu(self.conv4_stage1(x)) 108 | x = F.relu(self.conv5_stage1(x)) 109 | x = F.relu(self.conv6_stage1(x)) 110 | # x = F.sigmoid(self.conv6_stage1(x)) 111 | x = self.conv7_stage1(x) 112 | return x 113 | 114 | def _middle(self, image): 115 | """ 116 | Compute shared pool3_stage_map for the following stage 117 | :param image: source image with (368, 368) 118 | :return: pool3_stage2_map 119 | """ 120 | x = self.pool1_stage2(F.relu(self.conv1_stage2(image))) 121 | x = self.pool2_stage2(F.relu(self.conv2_stage2(x))) 122 | # x = self.pool3_stage2(F.relu(self.conv3_stage2(x))) 123 | 124 | return x 125 | 126 | def _stage2(self, pool3_stage2_map, conv7_stage1_map): 127 | """ 128 | Output result of stage 2 129 | :param pool3_stage2_map 130 | :param conv7_stage1_map 131 | :return: Mconv5_stage2_map 132 | """ 133 | x = 
F.relu(self.conv4_stage2(pool3_stage2_map)) 134 | x = torch.cat([x, conv7_stage1_map], dim=1) 135 | x = F.relu(self.Mconv1_stage2(x)) 136 | x = F.relu(self.Mconv2_stage2(x)) 137 | x = F.relu(self.Mconv3_stage2(x)) 138 | x = F.relu(self.Mconv4_stage2(x)) 139 | # x = F.sigmoid(self.Mconv4_stage2(x)) 140 | x = self.Mconv5_stage2(x) 141 | 142 | return x 143 | 144 | def _stage3(self, pool3_stage2_map, Mconv5_stage2_map): 145 | """ 146 | Output result of stage 3 147 | :param pool3_stage2_map: 148 | :param Mconv5_stage2_map: 149 | :return: Mconv5_stage3_map 150 | """ 151 | x = F.relu(self.conv1_stage3(pool3_stage2_map)) 152 | x = torch.cat([x, Mconv5_stage2_map], dim=1) 153 | x = F.relu(self.Mconv1_stage3(x)) 154 | x = F.relu(self.Mconv2_stage3(x)) 155 | x = F.relu(self.Mconv3_stage3(x)) 156 | x = F.relu(self.Mconv4_stage3(x)) 157 | # x = F.sigmoid(self.Mconv4_stage3(x)) 158 | x = self.Mconv5_stage3(x) 159 | 160 | return x 161 | 162 | def _stage4(self, pool3_stage2_map, Mconv5_stage3_map): 163 | """ 164 | Output result of stage 4 165 | :param pool3_stage2_map: 166 | :param Mconv5_stage3_map: 167 | :return:Mconv5_stage4_map 168 | """ 169 | x = F.relu(self.conv1_stage4(pool3_stage2_map)) 170 | x = torch.cat([x, Mconv5_stage3_map], dim=1) 171 | x = F.relu(self.Mconv1_stage4(x)) 172 | x = F.relu(self.Mconv2_stage4(x)) 173 | x = F.relu(self.Mconv3_stage4(x)) 174 | x = F.relu(self.Mconv4_stage4(x)) 175 | # x = F.sigmoid(self.Mconv4_stage4(x)) 176 | x = self.Mconv5_stage4(x) 177 | 178 | return x 179 | 180 | def _stage5(self, pool3_stage2_map, Mconv5_stage4_map): 181 | """ 182 | Output result of stage 5 183 | :param pool3_stage2_map: 184 | :param Mconv5_stage4_map: 185 | :return:Mconv5_stage5_map 186 | """ 187 | x = F.relu(self.conv1_stage5(pool3_stage2_map)) 188 | x = torch.cat([x, Mconv5_stage4_map], dim=1) 189 | x = F.relu(self.Mconv1_stage5(x)) 190 | x = F.relu(self.Mconv2_stage5(x)) 191 | x = F.relu(self.Mconv3_stage5(x)) 192 | x = F.relu(self.Mconv4_stage5(x)) 193 | # x = F.sigmoid(self.Mconv4_stage5(x)) 194 | x = self.Mconv5_stage5(x) 195 | 196 | return x 197 | 198 | def _stage6(self, pool3_stage2_map, Mconv5_stage5_map): 199 | """ 200 | Output result of stage 6 201 | :param pool3_stage2_map: 202 | :param Mconv5_stage6_map: 203 | :param pool_center_lower_map: 204 | :return:Mconv5_stage6_map 205 | """ 206 | x = F.relu(self.conv1_stage6(pool3_stage2_map)) 207 | x = torch.cat([x, Mconv5_stage5_map], dim=1) 208 | x = F.relu(self.Mconv1_stage6(x)) 209 | x = F.relu(self.Mconv2_stage6(x)) 210 | x = F.relu(self.Mconv3_stage6(x)) 211 | x = F.relu(self.Mconv4_stage6(x)) 212 | # x = F.sigmoid(self.Mconv4_stage6(x)) 213 | x = self.Mconv5_stage6(x) 214 | 215 | return x 216 | 217 | 218 | def forward(self, 219 | data: torch.Tensor 220 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 221 | depth_tensor = data 222 | conv7_stage1_map = self._stage1(depth_tensor) # result of stage 1 223 | pool3_stage2_map = self._middle(depth_tensor) 224 | 225 | Mconv5_stage2_map = self._stage2(pool3_stage2_map, conv7_stage1_map) # result of stage 2 226 | Mconv5_stage3_map = self._stage3(pool3_stage2_map, Mconv5_stage2_map) # result of stage 3 227 | Mconv5_stage4_map = self._stage4(pool3_stage2_map, Mconv5_stage3_map) # result of stage 4 228 | Mconv5_stage5_map = self._stage5(pool3_stage2_map, Mconv5_stage4_map) # result of stage 5 229 | Mconv5_stage6_map = self._stage6(pool3_stage2_map, Mconv5_stage5_map) # result of stage 6 230 | ###################### customization for DSTN 231 | full_unnormalized_heatmaps_markers = 
conv7_stage1_map + Mconv5_stage2_map + Mconv5_stage3_map 232 | full_unnormalized_heatmaps_joints = Mconv5_stage4_map + Mconv5_stage5_map + Mconv5_stage6_map 233 | 234 | return full_unnormalized_heatmaps_markers, full_unnormalized_heatmaps_joints 235 | -------------------------------------------------------------------------------- /src/modules/lightning/models/cpm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | from .dsntnn import ( 5 | flat_softmax, 6 | dsnt 7 | ) 8 | 9 | import typing 10 | 11 | 12 | class Interpolate(nn.Module): 13 | def __init__(self, size, mode): 14 | super(Interpolate, self).__init__() 15 | # size: expected size after interpolation 16 | # mode: interpolation type (e.g. bilinear, nearest) 17 | 18 | self.interp = nn.functional.interpolate 19 | self.size = size 20 | self.mode = mode 21 | 22 | def forward(self, x): 23 | out = self.interp(x, size=self.size, mode=self.mode) #, align_corners=False 24 | 25 | return out 26 | 27 | class CPM(nn.Module): 28 | def __init__(self, 29 | num_markers, 30 | num_joints 31 | ): 32 | super(CPM, self).__init__() 33 | self.total_out = num_markers + num_joints 34 | self.num_stages = 6 35 | self.num_joints = num_joints#configer.get('network', 'heatmap_out') 36 | self.num_markers = num_markers#configer.get('network', 'heatmap_out') 37 | self.out_c = num_markers + num_joints#configer.get('network', 'heatmap_out') 38 | self.pool_center_lower = nn.AvgPool2d(kernel_size=9, stride=8) 39 | self.conv1_stage1 = nn.Conv2d(1, 128, kernel_size=9, padding=4) #change input to one channel 40 | self.pool1_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 41 | self.conv2_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 42 | self.pool2_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 43 | self.conv3_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 44 | # self.pool3_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 45 | self.conv4_stage1 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 46 | self.conv5_stage1 = nn.Conv2d(32, 512, kernel_size=9, padding=4) 47 | self.conv6_stage1 = nn.Conv2d(512, 512, kernel_size=1) 48 | self.conv7_stage1 = nn.Conv2d(512, self.total_out, kernel_size=1) 49 | 50 | self.conv1_stage2 = nn.Conv2d(1, 128, kernel_size=9, padding=4) 51 | self.pool1_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 52 | self.conv2_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 53 | self.pool2_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 54 | self.conv3_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 55 | self.pool3_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 56 | self.conv4_stage2 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 57 | 58 | self.Mconv1_stage2 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 59 | self.Mconv2_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 60 | self.Mconv3_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 61 | self.Mconv4_stage2 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 62 | self.Mconv5_stage2 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 63 | 64 | self.conv1_stage3 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 65 | 66 | self.Mconv1_stage3 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 67 | self.Mconv2_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 68 | self.Mconv3_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 69 | self.Mconv4_stage3 = 
nn.Conv2d(128, 128, kernel_size=1, padding=0) 70 | self.Mconv5_stage3 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 71 | 72 | self.conv1_stage4 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 73 | 74 | self.Mconv1_stage4 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 75 | self.Mconv2_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 76 | self.Mconv3_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 77 | self.Mconv4_stage4 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 78 | self.Mconv5_stage4 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 79 | 80 | self.conv1_stage5 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 81 | 82 | self.Mconv1_stage5 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 83 | self.Mconv2_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 84 | self.Mconv3_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 85 | self.Mconv4_stage5 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 86 | self.Mconv5_stage5 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 87 | 88 | self.conv1_stage6 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 89 | 90 | self.Mconv1_stage6 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 91 | self.Mconv2_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 92 | self.Mconv3_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 93 | self.Mconv4_stage6 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 94 | self.Mconv5_stage6 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 95 | 96 | # self.upsample_heatmaps_block = Interpolate((136,136), mode = "bicubic") 97 | 98 | 99 | def _stage1(self, image): 100 | """ 101 | Output result of stage 1 102 | :param image: source image with (368, 368) 103 | :return: conv7_stage1_map 104 | """ 105 | x = self.pool1_stage1(F.relu(self.conv1_stage1(image))) 106 | x = self.pool2_stage1(F.relu(self.conv2_stage1(x))) 107 | # x = self.pool3_stage1(F.relu(self.conv3_stage1(x))) 108 | x = F.relu(self.conv4_stage1(x)) 109 | x = F.relu(self.conv5_stage1(x)) 110 | x = F.relu(self.conv6_stage1(x)) 111 | # x = F.sigmoid(self.conv6_stage1(x)) 112 | x = self.conv7_stage1(x) 113 | return x 114 | 115 | def _middle(self, image): 116 | """ 117 | Compute shared pool3_stage_map for the following stage 118 | :param image: source image with (368, 368) 119 | :return: pool3_stage2_map 120 | """ 121 | x = self.pool1_stage2(F.relu(self.conv1_stage2(image))) 122 | x = self.pool2_stage2(F.relu(self.conv2_stage2(x))) 123 | # x = self.pool3_stage2(F.relu(self.conv3_stage2(x))) 124 | 125 | return x 126 | 127 | def _stage2(self, pool3_stage2_map, conv7_stage1_map): 128 | """ 129 | Output result of stage 2 130 | :param pool3_stage2_map 131 | :param conv7_stage1_map 132 | :return: Mconv5_stage2_map 133 | """ 134 | x = F.relu(self.conv4_stage2(pool3_stage2_map)) 135 | x = torch.cat([x, conv7_stage1_map], dim=1) 136 | x = F.relu(self.Mconv1_stage2(x)) 137 | x = F.relu(self.Mconv2_stage2(x)) 138 | x = F.relu(self.Mconv3_stage2(x)) 139 | x = F.relu(self.Mconv4_stage2(x)) 140 | # x = F.sigmoid(self.Mconv4_stage2(x)) 141 | x = self.Mconv5_stage2(x) 142 | 143 | return x 144 | 145 | def _stage3(self, pool3_stage2_map, Mconv5_stage2_map): 146 | """ 147 | Output result of stage 3 148 | :param pool3_stage2_map: 149 | :param Mconv5_stage2_map: 150 | :return: Mconv5_stage3_map 151 | """ 152 | x = F.relu(self.conv1_stage3(pool3_stage2_map)) 153 | x = torch.cat([x, Mconv5_stage2_map], dim=1) 154 | x = F.relu(self.Mconv1_stage3(x)) 155 | x = 
F.relu(self.Mconv2_stage3(x)) 156 | x = F.relu(self.Mconv3_stage3(x)) 157 | x = F.relu(self.Mconv4_stage3(x)) 158 | # x = F.sigmoid(self.Mconv4_stage3(x)) 159 | x = self.Mconv5_stage3(x) 160 | 161 | return x 162 | 163 | def _stage4(self, pool3_stage2_map, Mconv5_stage3_map): 164 | """ 165 | Output result of stage 4 166 | :param pool3_stage2_map: 167 | :param Mconv5_stage3_map: 168 | :return:Mconv5_stage4_map 169 | """ 170 | x = F.relu(self.conv1_stage4(pool3_stage2_map)) 171 | x = torch.cat([x, Mconv5_stage3_map], dim=1) 172 | x = F.relu(self.Mconv1_stage4(x)) 173 | x = F.relu(self.Mconv2_stage4(x)) 174 | x = F.relu(self.Mconv3_stage4(x)) 175 | x = F.relu(self.Mconv4_stage4(x)) 176 | # x = F.sigmoid(self.Mconv4_stage4(x)) 177 | x = self.Mconv5_stage4(x) 178 | 179 | return x 180 | 181 | def _stage5(self, pool3_stage2_map, Mconv5_stage4_map): 182 | """ 183 | Output result of stage 5 184 | :param pool3_stage2_map: 185 | :param Mconv5_stage4_map: 186 | :return:Mconv5_stage5_map 187 | """ 188 | x = F.relu(self.conv1_stage5(pool3_stage2_map)) 189 | x = torch.cat([x, Mconv5_stage4_map], dim=1) 190 | x = F.relu(self.Mconv1_stage5(x)) 191 | x = F.relu(self.Mconv2_stage5(x)) 192 | x = F.relu(self.Mconv3_stage5(x)) 193 | x = F.relu(self.Mconv4_stage5(x)) 194 | # x = F.sigmoid(self.Mconv4_stage5(x)) 195 | x = self.Mconv5_stage5(x) 196 | 197 | return x 198 | 199 | def _stage6(self, pool3_stage2_map, Mconv5_stage5_map): 200 | """ 201 | Output result of stage 6 202 | :param pool3_stage2_map: 203 | :param Mconv5_stage6_map: 204 | :param pool_center_lower_map: 205 | :return:Mconv5_stage6_map 206 | """ 207 | x = F.relu(self.conv1_stage6(pool3_stage2_map)) 208 | x = torch.cat([x, Mconv5_stage5_map], dim=1) 209 | x = F.relu(self.Mconv1_stage6(x)) 210 | x = F.relu(self.Mconv2_stage6(x)) 211 | x = F.relu(self.Mconv3_stage6(x)) 212 | x = F.relu(self.Mconv4_stage6(x)) 213 | # x = F.sigmoid(self.Mconv4_stage6(x)) 214 | x = self.Mconv5_stage6(x) 215 | 216 | return x 217 | 218 | 219 | def forward(self, 220 | data: torch.Tensor 221 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 222 | depth_tensor = data 223 | conv7_stage1_map = self._stage1(depth_tensor) # result of stage 1 224 | pool3_stage2_map = self._middle(depth_tensor) 225 | 226 | Mconv5_stage2_map = self._stage2(pool3_stage2_map, conv7_stage1_map) # result of stage 2 227 | Mconv5_stage3_map = self._stage3(pool3_stage2_map, Mconv5_stage2_map) # result of stage 3 228 | Mconv5_stage4_map = self._stage4(pool3_stage2_map, Mconv5_stage3_map) # result of stage 4 229 | Mconv5_stage5_map = self._stage5(pool3_stage2_map, Mconv5_stage4_map) # result of stage 5 230 | Mconv5_stage6_map = self._stage6(pool3_stage2_map, Mconv5_stage5_map) # result of stage 6 231 | ###################### customization for DSTN 232 | # full_unnormalized_heatmaps_markers = conv7_stage1_map + Mconv5_stage2_map + Mconv5_stage3_map 233 | full_unnormalized_heatmaps = conv7_stage1_map + Mconv5_stage2_map + Mconv5_stage3_map + Mconv5_stage4_map + Mconv5_stage5_map + Mconv5_stage6_map 234 | 235 | return full_unnormalized_heatmaps 236 | -------------------------------------------------------------------------------- /src/modules/lightning/models/dsntnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Aiden Nibali 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | DSNT (soft-argmax) operations for use in PyTorch computation graphs. 17 | """ 18 | 19 | from functools import reduce 20 | from operator import mul 21 | 22 | import torch 23 | import torch.nn.functional 24 | from torch.nn import functional as F 25 | 26 | 27 | def linear_expectation(probs, values): 28 | assert(len(values) == probs.ndimension() - 2) 29 | expectation = [] 30 | for i in range(2, probs.ndimension()): 31 | # Marginalise probabilities 32 | marg = probs 33 | for j in range(probs.ndimension() - 1, 1, -1): 34 | if i != j: 35 | marg = marg.sum(j, keepdim=False) 36 | # Calculate expectation along axis `i` 37 | expectation.append((marg * values[len(expectation)]).sum(-1, keepdim=False)) 38 | return torch.stack(expectation, -1) 39 | 40 | 41 | def normalized_linspace(length, dtype=None, device=None): 42 | """Generate a vector with values ranging from -1 to 1. 43 | 44 | Note that the values correspond to the "centre" of each cell, so 45 | -1 and 1 are always conceptually outside the bounds of the vector. 46 | For example, if length = 4, the following vector is generated: 47 | 48 | ```text 49 | [ -0.75, -0.25, 0.25, 0.75 ] 50 | ^ ^ ^ 51 | -1 0 1 52 | ``` 53 | 54 | Args: 55 | length: The length of the vector 56 | 57 | Returns: 58 | The generated vector 59 | """ 60 | if isinstance(length, torch.Tensor): 61 | length = length.to(device, dtype) 62 | first = -(length - 1.0) / length 63 | return torch.arange(length, dtype=dtype, device=device) * (2.0 / length) + first 64 | 65 | 66 | def soft_argmax(heatmaps, normalized_coordinates=True): 67 | if normalized_coordinates: 68 | values = [normalized_linspace(d, dtype=heatmaps.dtype, device=heatmaps.device) 69 | for d in heatmaps.size()[2:]] 70 | else: 71 | values = [torch.arange(0, d, dtype=heatmaps.dtype, device=heatmaps.device) 72 | for d in heatmaps.size()[2:]] 73 | coords = linear_expectation(heatmaps, values) 74 | # We flip the tensor like this instead of using `coords.flip(-1)` because aten::flip is not yet 75 | # supported by the ONNX exporter. 
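    # Splitting the last dimension and re-concatenating the pieces in reverse order is
    # equivalent to coords.flip(-1): linear_expectation yields (y, x) / (z, y, x) ordering,
    # and this reversal returns coordinates in the (x, y[, z]) convention used elsewhere (e.g. make_gauss).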
76 | coords = torch.cat(tuple(reversed(coords.split(1, -1))), -1) 77 | return coords 78 | 79 | def soft_argmax_3d(heatmaps, depth_dim_scale=3): 80 | assert isinstance(heatmaps, torch.Tensor) 81 | out_divider = 4 82 | num_of_points = heatmaps.size()[1] // depth_dim_scale 83 | out_size = [out_divider * out_divider * depth_dim_scale, heatmaps.size()[2] // out_divider, heatmaps.size()[3] // out_divider] 84 | 85 | heatmaps = heatmaps.reshape((-1, num_of_points, out_divider * out_divider * depth_dim_scale * out_size[1] * out_size[2])) 86 | heatmaps = F.softmax(heatmaps, 2) 87 | heatmaps = heatmaps.reshape((-1, num_of_points, out_size[0], out_size[1], out_size[2])) 88 | 89 | accu_x = heatmaps.sum(dim=(2,3)) 90 | accu_y = heatmaps.sum(dim=(2,4)) 91 | accu_z = heatmaps.sum(dim=(3,4)) 92 | 93 | accu_x = accu_x * torch.cuda.comm.broadcast(torch.arange(1,out_size[2]+1).type(torch.cuda.FloatTensor), devices=[accu_x.device.index])[0] 94 | accu_y = accu_y * torch.cuda.comm.broadcast(torch.arange(1,out_size[1]+1).type(torch.cuda.FloatTensor), devices=[accu_y.device.index])[0] 95 | accu_z = accu_z * torch.cuda.comm.broadcast(torch.arange(1,out_size[0]+1).type(torch.cuda.FloatTensor), devices=[accu_z.device.index])[0] 96 | 97 | accu_x = accu_x.sum(dim=2, keepdim=True) -1 98 | accu_y = accu_y.sum(dim=2, keepdim=True) -1 99 | accu_z = accu_z.sum(dim=2, keepdim=True) -1 100 | 101 | coord_out = torch.cat((accu_x, accu_y, accu_z), dim=2) 102 | 103 | return coord_out, heatmaps 104 | 105 | def dsnt(heatmaps, **kwargs): 106 | """Differentiable spatial to numerical transform. 107 | 108 | Args: 109 | heatmaps (torch.Tensor): Spatial representation of locations 110 | 111 | Returns: 112 | Numerical coordinates corresponding to the locations in the heatmaps. 113 | """ 114 | return soft_argmax(heatmaps, **kwargs) 115 | 116 | 117 | def flat_softmax(inp): 118 | """Compute the softmax with all but the first two tensor dimensions combined.""" 119 | 120 | orig_size = inp.size() 121 | # flat = inp.view(-1, reduce(mul, orig_size[2:])) 122 | flat = inp.reshape(-1, reduce(mul, orig_size[2:])) 123 | flat = torch.nn.functional.softmax(flat, -1) 124 | return flat.view(*orig_size) 125 | 126 | 127 | def euclidean_losses(actual, target): 128 | """Calculate the Euclidean losses for multi-point samples. 129 | 130 | Each sample must contain `n` points, each with `d` dimensions. For example, 131 | in the MPII human pose estimation task n=16 (16 joint locations) and 132 | d=2 (locations are 2D). 133 | 134 | Args: 135 | actual (Tensor): Predictions (B x L x D) 136 | target (Tensor): Ground truth target (B x L x D) 137 | 138 | 139 | Returns: 140 | Tensor: Losses (B x L) 141 | """ 142 | assert actual.size() == target.size(), 'input tensors must have the same size' 143 | return torch.norm(actual - target, p=2, dim=-1, keepdim=False) 144 | 145 | def squared_losses(actual, target): 146 | """Calculate the Euclidean losses for multi-point samples. 147 | 148 | Each sample must contain `n` points, each with `d` dimensions. For example, 149 | in the MPII human pose estimation task n=16 (16 joint locations) and 150 | d=2 (locations are 2D). 
151 | 152 | Args: 153 | actual (Tensor): Predictions (B x L x D) 154 | target (Tensor): Ground truth target (B x L x D) 155 | 156 | 157 | Returns: 158 | Tensor: Losses (B x L) 159 | """ 160 | assert actual.size() == target.size(), 'input tensors must have the same size' 161 | e = torch.norm(actual - target, p=2, dim=-1, keepdim=False) 162 | return e * e 163 | 164 | 165 | def l1_losses(actual, target): 166 | """Calculate the average L1 losses for multi-point samples. 167 | 168 | Args: 169 | actual (Tensor): Predictions (B x L x D) 170 | target (Tensor): Ground truth target (B x L x D) 171 | 172 | Returns: 173 | Tensor: Losses (B x L) 174 | """ 175 | assert actual.size() == target.size(), 'input tensors must have the same size' 176 | return torch.nn.functional.l1_loss(actual, target, reduction='none').mean(-1) 177 | 178 | 179 | def mse_losses(actual, target): 180 | """Calculate the average squared L2 losses for multi-point samples. 181 | 182 | Args: 183 | actual (Tensor): Predictions (B x L x D) 184 | target (Tensor): Ground truth target (B x L x D) 185 | 186 | Returns: 187 | Tensor: Losses (B x L) 188 | """ 189 | assert actual.size() == target.size(), 'input tensors must have the same size' 190 | return torch.nn.functional.mse_loss(actual, target, reduction='none').mean(-1) 191 | 192 | 193 | def make_gauss(means, size, sigma, normalize=True): 194 | """Draw Gaussians. 195 | 196 | This function is differential with respect to means. 197 | 198 | Note on ordering: `size` expects [..., depth, height, width], whereas 199 | `means` expects x, y, z, ... 200 | 201 | Args: 202 | means: coordinates containing the Gaussian means (units: normalized coordinates) 203 | size: size of the generated images (units: pixels) 204 | sigma: standard deviation of the Gaussian (units: pixels) 205 | normalize: when set to True, the returned Gaussians will be normalized 206 | """ 207 | 208 | dim_range = range(-1, -(len(size) + 1), -1) 209 | coords_list = [normalized_linspace(s, dtype=means.dtype, device=means.device) 210 | for s in reversed(size)] 211 | 212 | # PDF = exp(-(x - \mu)^2 / (2 \sigma^2)) 213 | 214 | # dists <- (x - \mu)^2 215 | dists = [(x - mean) ** 2 for x, mean in zip(coords_list, means.split(1, -1))] 216 | 217 | # ks <- -1 / (2 \sigma^2) 218 | stddevs = [2 * sigma / s for s in reversed(size)] 219 | ks = [-0.5 * (1 / stddev) ** 2 for stddev in stddevs] 220 | 221 | exps = [(dist * k).exp() for k, dist in zip(ks, dists)] 222 | 223 | # Combine dimensions of the Gaussian 224 | gauss = reduce(mul, [ 225 | reduce(lambda t, d: t.unsqueeze(d), filter(lambda d: d != dim, dim_range), dist) 226 | for dim, dist in zip(dim_range, exps) 227 | ]) 228 | 229 | if not normalize: 230 | return gauss 231 | 232 | # Normalize the Gaussians 233 | val_sum = reduce(lambda t, dim: t.sum(dim, keepdim=True), dim_range, gauss) + 1e-24 234 | return gauss / val_sum 235 | 236 | 237 | def average_loss(losses, mask=None): 238 | """Calculate the average of per-location losses. 
239 | 240 | Args: 241 | losses (Tensor): Predictions (B x L) 242 | mask (Tensor, optional): Mask of points to include in the loss calculation 243 | (B x L), defaults to including everything 244 | """ 245 | 246 | if mask is not None: 247 | assert mask.size() == losses.size(), 'mask must be the same size as losses' 248 | losses = losses * mask 249 | denom = mask.sum() 250 | else: 251 | denom = losses.numel() 252 | 253 | # Prevent division by zero 254 | if isinstance(denom, int): 255 | denom = max(denom, 1) 256 | else: 257 | denom = denom.clamp(1) 258 | 259 | return losses.sum() / denom 260 | 261 | 262 | def _kl(p, q, ndims): 263 | eps = 1e-24 264 | unsummed_kl = p * ((p + eps).log() - (q + eps).log()) 265 | kl_values = reduce(lambda t, _: t.sum(-1, keepdim=False), range(ndims), unsummed_kl) 266 | return kl_values 267 | 268 | 269 | def _js(p, q, ndims): 270 | m = 0.5 * (p + q) 271 | return 0.5 * _kl(p, m, ndims) + 0.5 * _kl(q, m, ndims) 272 | 273 | 274 | def _divergence_reg_losses(heatmaps, mu_t, sigma_t, divergence): 275 | ndims = mu_t.size(-1) 276 | assert heatmaps.dim() == ndims + 2, 'expected heatmaps to be a {}D tensor'.format(ndims + 2) 277 | assert heatmaps.size()[:-ndims] == mu_t.size()[:-1] 278 | 279 | gauss = make_gauss(mu_t, heatmaps.size()[2:], sigma_t) 280 | divergences = divergence(heatmaps, gauss, ndims) 281 | return divergences, gauss 282 | 283 | 284 | def kl_reg_losses(heatmaps, mu_t, sigma_t): 285 | """Calculate Kullback-Leibler divergences between heatmaps and target Gaussians. 286 | 287 | Args: 288 | heatmaps (torch.Tensor): Heatmaps generated by the model 289 | mu_t (torch.Tensor): Centers of the target Gaussians (in normalized units) 290 | sigma_t (float): Standard deviation of the target Gaussians (in pixels) 291 | 292 | Returns: 293 | Per-location KL divergences. 294 | """ 295 | 296 | return _divergence_reg_losses(heatmaps, mu_t, sigma_t, _kl) 297 | 298 | 299 | def js_reg_losses(heatmaps, mu_t, sigma_t): 300 | """Calculate Jensen-Shannon divergences between heatmaps and target Gaussians. 301 | 302 | Args: 303 | heatmaps (torch.Tensor): Heatmaps generated by the model 304 | mu_t (torch.Tensor): Centers of the target Gaussians (in normalized units) 305 | sigma_t (float): Standard deviation of the target Gaussians (in pixels) 306 | 307 | Returns: 308 | Per-location JS divergences. 309 | """ 310 | 311 | return _divergence_reg_losses(heatmaps, mu_t, sigma_t, _js) 312 | 313 | 314 | def variance_reg_losses(heatmaps, sigma_t): 315 | """Calculate the loss between heatmap variances and target variance. 316 | 317 | Note that this is slightly different from the version used in the 318 | DSNT paper. This version uses pixel units for variance, which 319 | produces losses that are larger by a constant factor. 320 | 321 | Args: 322 | heatmaps (torch.Tensor): Heatmaps generated by the model 323 | sigma_t (float): Target standard deviation (in pixels) 324 | 325 | Returns: 326 | Per-location sum of square errors for variance. 
327 | """ 328 | 329 | # mu = E[X] 330 | values = [normalized_linspace(d, dtype=heatmaps.dtype, device=heatmaps.device) 331 | for d in heatmaps.size()[2:]] 332 | mu = linear_expectation(heatmaps, values) 333 | # var = E[(X - mu)^2] 334 | values = [(a - b.squeeze(0)) ** 2 for a, b in zip(values, mu.split(1, -1))] 335 | var = linear_expectation(heatmaps, values) 336 | 337 | 338 | heatmap_size = torch.tensor(list(heatmaps.size()[2:]), dtype=var.dtype, device=var.device) 339 | actual_variance = var * (heatmap_size / 2) ** 2 340 | target_variance = sigma_t ** 2 341 | sq_error = (actual_variance - target_variance) ** 2 342 | 343 | return sq_error.sum(-1, keepdim=False) 344 | 345 | 346 | def normalized_to_pixel_coordinates(coords, size): 347 | """Convert from normalized coordinates to pixel coordinates. 348 | 349 | Args: 350 | coords: Coordinate tensor, where elements in the last dimension are ordered as (x, y, ...). 351 | size: Number of pixels in each spatial dimension, ordered as (..., height, width). 352 | 353 | Returns: 354 | `coords` in pixel coordinates. 355 | """ 356 | if torch.is_tensor(coords): 357 | size = coords.new_tensor(size).flip(-1) 358 | return 0.5 * ((coords + 1) * size - 1) 359 | 360 | 361 | def pixel_to_normalized_coordinates(coords, size): 362 | """Convert from pixel coordinates to normalized coordinates. 363 | 364 | Args: 365 | coords: Coordinate tensor, where elements in the last dimension are ordered as (x, y, ...). 366 | size: Number of pixels in each spatial dimension, ordered as (..., height, width). 367 | 368 | Returns: 369 | `coords` in normalized coordinates. 370 | """ 371 | if torch.is_tensor(coords): 372 | size = coords.new_tensor(size).flip(-1) 373 | return ((2 * coords + 1) / size) - 1 374 | -------------------------------------------------------------------------------- /src/modules/lightning/models/graphunet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.parameter import Parameter 5 | import numpy as np 6 | 7 | class GraphConv(nn.Module): 8 | 9 | def __init__(self, in_features, out_features, activation=nn.ReLU(inplace=True)): 10 | super(GraphConv, self).__init__() 11 | self.fc = nn.Linear(in_features=in_features, out_features=out_features) 12 | #self.adj_sq = adj_sq 13 | self.activation = activation 14 | #self.scale_identity = scale_identity 15 | #self.I = Parameter(torch.eye(number_of_nodes, requires_grad=False).unsqueeze(0)) 16 | 17 | 18 | def laplacian(self, A_hat): 19 | D_hat = (torch.sum(A_hat, 0) + 1e-5) ** (-0.5) 20 | L = D_hat * A_hat * D_hat 21 | return L 22 | 23 | 24 | def laplacian_batch(self, A_hat): 25 | #batch, N = A.shape[:2] 26 | #if self.adj_sq: 27 | # A = torch.bmm(A, A) # use A^2 to increase graph connectivity 28 | #I = torch.eye(N).unsqueeze(0).to(device) 29 | #I = self.I 30 | #if self.scale_identity: 31 | # I = 2 * I # increase weight of self connections 32 | #A_hat = A + I 33 | batch, N = A_hat.shape[:2] 34 | D_hat = (torch.sum(A_hat, 1) + 1e-5) ** (-0.5) 35 | L = D_hat.view(batch, N, 1) * A_hat * D_hat.view(batch, 1, N) 36 | return L 37 | 38 | 39 | def forward(self, X, A): 40 | batch = X.size(0) 41 | #A = self.laplacian(A) 42 | A_hat = A.unsqueeze(0).repeat(batch, 1, 1) 43 | #X = self.fc(torch.bmm(A_hat, X)) 44 | X = self.fc(torch.bmm(self.laplacian_batch(A_hat), X)) 45 | if self.activation is not None: 46 | X = self.activation(X) 47 | return X 48 | 49 | 50 | class GraphPool(nn.Module): 51 | 52 | def 
__init__(self, in_nodes, out_nodes): 53 | super(GraphPool, self).__init__() 54 | self.fc = nn.Linear(in_features=in_nodes, out_features=out_nodes) 55 | 56 | 57 | def forward(self, X): 58 | X = X.transpose(1, 2) 59 | X = self.fc(X) 60 | X = X.transpose(1, 2) 61 | return X 62 | 63 | 64 | class GraphUnpool(nn.Module): 65 | 66 | def __init__(self, in_nodes, out_nodes): 67 | super(GraphUnpool, self).__init__() 68 | self.fc = nn.Linear(in_features=in_nodes, out_features=out_nodes) 69 | 70 | 71 | def forward(self, X): 72 | X = X.transpose(1, 2) 73 | X = self.fc(X) 74 | X = X.transpose(1, 2) 75 | return X 76 | 77 | 78 | class GraphUNet(nn.Module): 79 | 80 | def __init__(self, in_features=2, out_features=3, initial_points=19): 81 | super(GraphUNet, self).__init__() 82 | 83 | a0 = initial_points 84 | a1 = (a0 + 1) // 2 85 | a2 = (a1 + 1) // 2 86 | a3 = (a2 + 1) // 2 87 | a4 = (a3 + 1) // 2 88 | a5 = (a4 + 1) // 2 89 | 90 | self.A_0 = Parameter(torch.eye(a0).float().cuda(), requires_grad=True) 91 | self.A_1 = Parameter(torch.eye(a1).float().cuda(), requires_grad=True) 92 | self.A_2 = Parameter(torch.eye(a2).float().cuda(), requires_grad=True) 93 | self.A_3 = Parameter(torch.eye(a3).float().cuda(), requires_grad=True) 94 | self.A_4 = Parameter(torch.eye(a4).float().cuda(), requires_grad=True) 95 | self.A_5 = Parameter(torch.eye(a5).float().cuda(), requires_grad=True) 96 | 97 | self.gconv1 = GraphConv(in_features, 4) # 29 = 21 H + 8 O 98 | self.pool1 = GraphPool(a0, a1) 99 | 100 | self.gconv2 = GraphConv(4, 8) # 15 = 11 H + 4 O 101 | self.pool2 = GraphPool(a1, a2) 102 | 103 | self.gconv3 = GraphConv(8, 16) # 7 = 5 H + 2 O 104 | self.pool3 = GraphPool(a2, a3) 105 | 106 | self.gconv4 = GraphConv(16, 32) # 4 = 3 H + 1 O 107 | self.pool4 = GraphPool(a3, a4) 108 | 109 | self.gconv5 = GraphConv(32, 64) # 2 = 1 H + 1 O 110 | self.pool5 = GraphPool(a4, a5) 111 | 112 | self.fc1 = nn.Linear(64, 20) 113 | 114 | self.fc2 = nn.Linear(20, 64) 115 | 116 | self.unpool6 = GraphUnpool(a5, a4) 117 | self.gconv6 = GraphConv(128, 32) 118 | 119 | self.unpool7 = GraphUnpool(a4, a3) 120 | self.gconv7 = GraphConv(64, 16) 121 | 122 | self.unpool8 = GraphUnpool(a3, a2) 123 | self.gconv8 = GraphConv(32, 8) 124 | 125 | self.unpool9 = GraphUnpool(a2, a1) 126 | self.gconv9 = GraphConv(16, 4) 127 | 128 | self.unpool10 = GraphUnpool(a1, a0) 129 | self.gconv10 = GraphConv(8, out_features, activation=None) 130 | 131 | self.ReLU = nn.ReLU() 132 | 133 | def _get_decoder_input(self, X_e, X_d): 134 | return torch.cat((X_e, X_d), 2) 135 | 136 | def forward(self, X): 137 | X_0 = self.gconv1(X, self.A_0) 138 | X_1 = self.pool1(X_0) 139 | 140 | X_1 = self.gconv2(X_1, self.A_1) 141 | X_2 = self.pool2(X_1) 142 | 143 | X_2 = self.gconv3(X_2, self.A_2) 144 | X_3 = self.pool3(X_2) 145 | 146 | X_3 = self.gconv4(X_3, self.A_3) 147 | X_4 = self.pool4(X_3) 148 | 149 | X_4 = self.gconv5(X_4, self.A_4) 150 | X_5 = self.pool5(X_4) 151 | 152 | global_features = self.ReLU(self.fc1(X_5)) 153 | global_features = self.ReLU(self.fc2(global_features)) 154 | 155 | X_6 = self.unpool6(global_features) 156 | X_6 = self.gconv6(self._get_decoder_input(X_4, X_6), self.A_4) 157 | 158 | X_7 = self.unpool7(X_6) 159 | X_7 = self.gconv7(self._get_decoder_input(X_3, X_7), self.A_3) 160 | 161 | X_8 = self.unpool8(X_7) 162 | X_8 = self.gconv8(self._get_decoder_input(X_2, X_8), self.A_2) 163 | 164 | X_9 = self.unpool9(X_8) 165 | X_9 = self.gconv9(self._get_decoder_input(X_1, X_9), self.A_1) 166 | 167 | X_10 = self.unpool10(X_9) 168 | X_10 = 
self.gconv10(self._get_decoder_input(X_0, X_10), self.A_0) 169 | 170 | return X_10 171 | 172 | 173 | class GraphNet(nn.Module): 174 | 175 | def __init__(self, in_features=2, out_features=2, initial_points=19): 176 | super(GraphNet, self).__init__() 177 | 178 | self.A_hat = Parameter(torch.eye(initial_points).float().cuda(), requires_grad=True) 179 | 180 | self.gconv1 = GraphConv(in_features, 128) 181 | self.gconv2 = GraphConv(128, 16) 182 | self.gconv3 = GraphConv(16, out_features, activation=None) 183 | 184 | 185 | def forward(self, X): 186 | X_0 = self.gconv1(X, self.A_hat) 187 | X_1 = self.gconv2(X_0, self.A_hat) 188 | X_2 = self.gconv3(X_1, self.A_hat) 189 | 190 | return X_2 191 | -------------------------------------------------------------------------------- /src/modules/lightning/models/hopenet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | from .resnet import ( 5 | resnet50, 6 | resnet10 7 | ) 8 | 9 | from .graphunet import ( 10 | GraphNet, 11 | GraphUNet 12 | ) 13 | 14 | import typing 15 | 16 | class HopeNet(nn.Module): 17 | def __init__(self): 18 | super(HopeNet, self).__init__() 19 | self.resnet = resnet50(pretrained=False, num_classes=19*2) 20 | self.graphnet = GraphNet(in_features=2050, out_features=2) 21 | self.graphunet = GraphUNet(in_features=2, out_features=3) 22 | 23 | def forward(self, 24 | data : torch.Tensor 25 | ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 26 | points2D_init, features = self.resnet(data) 27 | features = features.unsqueeze(1).repeat(1, 19, 1) 28 | # batch = points2D.shape[0] 29 | in_features = torch.cat([points2D_init, features], dim=2) 30 | points2D = self.graphnet(in_features) 31 | points3D = self.graphunet(points2D) 32 | return points2D_init, points2D, points3D 33 | -------------------------------------------------------------------------------- /src/modules/lightning/models/hrnet_e2e.py: -------------------------------------------------------------------------------- 1 | from moai.utils.arguments import ensure_string_list 2 | 3 | import moai.networks.lightning as minet 4 | import moai.nn.convolution as mic 5 | import moai.nn.residual as mires 6 | import moai.nn.sampling.spatial.downsample as mids 7 | import moai.modules.lightning as mimod 8 | import moai.nn.utils as miu 9 | 10 | import torch 11 | 12 | import hydra.utils as hyu 13 | import omegaconf.omegaconf as omegaconf 14 | import typing 15 | import logging 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | #NOTE: from https://github.com/HRNet/HRNet-Bottom-Up-Pose-Estimation/blob/master/lib/models/pose_hrnet.py 20 | #NOTE: from https://arxiv.org/pdf/1908.07919.pdf 21 | 22 | __all__ = ["HRNetMod_e2e"] 23 | 24 | class HRNetMod_e2e(torch.nn.Module): 25 | def __init__(self, 26 | configuration: omegaconf.DictConfig, 27 | modules: omegaconf.DictConfig 28 | ): 29 | super(HRNetMod_e2e, self).__init__( 30 | # data=data, parameters=parameters, 31 | # feedforward=feedforward, monads=monads, 32 | # supervision=supervision, validation=validation, 33 | # export=export, visualization=visualization, 34 | ) 35 | preproc = configuration.preproc 36 | residual = configuration.residual 37 | #NOTE: preproc = stem + layer1 38 | preproc_convs = [] 39 | prev_features = configuration.in_features 40 | self.out = configuration.out_features 41 | if not preproc == None: 42 | stem = preproc.stem 43 | for b, c, a, f, k, s, p in zip( 44 | stem.blocks, stem.convolutions, 45 | stem.activations, 
stem.features, 46 | stem.kernel_sizes, stem.strides, stem.paddings): 47 | preproc_convs.append(mic.make_conv_block( 48 | block_type=b, 49 | convolution_type=c, 50 | in_features=prev_features, 51 | out_features=f, 52 | activation_type=a, 53 | convolution_params={ 54 | "kernel_size": k, 55 | "stride": s, 56 | "padding": p, 57 | }, 58 | )) 59 | prev_features = f 60 | residual_blocks = [] 61 | for i, o, b in zip( 62 | residual.features.in_features, residual.features.out_features, 63 | residual.features.bottleneck_features, 64 | ): 65 | residual_blocks.append(mires.make_residual_block( 66 | block_type=residual.block, 67 | convolution_type=residual.convolution, 68 | out_features=o, 69 | in_features=i, 70 | bottleneck_features=b, 71 | activation_type=residual.activation, 72 | strided=False, 73 | )) 74 | self.pre = torch.nn.Sequential( 75 | *preproc_convs, *residual_blocks, 76 | ) 77 | 78 | start_transition_key = 'start_transition_standard_1' 79 | highres_key = 'highres_standard_1' 80 | stage_transition_key = 'stage_transition_standard_1' 81 | head_key = 'top_branch_1' 82 | else: 83 | start_transition_key = 'start_transition_standard_2' 84 | highres_key = 'highres_standard_2' 85 | stage_transition_key = 'stage_transition_standard_2' 86 | head_key = 'top_branch_2' 87 | 88 | branches_config = configuration.branches 89 | start_trans_config = modules[start_transition_key] 90 | self.start_trans = hyu.instantiate(start_trans_config, 91 | in_features=residual.features.out_features[-1], 92 | start_features=branches_config.start_features 93 | ) 94 | #NOTE: stages 95 | highres_module = modules[highres_key] # NOTE: outputs list of # branches outputs 96 | self.stages = torch.nn.ModuleList([ 97 | torch.nn.Sequential(*[ 98 | hyu.instantiate(highres_module, 99 | branches=i, depth=d, start_features=branches_config.start_features 100 | ) for _, d in zip(range(modules), depths) 101 | ]) for i, modules, depths in zip( 102 | range(2, configuration.stages + 1), 103 | branches_config.modules, 104 | branches_config.depths, 105 | ) 106 | ]) 107 | stage_trans_config = modules[stage_transition_key] 108 | self.stage_transitions = torch.nn.ModuleList([ 109 | hyu.instantiate(stage_trans_config, branches=i + 1, 110 | prev_branch_features=branches_config.start_features * (2 ** i), 111 | ) for i in range(1, configuration.stages - 1) 112 | ]) 113 | head_module = modules[head_key] 114 | self.head = hyu.instantiate(head_module, 115 | stages=configuration.stages, 116 | start_features=branches_config.start_features, 117 | out_features=configuration.out_features, 118 | ) 119 | # self.input = ensure_string_list(configuration.input) 120 | # self.output = ensure_string_list(configuration.output) 121 | self.output_prefix = configuration.output 122 | 123 | def forward(self, 124 | data: torch.Tensor 125 | ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 126 | x = data 127 | if hasattr(self, 'pre'): 128 | x = self.pre(x) 129 | hr_inputs = self.start_trans(x) 130 | combined_hm_preds = [] 131 | combined_hm_preds.append(hr_inputs) 132 | for stage, trans in zip(self.stages, self.stage_transitions): 133 | features = stage(hr_inputs) 134 | combined_hm_preds.append(features) 135 | hr_inputs = trans(features) 136 | 137 | combined_hm_preds.append(self.stages[-1](hr_inputs)) 138 | combined_hm_preds_final = [] 139 | for i, features in enumerate(combined_hm_preds): 140 | combined_hm_preds_final.append(self.head(features)) 141 | 142 | aggregated_hm = torch.zeros_like(combined_hm_preds_final[0]) 143 | for i, heatmap in 
enumerate(combined_hm_preds_final): 144 | aggregated_hm += heatmap 145 | 146 | 147 | return aggregated_hm[:, :53, ...], aggregated_hm[:, 53:, ...], torch.cat([x, aggregated_hm], dim=1) 148 | -------------------------------------------------------------------------------- /src/modules/lightning/models/hrnet_mod.py: -------------------------------------------------------------------------------- 1 | from moai.utils.arguments import ensure_string_list 2 | 3 | import moai.networks.lightning as minet 4 | import moai.nn.convolution as mic 5 | import moai.nn.residual as mires 6 | import moai.nn.sampling.spatial.downsample as mids 7 | import moai.modules.lightning as mimod 8 | import moai.nn.utils as miu 9 | 10 | import torch 11 | 12 | import hydra.utils as hyu 13 | import omegaconf.omegaconf as omegaconf 14 | import typing 15 | import logging 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | #NOTE: from https://github.com/HRNet/HRNet-Bottom-Up-Pose-Estimation/blob/master/lib/models/pose_hrnet.py 20 | #NOTE: from https://arxiv.org/pdf/1908.07919.pdf 21 | 22 | __all__ = ["HRNetMod"] 23 | 24 | class HRNetMod(torch.nn.Module): 25 | def __init__(self, 26 | configuration: omegaconf.DictConfig, 27 | modules: omegaconf.DictConfig 28 | ): 29 | super(HRNetMod, self).__init__( 30 | # data=data, parameters=parameters, 31 | # feedforward=feedforward, monads=monads, 32 | # supervision=supervision, validation=validation, 33 | # export=export, visualization=visualization, 34 | ) 35 | preproc = configuration.preproc 36 | residual = configuration.residual 37 | #NOTE: preproc = stem + layer1 38 | preproc_convs = [] 39 | prev_features = configuration.in_features 40 | 41 | if not preproc == None: 42 | stem = preproc.stem 43 | for b, c, a, f, k, s, p in zip( 44 | stem.blocks, stem.convolutions, 45 | stem.activations, stem.features, 46 | stem.kernel_sizes, stem.strides, stem.paddings): 47 | preproc_convs.append(mic.make_conv_block( 48 | block_type=b, 49 | convolution_type=c, 50 | in_features=prev_features, 51 | out_features=f, 52 | activation_type=a, 53 | convolution_params={ 54 | "kernel_size": k, 55 | "stride": s, 56 | "padding": p, 57 | }, 58 | )) 59 | prev_features = f 60 | residual_blocks = [] 61 | for i, o, b in zip( 62 | residual.features.in_features, residual.features.out_features, 63 | residual.features.bottleneck_features, 64 | ): 65 | residual_blocks.append(mires.make_residual_block( 66 | block_type=residual.block, 67 | convolution_type=residual.convolution, 68 | out_features=o, 69 | in_features=i, 70 | bottleneck_features=b, 71 | activation_type=residual.activation, 72 | strided=False, 73 | )) 74 | self.pre = torch.nn.Sequential( 75 | *preproc_convs, *residual_blocks, 76 | ) 77 | 78 | start_transition_key = 'start_transition_standard_1' 79 | highres_key = 'highres_standard_1' 80 | stage_transition_key = 'stage_transition_standard_1' 81 | head_key = 'top_branch_1' 82 | else: 83 | start_transition_key = 'start_transition_standard_2' 84 | highres_key = 'highres_standard_2' 85 | stage_transition_key = 'stage_transition_standard_2' 86 | head_key = 'top_branch_2' 87 | 88 | branches_config = configuration.branches 89 | start_trans_config = modules[start_transition_key] 90 | self.start_trans = hyu.instantiate(start_trans_config, 91 | in_features=residual.features.out_features[-1], 92 | start_features=branches_config.start_features 93 | ) 94 | #NOTE: stages 95 | highres_module = modules[highres_key] # NOTE: outputs list of # branches outputs 96 | self.stages = torch.nn.ModuleList([ 97 | torch.nn.Sequential(*[ 98 
| hyu.instantiate(highres_module, 99 | branches=i, depth=d, start_features=branches_config.start_features 100 | ) for _, d in zip(range(modules), depths) 101 | ]) for i, modules, depths in zip( 102 | range(2, configuration.stages + 1), 103 | branches_config.modules, 104 | branches_config.depths, 105 | ) 106 | ]) 107 | stage_trans_config = modules[stage_transition_key] 108 | self.stage_transitions = torch.nn.ModuleList([ 109 | hyu.instantiate(stage_trans_config, branches=i + 1, 110 | prev_branch_features=branches_config.start_features * (2 ** i), 111 | ) for i in range(1, configuration.stages - 1) 112 | ]) 113 | head_module = modules[head_key] 114 | self.head = hyu.instantiate(head_module, 115 | stages=configuration.stages, 116 | start_features=branches_config.start_features, 117 | out_features=configuration.out_features, 118 | ) 119 | # self.input = ensure_string_list(configuration.input) 120 | # self.output = ensure_string_list(configuration.output) 121 | self.output_prefix = configuration.output 122 | 123 | def forward(self, 124 | data: torch.Tensor 125 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 126 | x = data 127 | if hasattr(self, 'pre'): 128 | x = self.pre(x) 129 | hr_inputs = self.start_trans(x) 130 | combined_hm_preds = [] 131 | combined_hm_preds.append(hr_inputs) 132 | for stage, trans in zip(self.stages, self.stage_transitions): 133 | features = stage(hr_inputs) 134 | combined_hm_preds.append(features) 135 | hr_inputs = trans(features) 136 | 137 | combined_hm_preds.append(self.stages[-1](hr_inputs)) 138 | combined_hm_preds_final = [] 139 | for i, features in enumerate(combined_hm_preds): 140 | combined_hm_preds_final.append(self.head(features)) 141 | 142 | aggregated_hm = torch.zeros_like(combined_hm_preds_final[0]) 143 | for i, heatmap in enumerate(combined_hm_preds_final): 144 | aggregated_hm += heatmap 145 | return aggregated_hm, torch.cat([x, aggregated_hm], dim=1) -------------------------------------------------------------------------------- /src/modules/lightning/models/hrnet_ps.py: -------------------------------------------------------------------------------- 1 | from moai.utils.arguments import ensure_string_list 2 | 3 | import moai.networks.lightning as minet 4 | import moai.nn.convolution as mic 5 | import moai.nn.residual as mires 6 | import moai.nn.sampling.spatial.downsample as mids 7 | import moai.modules.lightning as mimod 8 | import moai.nn.utils as miu 9 | 10 | import torch 11 | 12 | import hydra.utils as hyu 13 | import omegaconf.omegaconf as omegaconf 14 | import typing 15 | import logging 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | #NOTE: from https://github.com/HRNet/HRNet-Bottom-Up-Pose-Estimation/blob/master/lib/models/pose_hrnet.py 20 | #NOTE: from https://arxiv.org/pdf/1908.07919.pdf 21 | 22 | __all__ = ["HRNetModPS"] 23 | 24 | class HRNetModPS(torch.nn.Module): 25 | def __init__(self, 26 | configuration: omegaconf.DictConfig, 27 | modules: omegaconf.DictConfig 28 | ): 29 | super(HRNetModPS, self).__init__( 30 | # data=data, parameters=parameters, 31 | # feedforward=feedforward, monads=monads, 32 | # supervision=supervision, validation=validation, 33 | # export=export, visualization=visualization, 34 | ) 35 | preproc = configuration.preproc 36 | residual = configuration.residual 37 | #NOTE: preproc = stem + layer1 38 | preproc_convs = [] 39 | prev_features = configuration.in_features 40 | 41 | if not preproc == None: 42 | stem = preproc.stem 43 | for b, c, a, f, k, s, p in zip( 44 | stem.blocks, stem.convolutions, 45 | stem.activations, 
stem.features, 46 | stem.kernel_sizes, stem.strides, stem.paddings): 47 | preproc_convs.append(mic.make_conv_block( 48 | block_type=b, 49 | convolution_type=c, 50 | in_features=prev_features, 51 | out_features=f, 52 | activation_type=a, 53 | convolution_params={ 54 | "kernel_size": k, 55 | "stride": s, 56 | "padding": p, 57 | }, 58 | )) 59 | prev_features = f 60 | residual_blocks = [] 61 | for i, o, b in zip( 62 | residual.features.in_features, residual.features.out_features, 63 | residual.features.bottleneck_features, 64 | ): 65 | residual_blocks.append(mires.make_residual_block( 66 | block_type=residual.block, 67 | convolution_type=residual.convolution, 68 | out_features=o, 69 | in_features=i, 70 | bottleneck_features=b, 71 | activation_type=residual.activation, 72 | strided=False, 73 | )) 74 | self.pre = torch.nn.Sequential( 75 | *preproc_convs, *residual_blocks, 76 | ) 77 | 78 | start_transition_key = 'start_transition_standard_1' 79 | highres_key = 'highres_standard_1' 80 | stage_transition_key = 'stage_transition_standard_1' 81 | head_key = 'top_branch_1' 82 | else: 83 | start_transition_key = 'start_transition_standard_2' 84 | highres_key = 'highres_standard_2' 85 | stage_transition_key = 'stage_transition_standard_2' 86 | head_key = 'top_branch_2' 87 | 88 | branches_config = configuration.branches 89 | start_trans_config = modules[start_transition_key] 90 | self.start_trans = hyu.instantiate(start_trans_config, 91 | in_features=residual.features.out_features[-1], 92 | start_features=branches_config.start_features 93 | ) 94 | #NOTE: stages 95 | highres_module = modules[highres_key] # NOTE: outputs list of # branches outputs 96 | self.stages = torch.nn.ModuleList([ 97 | torch.nn.Sequential(*[ 98 | hyu.instantiate(highres_module, 99 | branches=i, depth=d, start_features=branches_config.start_features 100 | ) for _, d in zip(range(modules), depths) 101 | ]) for i, modules, depths in zip( 102 | range(2, configuration.stages + 1), 103 | branches_config.modules, 104 | branches_config.depths, 105 | ) 106 | ]) 107 | stage_trans_config = modules[stage_transition_key] 108 | self.stage_transitions = torch.nn.ModuleList([ 109 | hyu.instantiate(stage_trans_config, branches=i + 1, 110 | prev_branch_features=branches_config.start_features * (2 ** i), 111 | ) for i in range(1, configuration.stages - 1) 112 | ]) 113 | head_module = modules[head_key] 114 | self.head = hyu.instantiate(head_module, 115 | stages=configuration.stages, 116 | start_features=branches_config.start_features, 117 | out_features=configuration.out_features, 118 | ) 119 | # self.input = ensure_string_list(configuration.input) 120 | # self.output = ensure_string_list(configuration.output) 121 | self.output_prefix = configuration.output 122 | 123 | def forward(self, 124 | data: torch.Tensor 125 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 126 | x = data 127 | if hasattr(self, 'pre'): 128 | x = self.pre(x) 129 | hr_inputs = self.start_trans(x) 130 | for stage, trans in zip(self.stages, self.stage_transitions): 131 | features = stage(hr_inputs) 132 | hr_inputs = trans(features) 133 | prediction = self.head(self.stages[-1](hr_inputs)) 134 | aggregated_hm = prediction 135 | 136 | return aggregated_hm, torch.cat([x, aggregated_hm], dim=1) -------------------------------------------------------------------------------- /src/modules/lightning/models/oml_dual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.modules as nnm 4 | 5 | import 
torch.nn.functional as F 6 | 7 | import typing 8 | 9 | """ typical 2D convolution, WxHxC => WxHxC """ 10 | def conv(in_channels, out_channels, filter, pad, dil, n_type): 11 | if (n_type == 'elu'): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels, out_channels, filter, stride=1, padding=(pad*dil), dilation=dil, bias=False), 14 | nn.ELU(inplace=False) 15 | ) 16 | else: 17 | return nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, filter, stride=1, padding=(pad*dil), dilation=dil, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=False) 21 | ) 22 | 23 | 24 | class OmlDual(nn.Module): 25 | def __init__(self, 26 | num_markers, 27 | num_joints 28 | ): 29 | super(OmlDual, self).__init__() 30 | """ 31 | Args: 32 | width: input width 33 | height: input height 34 | ndf: constant number from channels 35 | dil: dilation value - parameter for convolutional layers 36 | norma_type: normalization type (elu | batch norm) 37 | """ 38 | self.h = 136 39 | self.w = 136 40 | self.dil = 1 41 | self.type = "batch_norm" 42 | self.markers_out = num_markers 43 | self.joints_out = num_joints 44 | self.out = self.markers_out + self.joints_out 45 | 46 | 47 | # ATTENTION: this is hardcoded due to the SoA model 48 | ndf = 64 49 | """ dmc_neural_network """ 50 | self.conv1 = conv(1, ndf, 3, 0, dil=self.dil, n_type=self.type) 51 | self.conv2 = conv(ndf, ndf, 3, 0, dil=self.dil, n_type=self.type) 52 | self.pool3 = nn.MaxPool2d(2, 2, 0, self.dil, False, False) 53 | self.conv4 = conv(ndf, 2 * ndf, 3, 0, dil=self.dil, n_type=self.type) 54 | self.conv5 = conv(2 * ndf, 2 * ndf, 3, 0, dil=self.dil, n_type=self.type) 55 | self.conv6 = conv(2 * ndf, 2 * ndf, 3, 0, dil=self.dil, n_type=self.type) 56 | self.pool7 = nn.MaxPool2d(2, 2, 0, self.dil, False, False) 57 | 58 | 59 | f1d = int((((self.w-2 * 2 * self.dil) / 2 - 3 * 2 * self.dil) \ 60 | / 2 * ((self.w-2 * 2 * self.dil) / 2 - 3 * 2 * self.dil) / 2 ) * ndf * 2) 61 | 62 | #f1d = 4608 63 | self.fc_1 = nn.Linear(f1d, 2048) 64 | self.fc_2 = nn.Linear(2048, 3 * self.out) 65 | 66 | def forward(self, 67 | data: torch.Tensor 68 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 69 | out = self.conv1(data) 70 | out = self.conv2(out) 71 | out = self.pool3(out) 72 | out = self.conv4(out) 73 | out = self.conv5(out) 74 | out = self.conv6(out) 75 | out_viz = self.pool7(out) 76 | 77 | out = torch.reshape(out_viz, (out_viz.size()[0], out_viz.size()[1] * out_viz.size()[2] * out_viz.size()[3])) 78 | 79 | out = self.fc_1(out) 80 | out = nn.functional.relu(out) 81 | out_c = self.fc_2(out) 82 | out = out_c.view(-1, self.out, 3) 83 | return out[:, :self.markers_out], out[:, self.markers_out:, ...] 84 | -------------------------------------------------------------------------------- /src/modules/lightning/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the definitions of the various ResNet models. 3 | Code adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py. 
4 | Forward pass was modified to discard the last fully connected layer 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torch.utils.model_zoo as model_zoo 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=dilation, groups=groups, bias=False, dilation=dilation) 23 | 24 | 25 | def conv1x1(in_planes, out_planes, stride=1): 26 | """1x1 convolution""" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | __constants__ = ['downsample'] 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 35 | base_width=64, dilation=1, norm_layer=None): 36 | super(BasicBlock, self).__init__() 37 | if norm_layer is None: 38 | norm_layer = nn.BatchNorm2d 39 | if groups != 1 or base_width != 64: 40 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 41 | if dilation > 1: 42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 43 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 44 | self.conv1 = conv3x3(inplanes, planes, stride) 45 | self.bn1 = norm_layer(planes) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.conv2 = conv3x3(planes, planes) 48 | self.bn2 = norm_layer(planes) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | identity = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | 62 | if self.downsample is not None: 63 | identity = self.downsample(x) 64 | 65 | out += identity 66 | out = self.relu(out) 67 | 68 | return out 69 | 70 | 71 | class Bottleneck(nn.Module): 72 | expansion = 4 73 | __constants__ = ['downsample'] 74 | 75 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 76 | base_width=64, dilation=1, norm_layer=None): 77 | super(Bottleneck, self).__init__() 78 | if norm_layer is None: 79 | norm_layer = nn.BatchNorm2d 80 | width = int(planes * (base_width / 64.)) * groups 81 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 82 | self.conv1 = conv1x1(inplanes, width) 83 | self.bn1 = norm_layer(width) 84 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 85 | self.bn2 = norm_layer(width) 86 | self.conv3 = conv1x1(width, planes * self.expansion) 87 | self.bn3 = norm_layer(planes * self.expansion) 88 | self.relu = nn.ReLU(inplace=True) 89 | self.downsample = downsample 90 | self.stride = stride 91 | 92 | def forward(self, x): 93 | identity = x 94 | 95 | out = self.conv1(x) 96 | out = self.bn1(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv2(out) 100 | out = self.bn2(out) 101 | out = self.relu(out) 102 | 103 | out = self.conv3(out) 104 | out = self.bn3(out) 105 | 106 | if self.downsample is not None: 107 | identity = self.downsample(x) 108 | 109 | out += identity 110 | out = self.relu(out) 111 | 
112 | return out 113 | 114 | 115 | class ResNet(nn.Module): 116 | 117 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 118 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 119 | norm_layer=None): 120 | super(ResNet, self).__init__() 121 | if norm_layer is None: 122 | norm_layer = nn.BatchNorm2d 123 | self._norm_layer = norm_layer 124 | 125 | self.inplanes = 64 126 | self.dilation = 1 127 | if replace_stride_with_dilation is None: 128 | # each element in the tuple indicates if we should replace 129 | # the 2x2 stride with a dilated convolution instead 130 | replace_stride_with_dilation = [False, False, False] 131 | if len(replace_stride_with_dilation) != 3: 132 | raise ValueError("replace_stride_with_dilation should be None " 133 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 134 | self.groups = groups 135 | self.base_width = width_per_group 136 | self.conv1 = nn.Conv2d(1, self.inplanes, kernel_size=7, stride=2, padding=3, 137 | bias=False) 138 | self.bn1 = norm_layer(self.inplanes) 139 | self.relu = nn.ReLU(inplace=True) 140 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 141 | self.layer1 = self._make_layer(block, 64, layers[0]) 142 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 143 | dilate=replace_stride_with_dilation[0]) 144 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 145 | dilate=replace_stride_with_dilation[1]) 146 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 147 | dilate=replace_stride_with_dilation[2]) 148 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 149 | self.fc = nn.Linear(512 * block.expansion, num_classes) 150 | 151 | for m in self.modules(): 152 | if isinstance(m, nn.Conv2d): 153 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 154 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 155 | nn.init.constant_(m.weight, 1) 156 | nn.init.constant_(m.bias, 0) 157 | 158 | # Zero-initialize the last BN in each residual branch, 159 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
160 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 161 | if zero_init_residual: 162 | for m in self.modules(): 163 | if isinstance(m, Bottleneck): 164 | nn.init.constant_(m.bn3.weight, 0) 165 | elif isinstance(m, BasicBlock): 166 | nn.init.constant_(m.bn2.weight, 0) 167 | 168 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 169 | norm_layer = self._norm_layer 170 | downsample = None 171 | previous_dilation = self.dilation 172 | if dilate: 173 | self.dilation *= stride 174 | stride = 1 175 | if stride != 1 or self.inplanes != planes * block.expansion: 176 | downsample = nn.Sequential( 177 | conv1x1(self.inplanes, planes * block.expansion, stride), 178 | norm_layer(planes * block.expansion), 179 | ) 180 | 181 | layers = [] 182 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 183 | self.base_width, previous_dilation, norm_layer)) 184 | self.inplanes = planes * block.expansion 185 | for _ in range(1, blocks): 186 | layers.append(block(self.inplanes, planes, groups=self.groups, 187 | base_width=self.base_width, dilation=self.dilation, 188 | norm_layer=norm_layer)) 189 | 190 | return nn.Sequential(*layers) 191 | 192 | def forward(self, x): 193 | x = self.conv1(x) 194 | x = self.bn1(x) 195 | x = self.relu(x) 196 | x = self.maxpool(x) 197 | 198 | x = self.layer1(x) 199 | x = self.layer2(x) 200 | x = self.layer3(x) 201 | x = self.layer4(x) 202 | 203 | x = self.avgpool(x) 204 | x = torch.flatten(x, 1) 205 | out = x 206 | x = self.fc(x) 207 | 208 | return x.view(-1, 19, 2), out 209 | 210 | def resnet10(pretrained=False, num_classes=1000, **kwargs): 211 | """Constructs a ResNet-10 model. 212 | Args: 213 | pretrained (bool): If True, returns a model pre-trained on ImageNet 214 | """ 215 | model = ResNet(BasicBlock, [1, 1, 1, 1], num_classes=1000, **kwargs) 216 | # if pretrained: 217 | # model.load_state_dict(model_zoo.load_url(model_urls['resnet10'])) 218 | num_ftrs = model.fc.in_features 219 | model.fc = nn.Linear(num_ftrs, num_classes) 220 | return model 221 | 222 | def resnet18(pretrained=False, num_classes=1000, **kwargs): 223 | """Constructs a ResNet-18 model. 224 | Args: 225 | pretrained (bool): If True, returns a model pre-trained on ImageNet 226 | """ 227 | model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=1000, **kwargs) 228 | if pretrained: 229 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 230 | num_ftrs = model.fc.in_features 231 | model.fc = nn.Linear(num_ftrs, num_classes) 232 | return model 233 | 234 | def resnet50(pretrained=False, num_classes=1000, **kwargs): 235 | """Constructs a ResNet-50 model. 236 | Args: 237 | pretrained (bool): If True, returns a model pre-trained on ImageNet 238 | """ 239 | model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000, **kwargs) 240 | if pretrained: 241 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 242 | num_ftrs = model.fc.in_features 243 | model.fc = nn.Linear(num_ftrs, num_classes) 244 | return model 245 | 246 | def resnet101(pretrained=False, num_classes=1000, **kwargs): 247 | """Constructs a ResNet-101 model. 
248 | Args: 249 | pretrained (bool): If True, returns a model pre-trained on ImageNet 250 | """ 251 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=1000, **kwargs) 252 | if pretrained: 253 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 254 | num_ftrs = model.fc.in_features 255 | model.fc = nn.Linear(num_ftrs, num_classes) 256 | return model 257 | 258 | 259 | def resnet152(pretrained=False, num_classes=1000, **kwargs): 260 | """Constructs a ResNet-152 model. 261 | Args: 262 | pretrained (bool): If True, returns a model pre-trained on ImageNet 263 | """ 264 | model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=1000, **kwargs) 265 | if pretrained: 266 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 267 | num_ftrs = model.fc.in_features 268 | model.fc = nn.Linear(num_ftrs, num_classes) 269 | return model 270 | 271 | -------------------------------------------------------------------------------- /src/modules/lightning/models/stacked_hourglass.py: -------------------------------------------------------------------------------- 1 | import moai.networks.lightning as minet 2 | import moai.nn.convolution as mic 3 | import moai.nn.residual as mires 4 | import moai.nn.sampling.spatial.downsample as mids 5 | import moai.modules.lightning as mimod 6 | import moai.nn.utils as miu 7 | 8 | import torch 9 | 10 | import hydra.utils as hyu 11 | import omegaconf.omegaconf as omegaconf 12 | import typing 13 | import logging 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | #NOTE: from https://github.com/anibali/pytorch-stacked-hourglass/blob/master/src/stacked_hourglass/model.py 18 | #NOTE: from https://github.com/princeton-vl/pytorch_stacked_hourglass/blob/master/models/posenet.py 19 | 20 | __all__ = ["StackedHourglassMod"] 21 | 22 | class StackedHourglassMod(torch.nn.Module): 23 | def __init__(self, 24 | configuration: omegaconf.DictConfig, 25 | modules: omegaconf.DictConfig, 26 | # data: omegaconf.DictConfig=None, 27 | # parameters: omegaconf.DictConfig=None, 28 | # feedforward: omegaconf.DictConfig=None, 29 | # monads: omegaconf.DictConfig=None, 30 | # supervision: omegaconf.DictConfig=None, 31 | # validation: omegaconf.DictConfig=None, 32 | # visualization: omegaconf.DictConfig=None, 33 | # export: omegaconf.DictConfig=None, 34 | ): 35 | super(StackedHourglassMod, self).__init__( 36 | # data=data, parameters=parameters, 37 | # feedforward=feedforward, monads=monads, 38 | # supervision=supervision, validation=validation, 39 | # export=export, visualization=visualization, 40 | ) 41 | self.stacks = configuration.stacks 42 | preproc = configuration.preproc 43 | projection = configuration.projection 44 | prediction = configuration.prediction 45 | merge = configuration.merge 46 | hourglass = list(modules.values())[0] 47 | if not preproc == None: 48 | self.pre = torch.nn.Sequential( 49 | mic.make_conv_block( 50 | block_type=preproc.block, 51 | convolution_type=preproc.convolution, 52 | in_features=configuration.in_features, 53 | out_features=hourglass.features // 4, 54 | activation_type=preproc.activation, 55 | convolution_params={ 56 | "kernel_size": preproc.stem.kernel_size, 57 | "stride": preproc.stem.stride, 58 | "padding": preproc.stem.padding, 59 | }, 60 | ), 61 | mires.make_residual_block( 62 | block_type=preproc.residual, 63 | convolution_type=preproc.convolution, 64 | in_features=hourglass.features // 4, 65 | out_features=hourglass.features // 2, 66 | bottleneck_features=hourglass.features // 2, 67 | activation_type=preproc.activation, 68 | 
strided=False, 69 | ), 70 | mids.make_downsample( 71 | downscale_type=preproc.downscale, 72 | features=hourglass.features // 2, 73 | kernel_size=3 if preproc.downscale == 'maxpool2d_aa' else 2, 74 | ), 75 | mires.make_residual_block( 76 | block_type=preproc.residual, 77 | convolution_type=preproc.convolution, 78 | in_features=hourglass.features // 2, 79 | out_features=hourglass.features // 2, 80 | bottleneck_features=hourglass.features // 2, 81 | activation_type=preproc.activation, 82 | strided=False, 83 | ), 84 | mires.make_residual_block( 85 | block_type=preproc.residual, 86 | convolution_type=preproc.convolution, 87 | in_features=hourglass.features // 2, 88 | out_features=hourglass.features, 89 | bottleneck_features=hourglass.features, 90 | activation_type=preproc.activation, 91 | strided=False, 92 | ), 93 | ) 94 | 95 | self.hgs = torch.nn.ModuleList([ 96 | torch.nn.Sequential( 97 | hyu.instantiate(hourglass) 98 | ) for i in range(self.stacks) 99 | ] 100 | ) 101 | 102 | if not preproc == None: 103 | self.features = torch.nn.ModuleList([ 104 | torch.nn.Sequential( 105 | mires.make_residual_block( 106 | block_type=preproc.residual, 107 | convolution_type=preproc.convolution, 108 | in_features=hourglass.features, 109 | out_features=hourglass.features, 110 | bottleneck_features=hourglass.features, 111 | activation_type=preproc.activation, 112 | strided=False, 113 | ), 114 | mic.make_conv_block( 115 | block_type=projection.block, 116 | convolution_type=projection.convolution, 117 | in_features=hourglass.features, 118 | out_features=hourglass.features, 119 | activation_type=projection.activation, 120 | convolution_params={"kernel_size": 1, "padding": 0}, 121 | ) 122 | ) for i in range(self.stacks) 123 | ] 124 | ) 125 | else: 126 | self.features = torch.nn.ModuleList([ 127 | torch.nn.Sequential( 128 | mic.make_conv_block( 129 | block_type=projection.block, 130 | convolution_type=projection.convolution, 131 | in_features=hourglass.features, 132 | out_features=hourglass.features, 133 | activation_type=projection.activation, 134 | convolution_params={"kernel_size": 1, "padding": 0}, 135 | ) 136 | ) for i in range(self.stacks) 137 | ] 138 | ) 139 | 140 | self.outs = torch.nn.ModuleList([ 141 | mic.make_conv_block( 142 | block_type=prediction.block, 143 | convolution_type=prediction.convolution, 144 | in_features=hourglass.features, 145 | out_features=configuration.out_features, 146 | activation_type=prediction.activation, 147 | convolution_params={ 148 | "kernel_size": 1, "padding": 0, 149 | }, 150 | activation_params={"inplace": True} 151 | ) for i in range(self.stacks) 152 | ]) 153 | self.merge_features = torch.nn.ModuleList([ 154 | torch.nn.Sequential( 155 | mic.make_conv_1x1( 156 | convolution_type=projection.convolution, 157 | in_channels=hourglass.features, 158 | out_channels=hourglass.features 159 | ), 160 | torch.nn.Dropout2d(p=merge.dropout, inplace=True)\ 161 | if merge.dropout > 0.0 else torch.nn.Identity() 162 | ) for i in range(self.stacks-1) 163 | ]) 164 | self.merge_preds = torch.nn.ModuleList([ 165 | torch.nn.Sequential( 166 | mic.make_conv_1x1( 167 | convolution_type=projection.convolution, 168 | in_channels=configuration.out_features, 169 | out_channels=hourglass.features 170 | ), 171 | torch.nn.Dropout2d(p=prediction.dropout, inplace=False)\ 172 | if prediction.dropout > 0.0 else torch.nn.Identity() 173 | ) for i in range(self.stacks-1) 174 | ]) 175 | # self.input = configuration.input #NOTE: cannot be as required in cascase model 176 | self.output_prefix = configuration.output 
177 | 178 | def forward(self, 179 | data: torch.Tensor 180 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 181 | x = data 182 | if hasattr(self, 'pre'): 183 | x = self.pre(x) 184 | combined_hm_preds = [] 185 | for i in range(self.stacks): 186 | hg = self.hgs[i](x) 187 | feature = self.features[i](hg) 188 | preds = self.outs[i](feature) 189 | combined_hm_preds.append(preds) 190 | if i < self.stacks - 1: 191 | x = x + self.merge_preds[i](preds) + self.merge_features[i](feature) 192 | 193 | aggregated_hm = torch.zeros_like(combined_hm_preds[0]) 194 | for i, heatmap in enumerate(combined_hm_preds): 195 | aggregated_hm += heatmap 196 | return aggregated_hm, torch.cat([x, aggregated_hm], dim=1) 197 | 198 | -------------------------------------------------------------------------------- /src/modules/lightning/models/stacked_hourglass_e2e.py: -------------------------------------------------------------------------------- 1 | import moai.networks.lightning as minet 2 | import moai.nn.convolution as mic 3 | import moai.nn.residual as mires 4 | import moai.nn.sampling.spatial.downsample as mids 5 | import moai.modules.lightning as mimod 6 | import moai.nn.utils as miu 7 | 8 | import torch 9 | 10 | import hydra.utils as hyu 11 | import omegaconf.omegaconf as omegaconf 12 | import typing 13 | import logging 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | #NOTE: from https://github.com/anibali/pytorch-stacked-hourglass/blob/master/src/stacked_hourglass/model.py 18 | #NOTE: from https://github.com/princeton-vl/pytorch_stacked_hourglass/blob/master/models/posenet.py 19 | 20 | __all__ = ["StackedHourglassMod_e2e"] 21 | 22 | class StackedHourglassMod_e2e(torch.nn.Module): 23 | def __init__(self, 24 | configuration: omegaconf.DictConfig, 25 | modules: omegaconf.DictConfig, 26 | # data: omegaconf.DictConfig=None, 27 | # parameters: omegaconf.DictConfig=None, 28 | # feedforward: omegaconf.DictConfig=None, 29 | # monads: omegaconf.DictConfig=None, 30 | # supervision: omegaconf.DictConfig=None, 31 | # validation: omegaconf.DictConfig=None, 32 | # visualization: omegaconf.DictConfig=None, 33 | # export: omegaconf.DictConfig=None, 34 | ): 35 | super(StackedHourglassMod_e2e, self).__init__( 36 | # data=data, parameters=parameters, 37 | # feedforward=feedforward, monads=monads, 38 | # supervision=supervision, validation=validation, 39 | # export=export, visualization=visualization, 40 | ) 41 | self.stacks = configuration.stacks 42 | preproc = configuration.preproc 43 | projection = configuration.projection 44 | prediction = configuration.prediction 45 | merge = configuration.merge 46 | hourglass = list(modules.values())[0] 47 | self.out = configuration.out_features 48 | if not preproc == None: 49 | self.pre = torch.nn.Sequential( 50 | mic.make_conv_block( 51 | block_type=preproc.block, 52 | convolution_type=preproc.convolution, 53 | in_features=configuration.in_features, 54 | out_features=hourglass.features // 4, 55 | activation_type=preproc.activation, 56 | convolution_params={ 57 | "kernel_size": preproc.stem.kernel_size, 58 | "stride": preproc.stem.stride, 59 | "padding": preproc.stem.padding, 60 | }, 61 | ), 62 | mires.make_residual_block( 63 | block_type=preproc.residual, 64 | convolution_type=preproc.convolution, 65 | in_features=hourglass.features // 4, 66 | out_features=hourglass.features // 2, 67 | bottleneck_features=hourglass.features // 2, 68 | activation_type=preproc.activation, 69 | strided=False, 70 | ), 71 | mids.make_downsample( 72 | downscale_type=preproc.downscale, 73 | 
features=hourglass.features // 2, 74 | kernel_size=3 if preproc.downscale == 'maxpool2d_aa' else 2, 75 | ), 76 | mires.make_residual_block( 77 | block_type=preproc.residual, 78 | convolution_type=preproc.convolution, 79 | in_features=hourglass.features // 2, 80 | out_features=hourglass.features // 2, 81 | bottleneck_features=hourglass.features // 2, 82 | activation_type=preproc.activation, 83 | strided=False, 84 | ), 85 | mires.make_residual_block( 86 | block_type=preproc.residual, 87 | convolution_type=preproc.convolution, 88 | in_features=hourglass.features // 2, 89 | out_features=hourglass.features, 90 | bottleneck_features=hourglass.features, 91 | activation_type=preproc.activation, 92 | strided=False, 93 | ), 94 | ) 95 | 96 | self.hgs = torch.nn.ModuleList([ 97 | torch.nn.Sequential( 98 | hyu.instantiate(hourglass) 99 | ) for i in range(self.stacks) 100 | ] 101 | ) 102 | 103 | if not preproc == None: 104 | self.features = torch.nn.ModuleList([ 105 | torch.nn.Sequential( 106 | mires.make_residual_block( 107 | block_type=preproc.residual, 108 | convolution_type=preproc.convolution, 109 | in_features=hourglass.features, 110 | out_features=hourglass.features, 111 | bottleneck_features=hourglass.features, 112 | activation_type=preproc.activation, 113 | strided=False, 114 | ), 115 | mic.make_conv_block( 116 | block_type=projection.block, 117 | convolution_type=projection.convolution, 118 | in_features=hourglass.features, 119 | out_features=hourglass.features, 120 | activation_type=projection.activation, 121 | convolution_params={"kernel_size": 1, "padding": 0}, 122 | ) 123 | ) for i in range(self.stacks) 124 | ] 125 | ) 126 | else: 127 | self.features = torch.nn.ModuleList([ 128 | torch.nn.Sequential( 129 | mic.make_conv_block( 130 | block_type=projection.block, 131 | convolution_type=projection.convolution, 132 | in_features=hourglass.features, 133 | out_features=hourglass.features, 134 | activation_type=projection.activation, 135 | convolution_params={"kernel_size": 1, "padding": 0}, 136 | ) 137 | ) for i in range(self.stacks) 138 | ] 139 | ) 140 | 141 | self.outs = torch.nn.ModuleList([ 142 | mic.make_conv_block( 143 | block_type=prediction.block, 144 | convolution_type=prediction.convolution, 145 | in_features=hourglass.features, 146 | out_features=configuration.out_features, 147 | activation_type=prediction.activation, 148 | convolution_params={ 149 | "kernel_size": 1, "padding": 0, 150 | }, 151 | activation_params={"inplace": True} 152 | ) for i in range(self.stacks) 153 | ]) 154 | self.merge_features = torch.nn.ModuleList([ 155 | torch.nn.Sequential( 156 | mic.make_conv_1x1( 157 | convolution_type=projection.convolution, 158 | in_channels=hourglass.features, 159 | out_channels=hourglass.features 160 | ), 161 | torch.nn.Dropout2d(p=merge.dropout, inplace=True)\ 162 | if merge.dropout > 0.0 else torch.nn.Identity() 163 | ) for i in range(self.stacks-1) 164 | ]) 165 | self.merge_preds = torch.nn.ModuleList([ 166 | torch.nn.Sequential( 167 | mic.make_conv_1x1( 168 | convolution_type=projection.convolution, 169 | in_channels=configuration.out_features, 170 | out_channels=hourglass.features 171 | ), 172 | torch.nn.Dropout2d(p=prediction.dropout, inplace=False)\ 173 | if prediction.dropout > 0.0 else torch.nn.Identity() 174 | ) for i in range(self.stacks-1) 175 | ]) 176 | # self.input = configuration.input #NOTE: cannot be as required in cascase model 177 | self.output_prefix = configuration.output 178 | 179 | def forward(self, 180 | data: torch.Tensor 181 | ) -> typing.Tuple[torch.Tensor, 
torch.Tensor, torch.Tensor]: 182 | x = data 183 | if hasattr(self, 'pre'): 184 | x = self.pre(x) 185 | combined_hm_preds = [] 186 | for i in range(self.stacks): 187 | hg = self.hgs[i](x) 188 | feature = self.features[i](hg) 189 | preds = self.outs[i](feature) 190 | combined_hm_preds.append(preds) 191 | if i < self.stacks - 1: 192 | x = x + self.merge_preds[i](preds) + self.merge_features[i](feature) 193 | 194 | aggregated_hm = torch.zeros_like(combined_hm_preds[0]) 195 | for i, heatmap in enumerate(combined_hm_preds): 196 | aggregated_hm += heatmap 197 | 198 | return aggregated_hm[:, :19, ...], aggregated_hm[:, 19:, ...], torch.cat([x, aggregated_hm], dim=1) 199 | 200 | # return aggregated_hm[:, :53, ...], aggregated_hm[:, 53:, ...] 201 | 202 | -------------------------------------------------------------------------------- /src/monads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tofis/democap/bc7f8cd27163085f78e164ac464df2336f0c6ad9/src/monads/__init__.py -------------------------------------------------------------------------------- /src/monads/distribution/zmean.py: -------------------------------------------------------------------------------- 1 | from moai.monads.utils import spatial_dim_list 2 | 3 | import torch 4 | 5 | __all__ = ["zMean"] 6 | 7 | class zMean(torch.nn.Module): 8 | def __init__(self, 9 | ): 10 | super(zMean, self).__init__() 11 | 12 | def forward(self, 13 | heatmaps: torch.Tensor, # spatial probability tensor of K keypoints 14 | ) -> torch.Tensor: 15 | hm_dims = spatial_dim_list(heatmaps) 16 | return torch.mean(heatmaps, dim=tuple(hm_dims)).unsqueeze(2) 17 | # return torch.amax(heatmaps, dim=tuple(hm_dims)).unsqueeze(2) 18 | # return torch.sum(heatmaps, dim=tuple(hm_dims)).unsqueeze(2) 19 | -------------------------------------------------------------------------------- /src/monads/keypoints/fuse_coords.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import typing 3 | 4 | __all__ = [ 5 | "FuseCoords", 6 | ] 7 | 8 | class FuseCoords(torch.nn.Module): 9 | def __init__(self, 10 | mode: str="two" # two, four 11 | ): 12 | super(FuseCoords,self).__init__() 13 | self.mode = mode 14 | 15 | def rotate_back_from_back(self, coords): 16 | coords[..., 0] = 1.0 - coords[..., 0] 17 | coords[..., 2] = 1.0 - coords[..., 2] 18 | return coords 19 | 20 | def rotate_left_from_right(self, coords): 21 | rot = torch.tensor([[ 22 | [0.0, 0.0, 1.0], 23 | [0.0, 1.0, 0.0], 24 | [-1.0, 0.0, 0.0], 25 | ]]).to(coords.device).float() 26 | xformed_t = rot @ coords.permute(0, 2, 1) 27 | xformed_t += torch.tensor([0.0, 0.0, 1.0]).to(coords.device).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 28 | return xformed_t.permute(0, 2, 1) 29 | 30 | def rotate_right_from_left(self, coords): 31 | rot = torch.tensor([[ 32 | [0.0, 0.0, -1.0], 33 | [0.0, 1.0, 0.0], 34 | [1.0, 0.0, 0.0], 35 | ]]).to(coords.device).float() 36 | xformed_t = rot @ coords.permute(0, 2, 1) 37 | xformed_t += torch.tensor([1.0, 0.0, 0.0]).to(coords.device).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 38 | return xformed_t.permute(0, 2, 1) 39 | 40 | def forward(self, coords: typing.List[torch.Tensor]) -> torch.Tensor: 41 | fused_coords = torch.zeros_like(coords[0]) 42 | for i, coords_i in enumerate(coords): 43 | if self.mode == "two": 44 | if i == 0: 45 | fused_coords += coords_i 46 | else: 47 | fused_coords += self.rotate_back_from_back(coords_i) 48 | 
elif self.mode == "four": 49 | if i == 0: 50 | fused_coords += coords_i 51 | elif i == 1: 52 | fused_coords += self.rotate_back_from_back(coords_i) 53 | elif i == 2: 54 | fused_coords += self.rotate_right_from_left(coords_i) 55 | elif i == 3: 56 | fused_coords += self.rotate_left_from_right(coords_i) 57 | fused_coords /= len(coords) 58 | return fused_coords -------------------------------------------------------------------------------- /src/validation/metrics/human_pose/mae.py: -------------------------------------------------------------------------------- 1 | from moai.monads.utils.common import dim_list 2 | 3 | import torch 4 | import numpy 5 | import os 6 | 7 | # from moai.validation.metrics.human_pose.temp import ( 8 | # save_ply_from_keypoints 9 | # ) 10 | 11 | 12 | class MAE(torch.nn.Module): 13 | def __init__(self): 14 | super(MAE, self).__init__() 15 | self.counter_m = 0 16 | self.counter_j = 0 17 | self.per_joint_results = numpy.zeros([0, 19]) 18 | self.per_marker_results = numpy.zeros([0, 53]) 19 | # if not os.path.exists("ply"): 20 | # os.mkdir("ply") 21 | # if not os.path.exists("csv"): 22 | # os.mkdir("csv") 23 | 24 | def forward(self, 25 | gt: torch.Tensor, 26 | pred: torch.Tensor, 27 | ) -> torch.Tensor: 28 | euc = torch.norm(gt - pred, p=2, dim=-1, keepdim=False) 29 | 30 | if gt.size()[1] == 53: 31 | self.counter_m += gt.size()[0] 32 | self.per_marker_results = numpy.vstack([self.per_marker_results, euc.cpu().numpy()]) 33 | # numpy.savetxt("csv/mae_markers.csv", self.per_marker_results, delimiter=',') 34 | else: 35 | gt[:, 14, :] = 0.0 36 | gt[:, 18, :] = 0.0 37 | pred[:, 14, :] = 0.0 38 | pred[:, 18, :] = 0.0 39 | self.counter_j += gt.size()[0] 40 | self.per_joint_results = numpy.vstack([self.per_joint_results, euc.cpu().numpy()]) 41 | # numpy.savetxt("csv/mae_joints.csv", self.per_joint_results, delimiter=',') 42 | 43 | return torch.mean(torch.mean(euc, dim=-1)) 44 | -------------------------------------------------------------------------------- /src/validation/metrics/human_pose/rmse.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from moai.monads.utils.common import dim_list 4 | 5 | import torch 6 | import numpy 7 | 8 | 9 | class RMSE(torch.nn.Module): 10 | def __init__(self): 11 | super(RMSE, self).__init__() 12 | self.counter_m = 0 13 | self.counter_j = 0 14 | self.per_joint_results = numpy.zeros([0, 19]) 15 | self.per_marker_results = numpy.zeros([0, 53]) 16 | if not os.path.exists("csv"): 17 | os.mkdir("csv") 18 | 19 | def forward(self, 20 | gt: torch.Tensor, 21 | pred: torch.Tensor 22 | ) -> torch.Tensor: 23 | diff_sq = torch.norm(gt - pred, p=2, dim=-1, keepdim=False) ** 2 24 | 25 | if gt.size()[1] == 53: 26 | self.per_marker_results = numpy.vstack([self.per_marker_results, diff_sq.cpu().numpy()]) 27 | numpy.savetxt("csv/rmse_markers.csv", self.per_marker_results, delimiter=',') 28 | else: 29 | self.per_joint_results = numpy.vstack([self.per_joint_results, diff_sq.cpu().numpy()]) 30 | numpy.savetxt("csv/rmse_joints.csv", self.per_joint_results, delimiter=',') 31 | 32 | return torch.mean(torch.sqrt(torch.mean(diff_sq, dim=-1))) --------------------------------------------------------------------------------
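Usage sketch (not part of the repository): the snippet below shows how the coordinate-regression utilities defined in src/modules/lightning/models/dsntnn.py are typically chained together — flat_softmax to turn a raw network output into per-keypoint spatial distributions, dsnt to extract differentiable normalized coordinates, and the divergence and Euclidean losses combined through average_loss. The import path, tensor shapes and loss weighting are illustrative assumptions, not values taken from the DeMoCap configurations.

import torch

# Assumed import path; adjust to however the repository is installed or added to PYTHONPATH.
from src.modules.lightning.models import dsntnn

B, K, H, W = 2, 19, 64, 64                          # batch, keypoints, heatmap resolution (illustrative)
raw = torch.randn(B, K, H, W, requires_grad=True)   # unnormalized heatmaps, e.g. a model output

heatmaps = dsntnn.flat_softmax(raw)                 # per-keypoint spatial probability maps
coords = dsntnn.dsnt(heatmaps)                      # (B, K, 2) normalized (x, y) coordinates in [-1, 1]

target = torch.rand(B, K, 2) * 2.0 - 1.0            # dummy ground-truth coordinates in normalized units

loc_losses = dsntnn.euclidean_losses(coords, target)             # (B, K) per-keypoint distances
# Note: this repository's js_reg_losses returns a (divergences, target_gaussians) tuple.
reg_losses, _ = dsntnn.js_reg_losses(heatmaps, target, sigma_t=1.0)

loss = dsntnn.average_loss(loc_losses + 1.0 * reg_losses)        # scalar, differentiable
loss.backward()

pixel_coords = dsntnn.normalized_to_pixel_coordinates(coords, (H, W))  # back to pixel units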