├── .gitignore ├── LICENSE ├── README.md ├── assets └── img │ └── overview.png ├── conf ├── democap │ └── democap_HRNET_2_views_fp │ │ ├── democap_data.yaml │ │ ├── democap_losses.yaml │ │ ├── democap_metrics.yaml │ │ ├── democap_model.yaml │ │ ├── democap_official.yaml │ │ └── democap_options.yaml └── src │ ├── data │ ├── test │ │ └── dataset │ │ │ └── human_pose │ │ │ └── H4DIR.yaml │ ├── train │ │ └── dataset │ │ │ └── human_pose │ │ │ └── H4DIR.yaml │ └── val │ │ └── dataset │ │ └── human_pose │ │ └── H4DIR.yaml │ └── model │ ├── modules │ └── models │ │ ├── cmpm.yaml │ │ ├── cpm.yaml │ │ ├── highres_standard_1.yaml │ │ ├── highres_standard_2.yaml │ │ ├── hopenet.yaml │ │ ├── hourglass_1.yaml │ │ ├── hourglass_2.yaml │ │ ├── hrnet_1.yaml │ │ ├── hrnet_2.yaml │ │ ├── hrnet_e2e_1.yaml │ │ ├── hrnet_e2e_2.yaml │ │ ├── hrnet_ps_1.yaml │ │ ├── hrnet_ps_2.yaml │ │ ├── oml_dual.yaml │ │ ├── stacked_hourglass_1.yaml │ │ ├── stacked_hourglass_2.yaml │ │ ├── stacked_hourglass_e2e_1.yaml │ │ ├── stacked_hourglass_e2e_2.yaml │ │ ├── stage_transition_standard_1.yaml │ │ ├── stage_transition_standard_2.yaml │ │ ├── start_transition_standard_1.yaml │ │ ├── start_transition_standard_2.yaml │ │ ├── top_branch_1.yaml │ │ └── top_branch_2.yaml │ ├── monads │ ├── distribution │ │ └── zmean.yaml │ └── keypoints │ │ └── fuse_coords.yaml │ └── validation │ └── metric │ └── human_pose │ ├── MAE.yaml │ └── RMSE.yaml └── src ├── data └── datasets │ └── human_pose │ └── H4DIR │ ├── h4dir.py │ └── importers │ ├── __init__.py │ ├── enums.py │ ├── image.py │ ├── loader.py │ ├── markermap.py │ └── projections.py ├── modules └── lightning │ └── models │ ├── __init__.py │ ├── cmpm.py │ ├── cpm.py │ ├── dsntnn.py │ ├── graphunet.py │ ├── hopenet.py │ ├── hrnet_e2e.py │ ├── hrnet_mod.py │ ├── hrnet_ps.py │ ├── oml_dual.py │ ├── resnet.py │ ├── stacked_hourglass.py │ └── stacked_hourglass_e2e.py ├── monads ├── __init__.py ├── distribution │ └── zmean.py └── keypoints │ └── fuse_coords.py └── validation └── metrics └── human_pose ├── mae.py └── rmse.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # other 132 | prive/ 133 | actions/ 134 | .vscode/ 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution 4.0 International 2 | 3 | Creative Commons Corporation ("Creative Commons") is not a law firm and 4 | does not provide legal services or legal advice. Distribution of 5 | Creative Commons public licenses does not create a lawyer-client or 6 | other relationship. Creative Commons makes its licenses and related 7 | information available on an "as-is" basis. Creative Commons gives no 8 | warranties regarding its licenses, any material licensed under their 9 | terms and conditions, or any related information. Creative Commons 10 | disclaims all liability for damages resulting from their use to the 11 | fullest extent possible. 12 | 13 | Using Creative Commons Public Licenses 14 | 15 | Creative Commons public licenses provide a standard set of terms and 16 | conditions that creators and other rights holders may use to share 17 | original works of authorship and other material subject to copyright and 18 | certain other rights specified in the public license below. The 19 | following considerations are for informational purposes only, are not 20 | exhaustive, and do not form part of our licenses. 21 | 22 | Considerations for licensors: Our public licenses are intended for use 23 | by those authorized to give the public permission to use material in 24 | ways otherwise restricted by copyright and certain other rights. Our 25 | licenses are irrevocable. 
Licensors should read and understand the terms 26 | and conditions of the license they choose before applying it. Licensors 27 | should also secure all rights necessary before applying our licenses so 28 | that the public can reuse the material as expected. Licensors should 29 | clearly mark any material not subject to the license. This includes 30 | other CC-licensed material, or material used under an exception or 31 | limitation to copyright. More considerations for licensors : 32 | wiki.creativecommons.org/Considerations_for_licensors 33 | 34 | Considerations for the public: By using one of our public licenses, a 35 | licensor grants the public permission to use the licensed material under 36 | specified terms and conditions. If the licensor's permission is not 37 | necessary for any reason–for example, because of any applicable 38 | exception or limitation to copyright–then that use is not regulated by 39 | the license. Our licenses grant only permissions under copyright and 40 | certain other rights that a licensor has authority to grant. Use of the 41 | licensed material may still be restricted for other reasons, including 42 | because others have copyright or other rights in the material. A 43 | licensor may make special requests, such as asking that all changes be 44 | marked or described. Although not required by our licenses, you are 45 | encouraged to respect those requests where reasonable. More 46 | considerations for the public : 47 | wiki.creativecommons.org/Considerations_for_licensees 48 | 49 | Creative Commons Attribution 4.0 International Public License 50 | 51 | By exercising the Licensed Rights (defined below), You accept and agree 52 | to be bound by the terms and conditions of this Creative Commons 53 | Attribution 4.0 International Public License ("Public License"). To the 54 | extent this Public License may be interpreted as a contract, You are 55 | granted the Licensed Rights in consideration of Your acceptance of these 56 | terms and conditions, and the Licensor grants You such rights in 57 | consideration of benefits the Licensor receives from making the Licensed 58 | Material available under these terms and conditions. 59 | 60 | Section 1 – Definitions. 61 | 62 | - a. Adapted Material means material subject to Copyright and Similar 63 | Rights that is derived from or based upon the Licensed Material and 64 | in which the Licensed Material is translated, altered, arranged, 65 | transformed, or otherwise modified in a manner requiring permission 66 | under the Copyright and Similar Rights held by the Licensor. For 67 | purposes of this Public License, where the Licensed Material is a 68 | musical work, performance, or sound recording, Adapted Material is 69 | always produced where the Licensed Material is synched in timed 70 | relation with a moving image. 71 | - b. Adapter's License means the license You apply to Your Copyright 72 | and Similar Rights in Your contributions to Adapted Material in 73 | accordance with the terms and conditions of this Public License. 74 | - c. Copyright and Similar Rights means copyright and/or similar 75 | rights closely related to copyright including, without limitation, 76 | performance, broadcast, sound recording, and Sui Generis Database 77 | Rights, without regard to how the rights are labeled or categorized. 78 | For purposes of this Public License, the rights specified in Section 79 | 2(b)(1)-(2) are not Copyright and Similar Rights. 80 | - d. 
Effective Technological Measures means those measures that, in 81 | the absence of proper authority, may not be circumvented under laws 82 | fulfilling obligations under Article 11 of the WIPO Copyright Treaty 83 | adopted on December 20, 1996, and/or similar international 84 | agreements. 85 | - e. Exceptions and Limitations means fair use, fair dealing, and/or 86 | any other exception or limitation to Copyright and Similar Rights 87 | that applies to Your use of the Licensed Material. 88 | - f. Licensed Material means the artistic or literary work, database, 89 | or other material to which the Licensor applied this Public License. 90 | - g. Licensed Rights means the rights granted to You subject to the 91 | terms and conditions of this Public License, which are limited to 92 | all Copyright and Similar Rights that apply to Your use of the 93 | Licensed Material and that the Licensor has authority to license. 94 | - h. Licensor means the individual(s) or entity(ies) granting rights 95 | under this Public License. 96 | - i. Share means to provide material to the public by any means or 97 | process that requires permission under the Licensed Rights, such as 98 | reproduction, public display, public performance, distribution, 99 | dissemination, communication, or importation, and to make material 100 | available to the public including in ways that members of the public 101 | may access the material from a place and at a time individually 102 | chosen by them. 103 | - j. Sui Generis Database Rights means rights other than copyright 104 | resulting from Directive 96/9/EC of the European Parliament and of 105 | the Council of 11 March 1996 on the legal protection of databases, 106 | as amended and/or succeeded, as well as other essentially equivalent 107 | rights anywhere in the world. 108 | - k. You means the individual or entity exercising the Licensed Rights 109 | under this Public License. Your has a corresponding meaning. 110 | 111 | Section 2 – Scope. 112 | 113 | - a. License grant. 114 | - 1. Subject to the terms and conditions of this Public License, 115 | the Licensor hereby grants You a worldwide, royalty-free, 116 | non-sublicensable, non-exclusive, irrevocable license to 117 | exercise the Licensed Rights in the Licensed Material to: 118 | - A. reproduce and Share the Licensed Material, in whole or in 119 | part; and 120 | - B. produce, reproduce, and Share Adapted Material. 121 | - 2. Exceptions and Limitations. For the avoidance of doubt, where 122 | Exceptions and Limitations apply to Your use, this Public 123 | License does not apply, and You do not need to comply with its 124 | terms and conditions. 125 | - 3. Term. The term of this Public License is specified in Section 126 | 6(a). 127 | - 4. Media and formats; technical modifications allowed. The 128 | Licensor authorizes You to exercise the Licensed Rights in all 129 | media and formats whether now known or hereafter created, and to 130 | make technical modifications necessary to do so. The Licensor 131 | waives and/or agrees not to assert any right or authority to 132 | forbid You from making technical modifications necessary to 133 | exercise the Licensed Rights, including technical modifications 134 | necessary to circumvent Effective Technological Measures. For 135 | purposes of this Public License, simply making modifications 136 | authorized by this Section 2(a)(4) never produces Adapted 137 | Material. 138 | - 5. Downstream recipients. 139 | - A. Offer from the Licensor – Licensed Material. 
Every 140 | recipient of the Licensed Material automatically receives an 141 | offer from the Licensor to exercise the Licensed Rights 142 | under the terms and conditions of this Public License. 143 | - B. No downstream restrictions. You may not offer or impose 144 | any additional or different terms or conditions on, or apply 145 | any Effective Technological Measures to, the Licensed 146 | Material if doing so restricts exercise of the Licensed 147 | Rights by any recipient of the Licensed Material. 148 | - 6. No endorsement. Nothing in this Public License constitutes or 149 | may be construed as permission to assert or imply that You are, 150 | or that Your use of the Licensed Material is, connected with, or 151 | sponsored, endorsed, or granted official status by, the Licensor 152 | or others designated to receive attribution as provided in 153 | Section 3(a)(1)(A)(i). 154 | - b. Other rights. 155 | - 1. Moral rights, such as the right of integrity, are not 156 | licensed under this Public License, nor are publicity, privacy, 157 | and/or other similar personality rights; however, to the extent 158 | possible, the Licensor waives and/or agrees not to assert any 159 | such rights held by the Licensor to the limited extent necessary 160 | to allow You to exercise the Licensed Rights, but not otherwise. 161 | - 2. Patent and trademark rights are not licensed under this 162 | Public License. 163 | - 3. To the extent possible, the Licensor waives any right to 164 | collect royalties from You for the exercise of the Licensed 165 | Rights, whether directly or through a collecting society under 166 | any voluntary or waivable statutory or compulsory licensing 167 | scheme. In all other cases the Licensor expressly reserves any 168 | right to collect such royalties. 169 | 170 | Section 3 – License Conditions. 171 | 172 | Your exercise of the Licensed Rights is expressly made subject to the 173 | following conditions. 174 | 175 | - a. Attribution. 176 | - 1. If You Share the Licensed Material (including in modified 177 | form), You must: 178 | - A. retain the following if it is supplied by the Licensor 179 | with the Licensed Material: 180 | - i. identification of the creator(s) of the Licensed 181 | Material and any others designated to receive 182 | attribution, in any reasonable manner requested by the 183 | Licensor (including by pseudonym if designated); 184 | - ii. a copyright notice; 185 | - iii. a notice that refers to this Public License; 186 | - iv. a notice that refers to the disclaimer of 187 | warranties; 188 | - v. a URI or hyperlink to the Licensed Material to the 189 | extent reasonably practicable; 190 | - B. indicate if You modified the Licensed Material and retain 191 | an indication of any previous modifications; and 192 | - C. indicate the Licensed Material is licensed under this 193 | Public License, and include the text of, or the URI or 194 | hyperlink to, this Public License. 195 | - 2. You may satisfy the conditions in Section 3(a)(1) in any 196 | reasonable manner based on the medium, means, and context in 197 | which You Share the Licensed Material. For example, it may be 198 | reasonable to satisfy the conditions by providing a URI or 199 | hyperlink to a resource that includes the required information. 200 | - 3. If requested by the Licensor, You must remove any of the 201 | information required by Section 3(a)(1)(A) to the extent 202 | reasonably practicable. 203 | - 4. 
If You Share Adapted Material You produce, the Adapter's 204 | License You apply must not prevent recipients of the Adapted 205 | Material from complying with this Public License. 206 | 207 | Section 4 – Sui Generis Database Rights. 208 | 209 | Where the Licensed Rights include Sui Generis Database Rights that apply 210 | to Your use of the Licensed Material: 211 | 212 | - a. for the avoidance of doubt, Section 2(a)(1) grants You the right 213 | to extract, reuse, reproduce, and Share all or a substantial portion 214 | of the contents of the database; 215 | - b. if You include all or a substantial portion of the database 216 | contents in a database in which You have Sui Generis Database 217 | Rights, then the database in which You have Sui Generis Database 218 | Rights (but not its individual contents) is Adapted Material; and 219 | - c. You must comply with the conditions in Section 3(a) if You Share 220 | all or a substantial portion of the contents of the database. 221 | 222 | For the avoidance of doubt, this Section 4 supplements and does not 223 | replace Your obligations under this Public License where the Licensed 224 | Rights include other Copyright and Similar Rights. 225 | 226 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 227 | 228 | - a. Unless otherwise separately undertaken by the Licensor, to the 229 | extent possible, the Licensor offers the Licensed Material as-is and 230 | as-available, and makes no representations or warranties of any kind 231 | concerning the Licensed Material, whether express, implied, 232 | statutory, or other. This includes, without limitation, warranties 233 | of title, merchantability, fitness for a particular purpose, 234 | non-infringement, absence of latent or other defects, accuracy, or 235 | the presence or absence of errors, whether or not known or 236 | discoverable. Where disclaimers of warranties are not allowed in 237 | full or in part, this disclaimer may not apply to You. 238 | - b. To the extent possible, in no event will the Licensor be liable 239 | to You on any legal theory (including, without limitation, 240 | negligence) or otherwise for any direct, special, indirect, 241 | incidental, consequential, punitive, exemplary, or other losses, 242 | costs, expenses, or damages arising out of this Public License or 243 | use of the Licensed Material, even if the Licensor has been advised 244 | of the possibility of such losses, costs, expenses, or damages. 245 | Where a limitation of liability is not allowed in full or in part, 246 | this limitation may not apply to You. 247 | - c. The disclaimer of warranties and limitation of liability provided 248 | above shall be interpreted in a manner that, to the extent possible, 249 | most closely approximates an absolute disclaimer and waiver of all 250 | liability. 251 | 252 | Section 6 – Term and Termination. 253 | 254 | - a. This Public License applies for the term of the Copyright and 255 | Similar Rights licensed here. However, if You fail to comply with 256 | this Public License, then Your rights under this Public License 257 | terminate automatically. 258 | - b. Where Your right to use the Licensed Material has terminated 259 | under Section 6(a), it reinstates: 260 | - 1. automatically as of the date the violation is cured, provided 261 | it is cured within 30 days of Your discovery of the violation; 262 | or 263 | - 2. upon express reinstatement by the Licensor. 264 | - c. 
For the avoidance of doubt, this Section 6(b) does not affect any 265 | right the Licensor may have to seek remedies for Your violations of 266 | this Public License. 267 | - d. For the avoidance of doubt, the Licensor may also offer the 268 | Licensed Material under separate terms or conditions or stop 269 | distributing the Licensed Material at any time; however, doing so 270 | will not terminate this Public License. 271 | - e. Sections 1, 5, 6, 7, and 8 survive termination of this Public 272 | License. 273 | 274 | Section 7 – Other Terms and Conditions. 275 | 276 | - a. The Licensor shall not be bound by any additional or different 277 | terms or conditions communicated by You unless expressly agreed. 278 | - b. Any arrangements, understandings, or agreements regarding the 279 | Licensed Material not stated herein are separate from and 280 | independent of the terms and conditions of this Public License. 281 | 282 | Section 8 – Interpretation. 283 | 284 | - a. For the avoidance of doubt, this Public License does not, and 285 | shall not be interpreted to, reduce, limit, restrict, or impose 286 | conditions on any use of the Licensed Material that could lawfully 287 | be made without permission under this Public License. 288 | - b. To the extent possible, if any provision of this Public License 289 | is deemed unenforceable, it shall be automatically reformed to the 290 | minimum extent necessary to make it enforceable. If the provision 291 | cannot be reformed, it shall be severed from this Public License 292 | without affecting the enforceability of the remaining terms and 293 | conditions. 294 | - c. No term or condition of this Public License will be waived and no 295 | failure to comply consented to unless expressly agreed to by the 296 | Licensor. 297 | - d. Nothing in this Public License constitutes or may be interpreted 298 | as a limitation upon, or waiver of, any privileges and immunities 299 | that apply to the Licensor or You, including from the legal 300 | processes of any jurisdiction or authority. 301 | 302 | Creative Commons is not a party to its public licenses. Notwithstanding, 303 | Creative Commons may elect to apply one of its public licenses to 304 | material it publishes and in those instances will be considered the 305 | "Licensor." The text of the Creative Commons public licenses is 306 | dedicated to the public domain under the CC0 Public Domain Dedication. 307 | Except for the limited purpose of indicating that material is shared 308 | under a Creative Commons public license or as otherwise permitted by the 309 | Creative Commons policies published at creativecommons.org/policies, 310 | Creative Commons does not authorize the use of the trademark "Creative 311 | Commons" or any other trademark or logo of Creative Commons without its 312 | prior written consent including, without limitation, in connection with 313 | any unauthorized modifications to any of its public licenses or any 314 | other arrangements, understandings, or agreements concerning use of 315 | licensed material. For the avoidance of doubt, this paragraph does not 316 | form part of the public licenses. 317 | 318 | Creative Commons may be contacted at creativecommons.org. 319 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeMoCap: Low-cost Marker-based Motion Capture 2 | Official implementation of "DeMoCap: Low-cost Marker-based Motion Capture" method. 
3 | 4 | [![](https://img.shields.io/badge/PDF-DeMoCap-red)](https://rdcu.be/czAXF) 5 | 6 | [![](https://img.shields.io/badge/IJCV-DeMoCap-blueviolet)](https://link.springer.com/article/10.1007/s11263-021-01526-z) 7 | [![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)](https://www.python.org/) 8 | [![Maintainer](https://img.shields.io/badge/maintainer-Anargyros_Chatzitofis-blue)](http://tofis.github.io) 9 | 10 | ![DeMoCap Concept](./assets/img/overview.png) 11 | ### Paper Abstract 12 | Optical marker-based motion capture (MoCap) remains the predominant way to acquire high-fidelity articulated body motions. We introduce DeMoCap, the first data-driven approach for end-to-end marker-based MoCap, using only a sparse setup of spatio-temporally aligned, consumer-grade infrared-depth cameras. Trading off some of their typical features, our approach is the sole robust option for far lower-cost marker-based MoCap than high-end solutions. We introduce an end-to-end differentiable markers-to-pose model to solve a set of challenges such as under-constrained position estimates, noisy input data and spatial configuration invariance. We simultaneously handle depth and marker detection noise, label and localize the markers, and estimate the 3D pose by introducing a novel spatial 3D coordinate regression technique under a multi-view rendering and supervision concept. DeMoCap is driven by a special dataset captured with 4 spatio-temporally aligned low-cost Intel RealSense D415 sensors and a 24 MXT40S camera professional MoCap system, used as input and ground truth, respectively. 13 | 14 | 15 | ## Requirements 16 | This code has been tested with: 17 | - [PyTorch 1.10.2](https://pytorch.org/get-started/previous-versions/) 18 | - [Python 3.8.11](https://www.python.org/downloads/release/python-3811/) 19 | - [CUDA 11.3](https://developer.nvidia.com/cuda-11-3-1-download-archive) 20 | 21 | Besides PyTorch, the following Python packages are needed: 22 | - [moai](https://pypi.org/project/moai-mdk/) 23 | - [opencv-python](https://pypi.org/project/opencv-python/) 24 | - [numpy](https://numpy.org/) 25 | - [matplotlib](https://matplotlib.org/) 26 | - [torchvision](https://pypi.org/project/torchvision/) 27 | - [visdom](https://github.com/facebookresearch/visdom) 28 | 29 | ## Dataset 30 | 31 | The data used for training, validation, and testing of DeMoCap can be downloaded [here](https://drive.google.com/file/d/1R0nqyBaKPp5wfJ0LH4hekUNrq4e3kodt/view?usp=sharing). 32 | To access the data, a Google Drive request with a message confirming consent to the data license (CC-BY-4.0) is mandatory; the decryption password is then sent via email. 33 | 34 | ## Installation 35 | 36 | The code is powered by the [moai-mdk](https://pypi.org/project/moai-mdk/) framework, which enables building the model from a configuration-based implementation along with custom extra modules. 37 | For further details on the framework and how to install it to run this code, please visit its [documentation](https://moai.readthedocs.io/en/latest/).
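For reference, a minimal setup sketch is given below. It assumes a clean Python 3.8 environment and the CUDA 11.3 builds listed above; the PyTorch/torchvision pinning follows the linked previous-versions page, and no specific moai-mdk version is prescribed here, so adjust versions to your system and consult the framework documentation first:

```
# Hypothetical setup sketch -- versions and index URL are assumptions, adjust to your system.
pip install torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
pip install moai-mdk opencv-python numpy matplotlib visdom
```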
38 | 39 | After the successful installation of the requirements, from the root folder of the project, run the command below: 40 | 41 | ``` 42 | moai train democap\democap_HRNET_2_views_fp\democap_official.yaml --config-dir conf H4DIR_train_split={your_train_folder} H4DIR_test_split={your_test_folder} H4DIR_val_split={your_val_folder} +project_path=[{project_path_root}] 43 | ``` 44 | ------ 45 | 46 | If you use the method or find this work useful, please cite: 47 | ``` 48 | @article{chatzitofis2021democap, 49 | title={DeMoCap: Low-Cost Marker-Based Motion Capture}, 50 | author={Chatzitofis, Anargyros and Zarpalas, Dimitrios and Daras, Petros and Kollias, Stefanos}, 51 | journal={International Journal of Computer Vision}, 52 | volume={129}, 53 | number={12}, 54 | pages={3338--3366}, 55 | year={2021}, 56 | publisher={Springer} 57 | } 58 | ``` 59 | -------------------------------------------------------------------------------- /assets/img/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tofis/democap/bc7f8cd27163085f78e164ac464df2336f0c6ad9/assets/img/overview.png -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_data.yaml: -------------------------------------------------------------------------------- 1 | H4DIR_train_split: ??? 2 | H4DIR_test_split: ??? 3 | H4DIR_val_split: ??? 4 | 5 | data: 6 | train: 7 | iterator: 8 | datasets: 9 | H4DIR: 10 | markers_out: 53 11 | joints_out: 19 12 | views: ["f","b", "3d"] 13 | resolution: 160 14 | augment: true 15 | rs: true 16 | test: 17 | iterator: 18 | datasets: 19 | H4DIR: 20 | markers_out: 53 21 | joints_out: 19 22 | views: ["f","b", "3d"] 23 | resolution: 160 24 | augment: false 25 | rs: true 26 | val: 27 | iterator: 28 | datasets: 29 | H4DIR: 30 | markers_out: 53 31 | joints_out: 19 32 | views: ["f","b","3d"] 33 | resolution: 160 34 | augment: true 35 | rs: true -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_losses.yaml: -------------------------------------------------------------------------------- 1 | all_heatmaps_gt: [f_hms_m_gt, b_hms_m_gt, f_hms_j_gt, b_hms_j_gt] 2 | all_heatmaps_pred: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 3 | all_kpts_gt: [f_gt_markers_3d, f_gt_joints_3d] 4 | all_kpts: [pred_markers_3d, pred_joints_3d] 5 | 6 | model: 7 | supervision: 8 | losses: 9 | wing: 10 | omega: 10.0 11 | epsilon: 2.0 12 | wing: 13 | gt: ${all_kpts_gt} 14 | pred: ${all_kpts} 15 | weight: [1, 1] 16 | JS: 17 | gt: ${all_heatmaps_gt} 18 | pred: ${all_heatmaps_pred} 19 | weight: [400, 400, 400, 400] -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_metrics.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | model: 4 | validation: 5 | human_pose_rmse: 6 | gt: [f_gt_markers_3d_original, f_gt_joints_3d_original] 7 | pred: [pred_markers_3d_original, pred_joints_3d_original] 8 | out: [rmse_m, rmse_j] 9 | human_pose_mae: 10 | gt: [f_gt_markers_3d_original, f_gt_joints_3d_original] 11 | pred: [pred_markers_3d_original, pred_joints_3d_original] 12 | out: [mae_m, mae_j] 13 | 14 | indicators: 15 | rmse_X_mae: '[rmse_m] * [rmse_j] * [mae_m] * [mae_j] / 100000000.0' --------------------------------------------------------------------------------
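Note on the metrics configuration above: human_pose_rmse and human_pose_mae each emit per-marker and per-joint values (rmse_m, rmse_j, mae_m, mae_j), and the rmse_X_mae indicator collapses them into a single scalar as their product divided by 100000000.0; this is the quantity monitored (with mode: min) for checkpointing in democap_options.yaml. As a worked example, if all four metrics were equal to 100 (in whatever units the original 3D coordinates use), the indicator would evaluate to 100 * 100 * 100 * 100 / 100000000.0 = 1.0.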
/conf/democap/democap_HRNET_2_views_fp/democap_model.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | conv_type: conv2d 4 | downscale_type: maxpool2d 5 | model: 6 | configuration: 7 | branch1: 8 | in_features: 1 9 | data: [f_depth, b_depth] 10 | out: 11 | - [f_hm_markers, f_features] 12 | - [b_hm_markers, b_features] 13 | branch2: 14 | in_features: 309 15 | data: [f_features, b_features] 16 | out: 17 | - [f_hm_joints, ""] 18 | - [b_hm_joints, ""] 19 | 20 | modules: 21 | branch1: 22 | modules: 23 | highres_standard_1: 24 | residual: 25 | type: bottleneck 26 | bottleneck_features: 128 27 | convolution: conv2d 28 | activation: relu 29 | fuse: 30 | convolution: conv2d 31 | activation: 32 | intermediate: bn2d_relu 33 | prefusion: bn2d 34 | final: relu 35 | upscale: 36 | type: upsample2d 37 | conv_up: true 38 | start_transition_standard_1: 39 | identity: 40 | convolution: conv2d 41 | kernel_size: 3 42 | stride: 1 43 | padding: 1 44 | branched: 45 | convolution: conv2d 46 | downscale: none 47 | kernel_size: 3 48 | stride: 2 49 | padding: 1 50 | stage_transition_standard_1: 51 | branched: 52 | convolution: conv2d 53 | downscale: none 54 | kernel_size: 3 55 | stride: 2 56 | padding: 1 57 | top_branch_1: 58 | convolution: conv2d 59 | activation: relu 60 | kernel_size: 1 61 | padding: 0 62 | inplace: false 63 | branch2: 64 | modules: 65 | highres_standard_2: 66 | residual: 67 | type: bottleneck 68 | bottleneck_features: 128 69 | convolution: conv2d 70 | activation: relu 71 | fuse: 72 | convolution: conv2d 73 | activation: 74 | intermediate: bn2d_relu 75 | prefusion: bn2d 76 | final: relu 77 | upscale: 78 | type: upsample2d 79 | conv_up: true 80 | start_transition_standard_2: 81 | identity: 82 | convolution: conv2d 83 | kernel_size: 3 84 | stride: 1 85 | padding: 1 86 | branched: 87 | convolution: conv2d 88 | downscale: none 89 | kernel_size: 3 90 | stride: 2 91 | padding: 1 92 | stage_transition_standard_2: 93 | branched: 94 | convolution: conv2d 95 | downscale: none 96 | kernel_size: 3 97 | stride: 2 98 | padding: 1 99 | top_branch_2: 100 | convolution: conv2d 101 | activation: relu 102 | kernel_size: 1 103 | padding: 0 104 | inplace: false 105 | 106 | parameters: 107 | optimization: 108 | optimizers: 109 | adam: 110 | lr: 3.0e-4 111 | schedule: 112 | schedulers: 113 | step: 114 | step_size: 4 115 | gamma: 0.95 116 | 117 | monads: 118 | grid: 119 | width: 40 # x 120 | height: 40 # y 121 | inclusive: true 122 | order: xy 123 | mode: norm 124 | 125 | isotropic_gaussian: 126 | std: 3.0 127 | normalize: true 128 | scale: false 129 | grid_type: norm 130 | 131 | center_of_mass: 132 | flip: false 133 | 134 | concat: 135 | dim: 2 136 | 137 | transform: 138 | xyz_in_at: channel 139 | xyz_out_at: channel 140 | transpose: false 141 | 142 | feedforward: 143 | preprocess: 144 | grid: 145 | tensor: [f_depth] 146 | out: [grid] 147 | 148 | isotropic_gaussian: 149 | keypoints: [f_gt_markers_2d, b_gt_markers_2d, f_gt_joints_2d, b_gt_joints_2d] 150 | grid: [grid, grid, grid, grid] 151 | out: [f_hms_m_gt, b_hms_m_gt, f_hms_j_gt, b_hms_j_gt] 152 | 153 | postprocess: 154 | zmean: 155 | heatmaps: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 156 | out: [f_m_z_coords, b_m_z_coords, f_j_z_coords, b_j_z_coords] 157 | 158 | spatial_softmax: 159 | tensor: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 160 | out: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 161 | 162 | center_of_mass: 163 | grid: [grid, grid, grid, grid] 164 | 
heatmaps: [f_hm_markers, b_hm_markers, f_hm_joints, b_hm_joints] 165 | out: [f_pred_markers_2d, b_pred_markers_2d, f_pred_joints_2d, b_pred_joints_2d] 166 | 167 | concat: 168 | tensors: 169 | - [f_pred_markers_2d, f_m_z_coords] 170 | - [b_pred_markers_2d, b_m_z_coords] 171 | - [f_pred_joints_2d, f_j_z_coords] 172 | - [b_pred_joints_2d, b_j_z_coords] 173 | out: [f_pred_markers_3d, b_pred_markers_3d, f_pred_joints_3d, b_pred_joints_3d] 174 | 175 | fuse_coords: 176 | coords: 177 | - [f_pred_markers_3d, b_pred_markers_3d] 178 | - [f_pred_joints_3d, b_pred_joints_3d] 179 | out: [pred_markers_3d, pred_joints_3d] 180 | 181 | scale_coords: 182 | coords: [f_gt_markers_3d, f_gt_joints_3d, pred_markers_3d, pred_joints_3d] 183 | scales: [f_scale, f_scale, f_scale, f_scale] 184 | out: [f_gt_markers_3d_original, f_gt_joints_3d_original, pred_markers_3d_original, pred_joints_3d_original] -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_official.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | ###### HYDRA 3 | - hydra/job_logging: colorlog 4 | - hydra/hydra_logging: colorlog 5 | 6 | ###### ENGINE 7 | - engine: engine 8 | - engine/train: lightning 9 | - engine/modules: manual_seed 10 | - engine/modules: import 11 | 12 | - engine/visualization: collection 13 | - engine/visualization/visdom: image2d 14 | # - engine/visualization/visdom: feature2d 15 | - engine/visualization/visdom: pose2d 16 | 17 | - engine/log/lightning: collection 18 | - engine/log/lightning/logger: visdom 19 | - engine/log/lightning/logger: tabular 20 | 21 | - engine/checkpoint/lightning: default 22 | 23 | ###### DATA 24 | - data/train/loader: torch 25 | - data/train/iterator: indexed 26 | - data/train/augmentation: none 27 | - src/data/train/dataset/human_pose: H4DIR 28 | 29 | - data/test/loader: torch 30 | - data/test/iterator: indexed 31 | - data/test/augmentation: none 32 | - src/data/test/dataset/human_pose: H4DIR 33 | 34 | - data/val/loader: torch 35 | - data/val/iterator: indexed 36 | - data/val/augmentation: none 37 | - src/data/val/dataset/human_pose: H4DIR 38 | 39 | # ###### MODEL 40 | - model/networks/lightning/factory: cascade 41 | - src/model/modules/models: hrnet_1 42 | - src/model/modules/models: hrnet_2 43 | - src/model/modules/models: highres_standard_1 44 | - src/model/modules/models: highres_standard_2 45 | - src/model/modules/models: top_branch_1 46 | - src/model/modules/models: top_branch_2 47 | - src/model/modules/models: start_transition_standard_1 48 | - src/model/modules/models: stage_transition_standard_1 49 | - src/model/modules/models: start_transition_standard_2 50 | - src/model/modules/models: stage_transition_standard_2 51 | 52 | - model/feedforward: preprocess 53 | - model/monads/generation: grid 54 | - model/monads/distribution/reconstruction: isotropic_gaussian 55 | 56 | - model/feedforward: postprocess 57 | - src/model/monads/distribution: zmean 58 | - model/monads/distribution/prior: spatial_softmax 59 | - model/monads/distribution: center_of_mass 60 | - model/monads/tensor: concat 61 | - src/model/monads/keypoints: fuse_coords 62 | - model/monads/keypoints: scale_coords 63 | 64 | - model/parameters/initialization: default 65 | - model/parameters/optimization: single 66 | - model/parameters/optimization/optimizer: adam 67 | - model/parameters/optimization/scheduling: single 68 | - model/parameters/optimization/scheduling/scheduler: step 69 | - 
model/parameters/regularization: none 70 | 71 | - model/supervision: weighted 72 | - model/supervision/loss/regression/robust: wing 73 | - model/supervision/loss/distribution: JS 74 | 75 | - model/validation: indicators 76 | - src/model/validation/metric/human_pose: rmse 77 | - src/model/validation/metric/human_pose: mae 78 | 79 | - democap/democap_HRNET_2_views_fp/democap_options 80 | - democap/democap_HRNET_2_views_fp/democap_data 81 | - democap/democap_HRNET_2_views_fp/democap_model 82 | - democap/democap_HRNET_2_views_fp/democap_losses 83 | - democap/democap_HRNET_2_views_fp/democap_metrics -------------------------------------------------------------------------------- /conf/democap/democap_HRNET_2_views_fp/democap_options.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | experiment: 4 | visdom_ip: localhost 5 | name: democap_hrnet_2_views 6 | batch_size: 16 7 | workers: 4 8 | 9 | engine: 10 | modules: 11 | import: 12 | run_path: false 13 | other_paths: ${project_path} 14 | manual_seed: 15 | seed: 1337 16 | deterministic: true 17 | 18 | trainer: 19 | gpus: [1] 20 | check_val_every_n_epoch: 1 21 | accumulate_grad_batches: 1 22 | max_epochs: 200 23 | deterministic: false 24 | checkpoint: 25 | monitor: rmse_X_mae 26 | mode: min 27 | filename: '{epoch}_{rmse_X_mae:.2f}' 28 | 29 | logging: 30 | name: ${experiment.name} 31 | loggers: 32 | visdom: 33 | name: ${experiment.name} 34 | ip: ${experiment.visdom_ip} 35 | tabular: 36 | name: ${experiment.name} 37 | 38 | visualization: 39 | batch_interval: 100 40 | visualizers: 41 | # feature2d: 42 | # name: ${experiment.name} 43 | # ip: ${experiment.visdom_ip} 44 | # image: [b_hms_j_gt, b_hm_joints] 45 | # type: [color, color] 46 | # colormap: [turbo, turbo] 47 | # transform: [minmax, minmax] 48 | 49 | image2d: 50 | name: ${experiment.name} 51 | ip: ${experiment.visdom_ip} 52 | image: [b_depth] 53 | type: [color] 54 | colormap: [turbo] 55 | transform: [minmax] 56 | 57 | pose2d: 58 | name: ${experiment.name} 59 | ip: ${experiment.visdom_ip} 60 | images: [f_depth] 61 | poses: [human_pose2d] 62 | gt: [f_gt_joints_2d] 63 | pred: [f_pred_joints_2d] 64 | pose_structure: 65 | - [0, 1, 2, 3, 4] 66 | - [5, 6, 7] 67 | - [8, 9, 10] 68 | - [11, 12, 13, 14] 69 | - [15, 16, 17, 18] 70 | coords: [norm] 71 | color_gt: [cyan] 72 | color_pred: [red] 73 | reverse_coords: true 74 | rotate_image: false 75 | use_mask: false -------------------------------------------------------------------------------- /conf/src/data/test/dataset/human_pose/H4DIR.yaml: -------------------------------------------------------------------------------- 1 | # @package data.test.iterator.datasets._name_ 2 | 3 | _target_: src.data.datasets.human_pose.H4DIR.h4dir.H4DIR 4 | root_path: ${H4DIR_test_split} 5 | markers_out: 53 6 | joints_out: 19 7 | views: ["f","b","3d"] 8 | augment: False 9 | rs: True 10 | -------------------------------------------------------------------------------- /conf/src/data/train/dataset/human_pose/H4DIR.yaml: -------------------------------------------------------------------------------- 1 | # @package data.train.iterator.datasets._name_ 2 | 3 | _target_: src.data.datasets.human_pose.H4DIR.h4dir.H4DIR 4 | root_path: ${H4DIR_train_split} 5 | markers_out: 53 6 | joints_out: 19 7 | resolution: 160 8 | views: ["f","b","3d"] 9 | augment: True 10 | rs: True 11 | -------------------------------------------------------------------------------- /conf/src/data/val/dataset/human_pose/H4DIR.yaml: 
-------------------------------------------------------------------------------- 1 | # @package data.val.iterator.datasets._name_ 2 | 3 | _target_: src.data.datasets.human_pose.H4DIR.h4dir.H4DIR 4 | root_path: ${H4DIR_val_split} 5 | markers_out: 53 6 | joints_out: 19 7 | resolution: 160 8 | views: ["f","b","3d"] 9 | augment: False 10 | rs: True 11 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/cmpm.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.CMPM 4 | # configuration: 5 | # in_features: 1 6 | # out_features: 53 7 | # output: _markers_hms_pred -------------------------------------------------------------------------------- /conf/src/model/modules/models/cpm.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.CPM 4 | num_markers: 53 5 | num_joints: 19 -------------------------------------------------------------------------------- /conf/src/model/modules/models/highres_standard_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.highres_standard_1 2 | 3 | _target_: moai.modules.lightning.highres.HighResolution 4 | residual: 5 | type: preactiv_bottleneck 6 | bottleneck_features: 128 7 | convolution: conv2d 8 | activation: relu 9 | fuse: 10 | convolution: conv2d 11 | activation: 12 | intermediate: relu_bn2d 13 | prefusion: bn2d 14 | final: relu 15 | upscale: 16 | type: upsample2d 17 | conv_up: true -------------------------------------------------------------------------------- /conf/src/model/modules/models/highres_standard_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.highres_standard_2 2 | 3 | _target_: moai.modules.lightning.highres.HighResolution 4 | residual: 5 | type: preactiv_bottleneck 6 | bottleneck_features: 128 7 | convolution: conv2d 8 | activation: relu 9 | fuse: 10 | convolution: conv2d 11 | activation: 12 | intermediate: relu_bn2d 13 | prefusion: bn2d 14 | final: relu 15 | upscale: 16 | type: upsample2d 17 | conv_up: true -------------------------------------------------------------------------------- /conf/src/model/modules/models/hopenet.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.HopeNet -------------------------------------------------------------------------------- /conf/src/model/modules/models/hourglass_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.hourglass_1 2 | 3 | _target_: moai.modules.lightning.Hourglass 4 | features: 256 5 | depth: 1 6 | convolution: conv2d 7 | activation: relu_bn2d 8 | residual: bottleneck 9 | # residual: preactiv_bottleneck 10 | downscale: maxpool2d_aa 11 | upscale: upsample2d -------------------------------------------------------------------------------- /conf/src/model/modules/models/hourglass_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.hourglass_2 2 | 3 | _target_: moai.modules.lightning.Hourglass 4 | features: 309 5 | depth: 1 6 | convolution: conv2d 7 | activation: 
relu_bn2d 8 | # residual: preactiv_bottleneck 9 | residual: bottleneck 10 | downscale: maxpool2d_aa 11 | upscale: upsample2d -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: src.modules.lightning.models.hrnet_mod.HRNetMod 4 | configuration: 5 | stages: 4 6 | in_features: 1 7 | out_features: 53 8 | input: ??? 9 | output: _markers_hms_pred 10 | preproc: 11 | stem: 12 | blocks: [conv2d, conv2d] 13 | convolutions: [conv2d, conv2d] 14 | activations: [relu_bn2d, relu_bn2d] 15 | kernel_sizes: [3, 3] 16 | features: [32, 64] 17 | strides: [2, 2] 18 | paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | convolution: conv2d 22 | activation: bn2d_relu 23 | features: 24 | in_features: [64, 256] 25 | out_features: [256, 256] 26 | bottleneck_features: [64, 64] 27 | branches: 28 | block: conv2d 29 | convolution: conv2d 30 | activation: relu 31 | kernel_size: 3 32 | stride: 1 33 | padding: 1 34 | start_features: 32 35 | modules: [1, 4, 3] 36 | depths: 37 | - [4] 38 | - [4, 4, 4, 4] 39 | - [4, 4, 4] 40 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: src.modules.lightning.models.hrnet_mod.HRNetMod 4 | configuration: 5 | stages: 4 6 | in_features: 309 7 | out_features: 19 8 | input: ??? 9 | output: _joints_hms_pred 10 | # preproc: 11 | # stem: 12 | # blocks: [conv2d, conv2d] 13 | # convolutions: [conv2d, conv2d] 14 | # activations: [relu_bn2d, relu_bn2d] 15 | # kernel_sizes: [3, 3] 16 | # features: [32, 64] 17 | # strides: [2, 2] 18 | # paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | # block: preactiv_bottleneck 22 | convolution: conv2d 23 | activation: relu_bn2d 24 | features: 25 | in_features: [64, 309] 26 | out_features: [309, 309] 27 | bottleneck_features: [64, 64] 28 | branches: 29 | block: conv2d 30 | convolution: conv2d 31 | activation: relu 32 | kernel_size: 3 33 | stride: 1 34 | padding: 1 35 | start_features: 32 36 | modules: [1, 4, 3] 37 | depths: 38 | - [4] 39 | - [4, 4, 4, 4] 40 | - [4, 4, 4] -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_e2e_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.HRNetMod_e2e 4 | configuration: 5 | stages: 4 6 | in_features: 1 7 | out_features: 72 8 | input: ??? 
9 | output: _markers_hms_pred 10 | preproc: 11 | stem: 12 | blocks: [conv2d, conv2d] 13 | convolutions: [conv2d, conv2d] 14 | activations: [relu_bn2d, relu_bn2d] 15 | kernel_sizes: [3, 3] 16 | features: [32, 64] 17 | strides: [2, 2] 18 | paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | convolution: conv2d 22 | activation: bn2d_relu 23 | features: 24 | in_features: [64, 256] 25 | out_features: [256, 256] 26 | bottleneck_features: [64, 64] 27 | branches: 28 | block: conv2d 29 | convolution: conv2d 30 | activation: relu 31 | kernel_size: 3 32 | stride: 1 33 | padding: 1 34 | start_features: 32 35 | modules: [1, 4, 3] 36 | depths: 37 | - [4] 38 | - [4, 4, 4, 4] 39 | - [4, 4, 4] 40 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_e2e_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.HRNetMod_e2e 4 | configuration: 5 | stages: 4 6 | in_features: 328 7 | out_features: 72 8 | input: ??? 9 | output: _joints_hms_pred 10 | # preproc: 11 | # stem: 12 | # blocks: [conv2d, conv2d] 13 | # convolutions: [conv2d, conv2d] 14 | # activations: [relu_bn2d, relu_bn2d] 15 | # kernel_sizes: [3, 3] 16 | # features: [32, 64] 17 | # strides: [2, 2] 18 | # paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | # block: preactiv_bottleneck 22 | convolution: conv2d 23 | activation: relu_bn2d 24 | features: 25 | in_features: [64, 328] 26 | out_features: [328, 328] 27 | bottleneck_features: [64, 64] 28 | branches: 29 | block: conv2d 30 | convolution: conv2d 31 | activation: relu 32 | kernel_size: 3 33 | stride: 1 34 | padding: 1 35 | start_features: 32 36 | modules: [1, 4, 3] 37 | depths: 38 | - [4] 39 | - [4, 4, 4, 4] 40 | - [4, 4, 4] -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_ps_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.HRNetModPS 4 | configuration: 5 | stages: 4 6 | in_features: 1 7 | out_features: 53 8 | input: ??? 9 | output: _markers_hms_pred 10 | preproc: 11 | stem: 12 | blocks: [conv2d, conv2d] 13 | convolutions: [conv2d, conv2d] 14 | activations: [relu_bn2d, relu_bn2d] 15 | kernel_sizes: [3, 3] 16 | features: [32, 64] 17 | strides: [2, 2] 18 | paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | convolution: conv2d 22 | activation: bn2d_relu 23 | features: 24 | in_features: [64, 256] 25 | out_features: [256, 256] 26 | bottleneck_features: [64, 64] 27 | branches: 28 | block: conv2d 29 | convolution: conv2d 30 | activation: relu 31 | kernel_size: 3 32 | stride: 1 33 | padding: 1 34 | start_features: 32 35 | modules: [1, 4, 3] 36 | depths: 37 | - [4] 38 | - [4, 4, 4, 4] 39 | - [4, 4, 4] 40 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/hrnet_ps_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.HRNetModPS 4 | configuration: 5 | stages: 4 6 | in_features: 309 7 | out_features: 19 8 | input: ??? 
9 | output: _joints_hms_pred 10 | # preproc: 11 | # stem: 12 | # blocks: [conv2d, conv2d] 13 | # convolutions: [conv2d, conv2d] 14 | # activations: [relu_bn2d, relu_bn2d] 15 | # kernel_sizes: [3, 3] 16 | # features: [32, 64] 17 | # strides: [2, 2] 18 | # paddings: [1, 1] 19 | residual: 20 | block: bottleneck 21 | # block: preactiv_bottleneck 22 | convolution: conv2d 23 | activation: relu_bn2d 24 | features: 25 | in_features: [64, 309] 26 | out_features: [309, 309] 27 | bottleneck_features: [64, 64] 28 | branches: 29 | block: conv2d 30 | convolution: conv2d 31 | activation: relu 32 | kernel_size: 3 33 | stride: 1 34 | padding: 1 35 | start_features: 32 36 | modules: [1, 4, 3] 37 | depths: 38 | - [4] 39 | - [4, 4, 4, 4] 40 | - [4, 4, 4] -------------------------------------------------------------------------------- /conf/src/model/modules/models/oml_dual.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.OmlDual 4 | num_markers: 53 5 | num_joints: 19 -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod 4 | configuration: 5 | stacks: 4 6 | in_features: 1 7 | out_features: 53 8 | output: _markers_hms_pred 9 | preproc: 10 | block: conv2d 11 | convolution: conv2d 12 | activation: relu_bn2d 13 | residual: bottleneck 14 | # residual: preactiv_bottleneck 15 | downscale: maxpool2d_aa 16 | stem: 17 | kernel_size: 7 18 | stride: 2 19 | padding: 3 20 | projection: 21 | block: conv2d 22 | convolution: conv2d 23 | activation: relu 24 | prediction: 25 | block: conv2d 26 | convolution: conv2d 27 | activation: relu 28 | dropout: 0.0 29 | merge: 30 | dropout: 0.0 31 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod 4 | configuration: 5 | stacks: 4 6 | in_features: 309 7 | out_features: 19 8 | output: _joints_hms_pred 9 | # preproc: 10 | # block: identity 11 | # convolution: identity 12 | # activation: identity 13 | # residual: identity 14 | # downscale: identity 15 | # stem: 16 | # kernel_size: 7 17 | # stride: 2 18 | # padding: 3 19 | projection: 20 | block: conv2d 21 | convolution: conv2d 22 | activation: relu 23 | prediction: 24 | block: conv2d 25 | convolution: conv2d 26 | activation: relu 27 | dropout: 0.0 28 | merge: 29 | dropout: 0.0 -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_e2e_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod_e2e 4 | configuration: 5 | stacks: 4 6 | in_features: 1 7 | out_features: 72 8 | output: _markers_hms_pred 9 | preproc: 10 | block: conv2d 11 | convolution: conv2d 12 | activation: relu_bn2d 13 | residual: bottleneck 14 | # residual: preactiv_bottleneck 15 | downscale: maxpool2d_aa 16 | stem: 17 | kernel_size: 7 18 | stride: 2 19 | padding: 3 20 | projection: 21 | block: conv2d 22 | 
convolution: conv2d 23 | activation: relu 24 | prediction: 25 | block: conv2d 26 | convolution: conv2d 27 | activation: relu 28 | dropout: 0.0 29 | merge: 30 | dropout: 0.0 31 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/stacked_hourglass_e2e_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2 2 | 3 | _target_: moai.modules.lightning.models.StackedHourglassMod_e2e 4 | configuration: 5 | stacks: 4 6 | in_features: 328 7 | out_features: 72 8 | output: _markers_hms_pred 9 | # preproc: 10 | # block: conv2d 11 | # convolution: conv2d 12 | # activation: relu_bn2d 13 | # residual: bottleneck 14 | # # residual: preactiv_bottleneck 15 | # downscale: maxpool2d_aa 16 | # stem: 17 | # kernel_size: 7 18 | # stride: 2 19 | # padding: 3 20 | projection: 21 | block: conv2d 22 | convolution: conv2d 23 | activation: relu 24 | prediction: 25 | block: conv2d 26 | convolution: conv2d 27 | activation: relu 28 | dropout: 0.0 29 | merge: 30 | dropout: 0.0 31 | -------------------------------------------------------------------------------- /conf/src/model/modules/models/stage_transition_standard_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.stage_transition_standard_1 2 | 3 | _target_: moai.modules.lightning.highres.StageTransition 4 | branched: 5 | convolution: conv2d 6 | activation: relu 7 | downscale: none # maxpool2d 8 | kernel_size: 3 9 | stride: 2 10 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/stage_transition_standard_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.stage_transition_standard_2 2 | 3 | _target_: moai.modules.lightning.highres.StageTransition 4 | branched: 5 | convolution: conv2d 6 | activation: relu 7 | downscale: none # maxpool2d 8 | kernel_size: 3 9 | stride: 2 10 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/start_transition_standard_1.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch1.modules.start_transition_standard_1 2 | 3 | _target_: moai.modules.lightning.highres.StartTransition 4 | identity: 5 | convolution: conv2d 6 | activation: relu 7 | kernel_size: 3 8 | stride: 1 9 | padding: 1 10 | branched: 11 | convolution: conv2d 12 | activation: relu 13 | downscale: none # maxpool2d 14 | kernel_size: 3 15 | stride: 2 16 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/start_transition_standard_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.start_transition_standard_2 2 | 3 | _target_: moai.modules.lightning.highres.StartTransition 4 | identity: 5 | convolution: conv2d 6 | activation: relu 7 | kernel_size: 3 8 | stride: 1 9 | padding: 1 10 | branched: 11 | convolution: conv2d 12 | activation: relu 13 | downscale: none # maxpool2d 14 | kernel_size: 3 15 | stride: 2 16 | padding: 1 -------------------------------------------------------------------------------- /conf/src/model/modules/models/top_branch_1.yaml: -------------------------------------------------------------------------------- 1 
| # @package model.modules.branch1.modules.top_branch_1 2 | 3 | _target_: moai.modules.lightning.highres.TopBranchHead 4 | convolution: conv2d 5 | activation: none 6 | kernel_size: 1 7 | padding: 0 8 | inplace: True -------------------------------------------------------------------------------- /conf/src/model/modules/models/top_branch_2.yaml: -------------------------------------------------------------------------------- 1 | # @package model.modules.branch2.modules.top_branch_2 2 | 3 | _target_: moai.modules.lightning.highres.TopBranchHead 4 | convolution: conv2d 5 | activation: none 6 | kernel_size: 1 7 | padding: 0 8 | inplace: True -------------------------------------------------------------------------------- /conf/src/model/monads/distribution/zmean.yaml: -------------------------------------------------------------------------------- 1 | # @package model.monads._name_ 2 | 3 | _target_: src.monads.distribution.zmean.zMean -------------------------------------------------------------------------------- /conf/src/model/monads/keypoints/fuse_coords.yaml: -------------------------------------------------------------------------------- 1 | # @package model.monads._name_ 2 | 3 | _target_: src.monads.keypoints.fuse_coords.FuseCoords 4 | mode: two -------------------------------------------------------------------------------- /conf/src/model/validation/metric/human_pose/MAE.yaml: -------------------------------------------------------------------------------- 1 | # @package model.validation.metrics.human_pose__name_ 2 | 3 | _target_: src.validation.metrics.human_pose.mae.MAE -------------------------------------------------------------------------------- /conf/src/model/validation/metric/human_pose/RMSE.yaml: -------------------------------------------------------------------------------- 1 | # @package model.validation.metrics.human_pose__name_ 2 | 3 | _target_: src.validation.metrics.human_pose.rmse.RMSE -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/h4dir.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from scipy.spatial.transform import Rotation as R 4 | import logging 5 | from src.data.datasets.human_pose.H4DIR.importers import ( 6 | load_3d_data 7 | ) 8 | from src.data.datasets.human_pose.H4DIR.importers import ( 9 | get_depth_image_from_points, 10 | f_rotate_back, 11 | f_rotate_left, 12 | f_rotate_right 13 | ) 14 | 15 | from torch.utils.data.dataset import Dataset 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | __all__ = ["H4DIR"] 20 | 21 | class H4DIR(Dataset): 22 | def __init__(self, 23 | root_path, 24 | markers_out, 25 | joints_out, 26 | resolution, 27 | views, 28 | augment, 29 | rs, 30 | scale_res=5.0, 31 | x_range = 20.0, 32 | y_range = 360.0, 33 | z_range = 20.0 34 | ): 35 | super(H4DIR,self).__init__() 36 | self.root_path = root_path 37 | self.markers_out = markers_out 38 | self.joints_out = joints_out 39 | self.resolution = resolution 40 | self.views = views 41 | self.augment = augment 42 | self.rs = rs 43 | self.scale_res = scale_res 44 | self.x_range = x_range 45 | self.y_range = y_range 46 | self.z_range = z_range 47 | 48 | if not os.path.exists(root_path): 49 | raise ValueError("{} does not exist, exiting.".format(root_path)) 50 | 51 | self.data = {} 52 | # Iterate over each recorded folder 53 | for recording in os.listdir(root_path): 54 | data_path = os.path.join(root_path, recording) 55 | if not os.path.isdir(data_path): 
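                # skip stray files at the dataset root: only per-recording sub-directories
                # (which appear to hold the per-view *.txt files parsed below) are indexed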
56 | continue 57 | for file in os.listdir(data_path): 58 | full_filename = os.path.join(data_path, file) 59 | filename, ext = os.path.splitext(full_filename) 60 | if ext != ".txt" or "_rs" in filename: # TODO: refactor the data loading and labeling 61 | continue 62 | splits = file.split("_") 63 | if len(splits) == 3 or len(splits) == 4: 64 | _type = splits[0] 65 | _id = splits[1] 66 | _view = splits[2].split('.')[0] 67 | else: 68 | continue 69 | unique_name = recording + "-" + str(_id) 70 | if _view not in self.views: 71 | continue 72 | if unique_name not in self.data: 73 | self.data[unique_name] = {} 74 | if _view not in self.data[unique_name]: 75 | self.data[unique_name][_view] = {} 76 | self.data[unique_name][_view][_type] = full_filename 77 | 78 | def __len__(self): 79 | return len(self.data) 80 | 81 | def __getitem__(self, idx): 82 | key = list(self.data.keys())[idx] 83 | datum = self.data[key] 84 | datum_out = {} 85 | random_rot = R.from_euler('xyz', [random.random() * self.x_range - self.x_range // 2, random.random() * self.y_range - self.y_range // 2, random.random() * self.z_range - self.z_range // 2], degrees=True) 86 | 87 | if "3d" in self.views: 88 | rs_markers_f, gt_markers_f, gt_joints_f, scale, com = load_3d_data( 89 | datum["3d"]["txt"], 90 | datum["3d"]["txt"].replace('gt', 'rs'), 91 | self.markers_out, 92 | self.joints_out, 93 | rs=self.rs, 94 | S1S4=True if "_S1_" in datum["3d"]["txt"] or "_S4_" in datum["3d"]["txt"] else False, # TODO: to discuss the marker placement type 95 | random_rot=random_rot, 96 | augment=self.augment 97 | ) 98 | 99 | for view in self.views: 100 | if (view == "3d"): 101 | continue 102 | else: 103 | if (view == 'f'): 104 | gt_markers = gt_markers_f.clone() 105 | gt_joints = gt_joints_f.clone() 106 | rs_markers = rs_markers_f.clone() 107 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers.clone()) 108 | elif (view == 'b'): 109 | gt_markers = f_rotate_back(gt_markers_f.clone()) 110 | gt_joints = f_rotate_back(gt_joints_f.clone()) 111 | rs_markers = f_rotate_back(rs_markers_f.clone()) 112 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers) 113 | elif (view == 'l'): 114 | gt_markers = f_rotate_left(gt_markers_f.clone()) 115 | gt_joints = f_rotate_left(gt_joints_f.clone()) 116 | rs_markers = f_rotate_left(rs_markers_f.clone()) 117 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers) 118 | elif (view == 'r'): 119 | gt_markers = f_rotate_right(gt_markers_f.clone()) 120 | gt_joints = f_rotate_right(gt_joints_f.clone()) 121 | rs_markers = f_rotate_right(rs_markers_f.clone()) 122 | depth_img = get_depth_image_from_points(self.resolution, self.scale_res, rs_markers) 123 | else: 124 | raise ValueError("Error.
View {} is not supported.", view) 125 | 126 | datum_out.update({ 127 | view + "_depth" : depth_img.squeeze(0), 128 | view + "_gt_markers_3d" : gt_markers, 129 | view + "_gt_markers_2d" : gt_markers[..., :2], 130 | view + "_gt_joints_3d" : gt_joints, 131 | view + "_gt_joints_2d" : gt_joints[..., :2], 132 | view + "_scale" : scale, 133 | view + "_com" : com.unsqueeze(0) 134 | }) 135 | 136 | return datum_out 137 | 138 | def get_data(self): 139 | return self.data -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/__init__.py: -------------------------------------------------------------------------------- 1 | from src.data.datasets.human_pose.H4DIR.importers.loader import ( 2 | load_3d_data 3 | ) 4 | from src.data.datasets.human_pose.H4DIR.importers.enums import ( 5 | joint_selection, 6 | ) 7 | from src.data.datasets.human_pose.H4DIR.importers.markermap import ( 8 | MARKER_S1S4, 9 | MARKER_S1S4_new, 10 | MARKER_S2S3, 11 | MARKER_S2S3_new, 12 | S1S4_Mapping, 13 | S2S3_Mapping, 14 | ) 15 | from src.data.datasets.human_pose.H4DIR.importers.image import ( 16 | get_depth_image_from_points 17 | ) 18 | from src.data.datasets.human_pose.H4DIR.importers.projections import ( 19 | f_rotate_back, 20 | f_rotate_left, 21 | f_rotate_right 22 | ) -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/enums.py: -------------------------------------------------------------------------------- 1 | def get_markers(): 2 | MARKERS = {} 3 | 4 | MARKERS["01"] = (4, 106, 1+ 100) # 00 spinebase 5 | MARKERS["02"] = (8, 104, 2+ 100) # 01 left chest 6 | MARKERS["03"] = (12, 102, 3+ 100) # 02 right chest 7 | MARKERS["04"] = (16, 100, 4+ 100) # 03 left head 8 | MARKERS["05"] = (20, 98, 5+ 100) # 04 right head 9 | MARKERS["06"] = (24, 96, 6+ 100) # 05 back_head 10 | MARKERS["07"] = (28, 94, 7+ 100) # 06 back_high 11 | MARKERS["08"] = (32, 92, 8+ 100) # 07 back_low 12 | MARKERS["09"] = (36, 90, 9+ 100) # 08 left b_shoulder 13 | MARKERS["10"] = (40, 88, 10+ 100) # 09 left f_shoulder 14 | MARKERS["11"] = (44, 86, 11+ 100) # 10 left upperarm 15 | MARKERS["12"] = (48, 84, 12+ 100) # 11 left forearm 16 | MARKERS["13"] = (52, 82, 13+ 100) # 12 right b_shoulder 17 | MARKERS["14"] = (56, 80, 14+ 100) # 13 right f_shoulder 18 | MARKERS["15"] = (60, 78, 15+ 100) # 14 right upperarm 19 | MARKERS["16"] = (64, 76, 16+ 100) # 15 right forearm 20 | MARKERS["17"] = (68, 74, 17+ 100) # 16 left pelvis 21 | MARKERS["18"] = (72, 72, 18+ 100) # 17 left thigh 22 | MARKERS["19"] = (76, 70, 19+ 100) # 18 left calf 23 | MARKERS["20"] = (80, 68, 20+ 100) # 19 right pelvis 24 | MARKERS["21"] = (84, 66, 21+ 100) # 20 right thigh 25 | MARKERS["22"] = (88, 64, 22+ 100) # 21 right calf 26 | 27 | MARKERS["23"] = (92, 62, 23+ 100) # 22 left hand 28 | MARKERS["24"] = (96, 60, 24+ 100) # 23 left foot 29 | 30 | MARKERS["25"] = (100, 58, 25+ 100) # 24 right hand 31 | MARKERS["26"] = (104, 56, 26+ 100) # 25 right foot 32 | 33 | MARKERS["27"] = (108, 54, 27+ 100) # 22 left hand 34 | MARKERS["28"] = (112, 52, 28+ 100) # 23 left foot 35 | 36 | MARKERS["29"] = (116, 50, 29+ 100) # 24 right hand 37 | MARKERS["30"] = (120, 48, 30+ 100) # 25 right foot 38 | 39 | MARKERS["31"] = (124, 46, 31+ 100) # 00 spinebase 40 | MARKERS["32"] = (128, 44, 32+ 100) # 01 left chest 41 | MARKERS["33"] = (132, 42, 33+ 100) # 02 right chest 42 | MARKERS["34"] = (136, 40, 34+ 100) # 03 left head 43 | MARKERS["35"] = (140, 38, 35+ 100) # 04 right 
head 44 | MARKERS["36"] = (144, 36, 36+ 100) # 05 back_head 45 | MARKERS["37"] = (148, 34, 37+ 100) # 06 back_high 46 | MARKERS["38"] = (152, 32, 38+ 100) # 07 back_low 47 | MARKERS["39"] = (156, 30, 39+ 100) # 08 left b_shoulder 48 | MARKERS["40"] = (160, 28, 40+ 100) # 09 left f_shoulder 49 | MARKERS["41"] = (164, 26, 41+ 100) # 10 left upperarm 50 | MARKERS["42"] = (168, 24, 42+ 100) # 11 left forearm 51 | MARKERS["43"] = (172, 22, 43+ 100) # 12 right b_shoulder 52 | MARKERS["44"] = (176, 20, 44+ 100) # 13 right f_shoulder 53 | MARKERS["45"] = (180, 18, 45+ 100) # 14 right upperarm 54 | MARKERS["46"] = (184, 16, 46+ 100) # 15 right forearm 55 | MARKERS["47"] = (188, 14, 47+ 100) # 16 left pelvis 56 | MARKERS["48"] = (192, 12, 48+ 100) # 17 left thigh 57 | MARKERS["49"] = (196, 10, 49+ 100) # 18 left calf 58 | MARKERS["50"] = (200, 8, 50+ 100) # 19 right pelvis 59 | MARKERS["51"] = (204, 6, 51+ 100) # 20 right thigh 60 | MARKERS["52"] = (208, 4, 52+ 100) # 21 right calf 61 | MARKERS["53"] = (216, 2, 53+ 100) # 21 right calf 62 | 63 | 64 | 65 | 66 | return MARKERS 67 | 68 | def get_markers_deepmocap(): 69 | MARKERS = {} 70 | 71 | MARKERS["01"] = (0, 255, 0+ 100) # 00 spinebase 72 | MARKERS["02"] = (255, 0, 0+ 100) # 01 left chest 73 | MARKERS["03"] = (255, 255, 0+ 100) # 02 right chest 74 | MARKERS["04"] = (0, 255, 255+ 100) # 03 left head 75 | MARKERS["05"] = (255, 0, 255+ 100) # 04 right head 76 | MARKERS["06"] = (185, 255, 0+ 100) # 05 back_head 77 | MARKERS["07"] = (0, 185, 255+ 100) # 06 back_high 78 | MARKERS["08"] = (255, 0, 185+ 100) # 07 back_low 79 | MARKERS["09"] = (185, 0, 255+ 100) # 08 left b_shoulder 80 | MARKERS["10"] = (0, 255, 185+ 100) # 09 left f_shoulder 81 | MARKERS["11"] = (255, 185, 0+ 100) # 10 left upperarm 82 | MARKERS["12"] = (132, 0, 255+ 100) # 11 left forearm 83 | MARKERS["13"] = (0, 255, 132+ 100) # 22 left hand 84 | MARKERS["14"] = (255, 132, 0+ 100) # 12 right b_shoulder 85 | MARKERS["15"] = (224, 255, 0+ 100) # 13 right f_shoulder 86 | MARKERS["16"] = (0, 225, 255+ 100) # 14 right upperarm 87 | MARKERS["17"] = (255, 0, 225+ 100) # 15 right forearm 88 | MARKERS["18"] = (138, 255, 0+ 100) # 24 right hand 89 | MARKERS["19"] = (0, 138, 255+ 100) # 16 left pelvis 90 | MARKERS["20"] = (255, 0, 138+ 100) # 17 left thigh 91 | MARKERS["21"] = (222, 0, 255+ 100) # 18 left calf 92 | MARKERS["22"] = (0, 255, 222+ 100) # 23 left foot 93 | MARKERS["23"] = (255, 222, 0+ 100) # 19 right pelvis 94 | MARKERS["24"] = (97, 0, 255+ 100) # 20 right thigh 95 | MARKERS["25"] = (0, 255, 97+ 100) # 21 right calf 96 | MARKERS["26"] = (255, 95, 0+ 100) # 25 right foot 97 | 98 | 99 | return MARKERS 100 | 101 | joint_selection = [ 102 | 0, # Hips 103 | 2, # Spine1 104 | 3, # Spine2 105 | 5, # Neck 106 | 8, # Head 107 | 10, # RightArm 108 | 11, # RightForeArm 109 | 12, # RightHand 110 | 16, # LeftArm 111 | 17, # LeftForeArm 112 | 18, # LeftHand 113 | 21, # RightUpLeg 114 | 22, # RightLeg 115 | 23, # RightFoot 116 | 25, # RightToeBase 117 | # 25, # LeftUpLeg 118 | # 27, # LeftLeg 119 | # 28, # LeftFoot 120 | # 30, # LeftForeFoot 121 | # 31, # LeftToeBase 122 | 27, # LeftUpLeg 123 | 28, # LeftLeg 124 | 29, # LeftFoot 125 | 31, # LeftToeBase 126 | ] 127 | 128 | # 0 Hips 129 | # 1 Spine 130 | # 2 Spine1 131 | # 3 Spine2 132 | # 4 Spine3 133 | # 5 Neck 134 | # 6 Neck1 135 | # 7 Head 136 | # 8 HeadEnd 137 | # 9 RightShoulder 138 | # 10 RightArm 139 | # 11 RightForeArm 140 | # 12 RightHand 141 | # 13 RightHandThumb1 142 | # 14 RightHandMiddle1 143 | # 15 LeftShoulder 144 | # 16 LeftArm 145 | 
# 17 LeftForeArm 146 | # 18 LeftHand 147 | # 19 LeftHandThumb1 148 | # 20 LeftHandMiddle1 149 | # 21 RightUpLeg 150 | # 22 RightLeg 151 | # 23 RightFoot 152 | # 24 RightForeFoot 153 | # 25 RightToeBase 154 | # 26 RightToeBaseEnd 155 | # 27 LeftUpLeg 156 | # 28 LeftLeg 157 | # 29 LeftFoot 158 | # 30 LeftForeFoot 159 | # 31 LeftToeBase 160 | # 32 LeftToeBaseEnd 161 | 162 | joint_selection2 = [ 163 | 0, # Hips 164 | 11, # Spine1 165 | 12, # Spine2 166 | 20, # Neck 167 | 21, # Head 168 | 24, # RightArm 169 | 25, # RightForeArm 170 | 26, # RightHand 171 | 14, # LeftArm 172 | 15, # LeftForeArm 173 | 16, # LeftHand 174 | 6, # RightUpLeg 175 | 7, # RightLeg 176 | 8, # RightFoot 177 | 9, # RightToeBase 178 | # 25, # LeftUpLeg 179 | # 27, # LeftLeg 180 | # 28, # LeftFoot 181 | # 30, # LeftForeFoot 182 | # 31, # LeftToeBase 183 | 1, # LeftUpLeg 184 | 2, # LeftLeg 185 | 3, # LeftFoot 186 | 4, # LeftToeBase 187 | ] 188 | 189 | marker_mapping_sfu_2_h4d = [ 190 | 0, 191 | 1, 192 | 2, 193 | 3, 194 | 4, 195 | 5, 196 | 6, 197 | 7, 198 | 8, 199 | 10, 200 | 201 | ] 202 | 203 | -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | 5 | # TODO: avoid the back and forth between torch and numpy 6 | def get_depth_image_from_points(resolution, res_scale, points): 7 | depth_tensor = torch.zeros([1, 1, int(resolution), int(resolution)], dtype=torch.float32) 8 | width = resolution * res_scale 9 | height = resolution * res_scale 10 | depth_img = np.zeros([int(height), int(width), 1]) 11 | points_np = points.cpu().numpy() 12 | points_np = np.asarray(sorted(points_np.squeeze(), key=lambda x: x[..., 2]))[::-1] 13 | 14 | for i in range(points_np.shape[0]): 15 | norm_x_value = points_np[i, 0] 16 | norm_y_value = points_np[i, 1] 17 | norm_depth_value = points_np[i, 2] 18 | y = int(norm_y_value * height) - 1 19 | x = int(norm_x_value * width) - 1 20 | offset = 0 21 | if (x > offset-1 and x < width-offset and y > offset-1 and y < height-offset): 22 | depth_img = cv2.circle(depth_img, (x, y), 2 * int(res_scale), float(norm_depth_value), -1) 23 | else: 24 | print("error") 25 | 26 | depth_img = cv2.resize(depth_img, (int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR_EXACT) 27 | depth_tensor[0, 0, ...] 
= torch.from_numpy(depth_img) 28 | return depth_tensor 29 | -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from src.data.datasets.human_pose.H4DIR.importers.enums import ( 4 | joint_selection 5 | ) 6 | from src.data.datasets.human_pose.H4DIR.importers.markermap import ( 7 | S1S4_Mapping, 8 | S2S3_Mapping 9 | ) 10 | 11 | __all__ = [ 12 | 'load_3d_data', 13 | ] 14 | 15 | def load_3d_data(filename, filename_rs, markers_out, joints_out, \ 16 | rs=True, random_rot=None, augment=False, S1S4=True, \ 17 | scale_on_image=1.25, data_type=torch.float32): 18 | # ground truth data 19 | file = open(filename, "r") 20 | lines = file.readlines() 21 | raw_points = np.zeros([markers_out + joints_out, 3], dtype=float) 22 | # num in the original ground truth data 23 | NUM_OF_MARKERS_IN_GT = 53 24 | NUM_OF_JOINTS_IN_GT = 33 25 | NUM_OF_LINES = NUM_OF_MARKERS_IN_GT + NUM_OF_JOINTS_IN_GT 26 | line_counter = 0 27 | j_counter = 0 28 | assert(len(lines) == NUM_OF_LINES) 29 | for line in lines: 30 | index = line_counter % NUM_OF_LINES 31 | if (joints_out == 0 and index == markers_out): 32 | break 33 | values = line.split(' ') 34 | x_gt = float(values[2]) 35 | y_gt = float(values[3]) 36 | z_gt = float(values[1]) 37 | if (index < NUM_OF_MARKERS_IN_GT and index < markers_out): 38 | raw_points[j_counter, 0] = x_gt 39 | raw_points[j_counter, 1] = -y_gt # (-) for imaging 40 | raw_points[j_counter, 2] = z_gt 41 | j_counter += 1 42 | elif (index - NUM_OF_MARKERS_IN_GT) < NUM_OF_JOINTS_IN_GT and (index - NUM_OF_MARKERS_IN_GT) in joint_selection: 43 | raw_points[j_counter, 0] = x_gt 44 | raw_points[j_counter, 1] = -y_gt # (-) for imaging 45 | raw_points[j_counter, 2] = z_gt 46 | j_counter += 1 47 | line_counter += 1 48 | # raw points list to be filled either with raw or noisy vicon data 49 | raw_rs_points_list = [] 50 | if (rs): 51 | file_rs = open(filename_rs, "r") 52 | lines_rs = file_rs.readlines() 53 | for line in lines_rs: 54 | values = line.split(' ') 55 | x_rs = float(values[1]) 56 | y_rs = float(values[2]) 57 | z_rs = float(values[0]) 58 | # this is thresholding the floor and the top. 
TODO: to be better investigated 59 | if y_rs > -830 and y_rs < 1170: 60 | raw_rs_points_list.append([x_rs, -y_rs, z_rs]) # (-) for imaging 61 | rs_raw_points = np.asarray(raw_rs_points_list) 62 | else: 63 | raw_rs_points_list = raw_points 64 | # subtraction of CoM and rotational augmentation 65 | com = np.mean(rs_raw_points, axis=0) 66 | raw_points -= com 67 | rs_raw_points -= com 68 | if augment: 69 | raw_points = random_rot.apply(raw_points) 70 | rs_raw_points = random_rot.apply(rs_raw_points) 71 | 72 | points = np.zeros([(markers_out + joints_out), 3], dtype=float) 73 | rs_points = rs_raw_points 74 | 75 | minval_z = 100000 76 | maxval_z = 0 77 | minval_y = minval_z 78 | maxval_y = maxval_z 79 | minval_x = minval_z 80 | maxval_x = maxval_z 81 | counter = 0 82 | 83 | for index in range(raw_points.shape[0]): 84 | if (index < markers_out): 85 | if (S1S4): 86 | points[index] = raw_points[S1S4_Mapping[index]] 87 | else: 88 | points[index] = raw_points[S2S3_Mapping[index]] 89 | else: 90 | points[index] = raw_points[index] 91 | counter += 1 92 | 93 | minval_x = np.minimum(np.min(rs_points[..., 0]), np.min(points[..., 0])) 94 | maxval_x = np.maximum(np.max(rs_points[..., 0]), np.max(points[..., 0])) 95 | minval_y = np.minimum(np.min(rs_points[..., 1]), np.min(points[..., 1])) 96 | maxval_y = np.maximum(np.max(rs_points[..., 1]), np.max(points[..., 1])) 97 | minval_z = np.minimum(np.min(rs_points[..., 2]), np.min(points[..., 2])) 98 | maxval_z = np.maximum(np.max(rs_points[..., 2]), np.max(points[..., 2])) 99 | 100 | tcom = torch.from_numpy(com).type(torch.FloatTensor) 101 | scale = torch.zeros([3], dtype=torch.float32) 102 | gt_markers = torch.zeros([markers_out, 3]) 103 | gt_joints = torch.zeros([joints_out, 3]) 104 | scale_on_image_offset = (1.0 - 1.0 / scale_on_image) / 2.0 105 | for i in range(0, markers_out + joints_out): 106 | scale[0] = scale_on_image * float(maxval_x - minval_x) 107 | scale[1] = scale_on_image * float(maxval_y - minval_y) 108 | scale[2] = scale_on_image * float(maxval_z - minval_z) 109 | 110 | norm_depth_value = (float(points[i][2]) - float(minval_z)) \ 111 | / scale[2] \ 112 | + scale_on_image_offset 113 | norm_x_value = (float(points[i][0]) - float(minval_x)) \ 114 | / scale[0] \ 115 | + scale_on_image_offset 116 | norm_y_value = (float(points[i][1]) - float(minval_y)) \ 117 | / scale[1] \ 118 | + scale_on_image_offset 119 | 120 | if (i < markers_out): 121 | gt_markers[i][0] = norm_x_value 122 | gt_markers[i][1] = norm_y_value 123 | gt_markers[i][2] = norm_depth_value 124 | else: 125 | gt_joints[i-markers_out][0] = norm_x_value 126 | gt_joints[i-markers_out][1] = norm_y_value 127 | gt_joints[i-markers_out][2] = norm_depth_value 128 | 129 | rs_markers_ori = torch.zeros([rs_raw_points.shape[0], rs_raw_points.shape[1]]) 130 | 131 | tcom[0] += - scale[0] / 2 + scale[0] / scale_on_image / 2 + float(minval_x) 132 | tcom[1] += - scale[1] / 2 + scale[1] / scale_on_image / 2 + float(minval_y) 133 | tcom[2] += - scale[2] / 2 + scale[2] / scale_on_image / 2 + float(minval_z) 134 | 135 | for i in range(rs_raw_points.shape[0]): 136 | norm_depth_value = (float(rs_points[i][2]) - float(minval_z)) \ 137 | / scale[2] \ 138 | + scale_on_image_offset 139 | norm_x_value = (float(rs_points[i][0]) - float(minval_x)) \ 140 | / scale[0] \ 141 | + scale_on_image_offset 142 | norm_y_value = (float(rs_points[i][1]) - float(minval_y)) \ 143 | / scale[1] \ 144 | + scale_on_image_offset 145 | 146 | rs_markers_ori[i][0] = norm_x_value 147 | rs_markers_ori[i][1] = norm_y_value 148 | 
rs_markers_ori[i][2] = norm_depth_value 149 | 150 | return rs_markers_ori, gt_markers, gt_joints, scale, tcom 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/markermap.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | class MARKER_S1S4(Enum): 3 | ARIEL=0, 4 | C7=1, 5 | CLAV=2, 6 | LANK=3, 7 | LBHD=4, 8 | LBSH=5, 9 | LBWT=6, 10 | LELB=7, 11 | LFHD=8, 12 | LFRM=9, 13 | LFSH=10, 14 | LFWT=11, 15 | LHEL=12, 16 | LIEL=13, 17 | LIHAND=14, 18 | LIWR=15, 19 | LKNE=16, 20 | LKNI=17, 21 | LMT1=18, 22 | LMT5=19, 23 | LOHAND=20, 24 | LOWR=21, 25 | LSHN=22, 26 | LTHI=23, 27 | LTOE=24, 28 | LUPA=25, 29 | MBWT=26, 30 | MFWT=27, 31 | RANK=28, 32 | RBHD=29, 33 | RBSH=30, 34 | RBWT=31, 35 | RELB=32, 36 | RFHD=33, 37 | RFRM=34, 38 | RFSH=35, 39 | RFWT=36, 40 | RHEL=37, 41 | RIEL=38, 42 | RIHAND=39, 43 | RIWR=40, 44 | RKNE=41, 45 | RKNI=42, 46 | RMT1=43, 47 | RMT5=44, 48 | ROHAND=45, 49 | ROWR=46, 50 | RSHN=47, 51 | RTHI=48, 52 | RTOE=49, 53 | RUPA=50, 54 | STRN=51, 55 | T10=52, 56 | 57 | class MARKER_S2S3(Enum): 58 | ARIEL=0, 59 | C7=1, 60 | CLAV=2, 61 | LANK=3, 62 | LBHD=4, 63 | LBSH=5, 64 | LBWT=6, 65 | LELB=7, 66 | LFHD=8, 67 | LFRM=9, 68 | LFSH=10, 69 | LFWT=11, 70 | LHEL=12, 71 | LIEL=13, 72 | LIHAND=14, 73 | LIWR=15, 74 | LKNE=16, 75 | LKNI=17, 76 | LMT1=18, 77 | LMT5=19, 78 | LMWT=20, 79 | LOHAND=21, 80 | LOWR=22, 81 | LSHN=23, 82 | LTHI=24, 83 | LTOE=25, 84 | LUPA=26, 85 | RANK=27, 86 | RBHD=28, 87 | RBSH=29, 88 | RBWT=30, 89 | RELB=31, 90 | RFHD=32, 91 | RFRM=33, 92 | RFSH=34, 93 | RFWT=35, 94 | RHEL=36, 95 | RIEL=37, 96 | RIHAND=38, 97 | RIWR=39, 98 | RKNE=40, 99 | RKNI=41, 100 | RMT1=42, 101 | RMT5=43, 102 | RMWT=44, 103 | ROHAND=45, 104 | ROWR=46, 105 | RSHN=47, 106 | RTHI=48, 107 | RTOE=49, 108 | RUPA=50, 109 | STRN=51, 110 | T10=52, 111 | 112 | S1S4_Mapping = [ 113 | 0, 114 | 1, 115 | 2, 116 | 3, 117 | 4, 118 | 5, 119 | 6, 120 | 7, 121 | 8, 122 | 9, 123 | 10, 124 | 11, 125 | 12, 126 | 13, 127 | 14, 128 | 15, 129 | 16, 130 | 17, 131 | 18, 132 | 19, 133 | 20, 134 | 21, 135 | 22, 136 | 23, 137 | 24, 138 | 25, 139 | 28, 140 | 29, 141 | 30, 142 | 31, 143 | 32, 144 | 33, 145 | 34, 146 | 35, 147 | 36, 148 | 37, 149 | 38, 150 | 39, 151 | 40, 152 | 41, 153 | 42, 154 | 43, 155 | 44, 156 | 45, 157 | 46, 158 | 47, 159 | 48, 160 | 49, 161 | 50, 162 | 51, 163 | 52, 164 | 26, 165 | 27 166 | ] 167 | 168 | 169 | S2S3_Mapping = [ 170 | 0, 171 | 1, 172 | 2, 173 | 3, 174 | 4, 175 | 5, 176 | 6, 177 | 7, 178 | 8, 179 | 9, 180 | 10, 181 | 11, 182 | 12, 183 | 13, 184 | 14, 185 | 15, 186 | 16, 187 | 17, 188 | 18, 189 | 19, 190 | 21, 191 | 22, 192 | 23, 193 | 24, 194 | 25, 195 | 26, 196 | 27, 197 | 28, 198 | 29, 199 | 30, 200 | 31, 201 | 32, 202 | 33, 203 | 34, 204 | 35, 205 | 36, 206 | 37, 207 | 38, 208 | 39, 209 | 40, 210 | 41, 211 | 42, 212 | 43, 213 | 45, 214 | 46, 215 | 47, 216 | 48, 217 | 49, 218 | 50, 219 | 51, 220 | 52, 221 | 20, 222 | 44 223 | ] 224 | 225 | class MARKER_S1S4_new(Enum): 226 | ARIEL=0, 227 | C7=1, 228 | CLAV=2, 229 | LANK=3, 230 | LBHD=4, 231 | LBSH=5, 232 | LBWT=6, 233 | LELB=7, 234 | LFHD=8, 235 | LFRM=9, 236 | LFSH=10, 237 | LFWT=11, 238 | LHEL=12, 239 | LIEL=13, 240 | LIHAND=14, 241 | LIWR=15, 242 | LKNE=16, 243 | LKNI=17, 244 | LMT1=18, 245 | LMT5=19, 246 | LOHAND=20, 247 | LOWR=21, 248 | LSHN=22, 249 | LTHI=23, 250 | LTOE=24, 251 | LUPA=25, 252 | RANK=26, 253 | RBHD=27, 254 | RBSH=28, 255 | RBWT=29, 256 | RELB=30, 257 | 
RFHD=31, 258 | RFRM=32, 259 | RFSH=33, 260 | RFWT=34, 261 | RHEL=35, 262 | RIEL=36, 263 | RIHAND=37, 264 | RIWR=38, 265 | RKNE=39, 266 | RKNI=40, 267 | RMT1=41, 268 | RMT5=42, 269 | ROHAND=43, 270 | ROWR=44, 271 | RSHN=45, 272 | RTHI=46, 273 | RTOE=47, 274 | RUPA=48, 275 | STRN=49, 276 | T10=50, 277 | MBWT=51, 278 | MFWT=52, 279 | 280 | class MARKER_S2S3_new(Enum): 281 | ARIEL=0, 282 | C7=1, 283 | CLAV=2, 284 | LANK=3, 285 | LBHD=4, 286 | LBSH=5, 287 | LBWT=6, 288 | LELB=7, 289 | LFHD=8, 290 | LFRM=9, 291 | LFSH=10, 292 | LFWT=11, 293 | LHEL=12, 294 | LIEL=13, 295 | LIHAND=14, 296 | LIWR=15, 297 | LKNE=16, 298 | LKNI=17, 299 | LMT1=18, 300 | LMT5=19, 301 | LOHAND=20, 302 | LOWR=21, 303 | LSHN=22, 304 | LTHI=23, 305 | LTOE=24, 306 | LUPA=25, 307 | RANK=26, 308 | RBHD=27, 309 | RBSH=28, 310 | RBWT=29, 311 | RELB=30, 312 | RFHD=31, 313 | RFRM=32, 314 | RFSH=33, 315 | RFWT=34, 316 | RHEL=35, 317 | RIEL=36, 318 | RIHAND=37, 319 | RIWR=38, 320 | RKNE=39, 321 | RKNI=40, 322 | RMT1=41, 323 | RMT5=42, 324 | ROHAND=43, 325 | ROWR=44, 326 | RSHN=45, 327 | RTHI=46, 328 | RTOE=47, 329 | RUPA=48, 330 | STRN=49, 331 | T10=50, 332 | LMWT=51, 333 | RMWT=52, -------------------------------------------------------------------------------- /src/data/datasets/human_pose/H4DIR/importers/projections.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy 3 | from scipy.spatial.transform import Rotation as R 4 | 5 | 6 | def f_rotate_back(coords): 7 | coords = coords.detach().clone() 8 | coords[..., 0] = 1.0 - coords[..., 0] 9 | coords[..., 2] = 1.0 - coords[..., 2] 10 | return coords 11 | 12 | def f_rotate_right(coords): 13 | rot = torch.tensor([[ 14 | [0.0, 0.0, -1.0], 15 | [0.0, 1.0, 0.0], 16 | [1.0, 0.0, 0.0], 17 | ]]).float() 18 | xformed_t = coords.detach().clone().float() 19 | xformed_t = rot @ xformed_t.permute(0, 2, 1) 20 | xformed_t += torch.tensor([1.0, 0.0, 0.0]).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 21 | return xformed_t.permute(0, 2, 1) 22 | # return torch.from_numpy(numpy.expand_dims(xformed, axis=0)) 23 | 24 | 25 | def f_rotate_left(coords): 26 | rot = torch.tensor([[ 27 | [0.0, 0.0, 1.0], 28 | [0.0, 1.0, 0.0], 29 | [-1.0, 0.0, 0.0], 30 | ]]).float() 31 | xformed_t = coords.detach().clone().float() 32 | xformed_t = rot @ xformed_t.permute(0, 2, 1) 33 | xformed_t += torch.tensor([0.0, 0.0, 1.0]).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 34 | return xformed_t.permute(0, 2, 1) 35 | 36 | def rotate_back(tensor, centered = True, masked=False): 37 | t = tensor.clone().detach() 38 | 39 | if (centered): 40 | if (masked): 41 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 42 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 43 | t[..., 0] = mask_0 * (- t[..., 0]) 44 | t[..., 2] = mask_2 * (1 - t[..., 2]) 45 | else: 46 | t[..., 0] = - t[..., 0] 47 | t[..., 2] = (1 - t[..., 2]) 48 | else: 49 | if (masked): 50 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 51 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 52 | t[..., 0] = mask_0 * (1 - t[..., 0]) 53 | t[..., 2] = mask_2 * (1 - t[..., 2]) 54 | else: 55 | t[..., 0] = 1 - t[..., 0] 56 | t[..., 2] = 1 - t[..., 2] 57 | 58 | return t 59 | 60 | def rotate_back_(tensor, centered = True, masked=False): 61 | t = tensor 62 | 63 | if (centered): 64 | if (masked): 65 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 66 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 67 | 
t[..., 0] = mask_0 * (- t[..., 0]) 68 | t[..., 2] = mask_2 * (1 - t[..., 2]) 69 | else: 70 | t[..., 0] = - t[..., 0] 71 | t[..., 2] = (1 - t[..., 2]) 72 | else: 73 | if (masked): 74 | mask_0 = (t[..., 0] > 0).type(torch.FloatTensor).to('cuda') 75 | mask_2 = (t[..., 2] > 0).type(torch.FloatTensor).to('cuda') 76 | t[..., 0] = mask_0 * (1 - t[..., 0]) 77 | t[..., 2] = mask_2 * (1 - t[..., 2]) 78 | else: 79 | t[..., 0] = 1 - t[..., 0] 80 | t[..., 2] = 1 - t[..., 2] 81 | 82 | return t 83 | 84 | def create_image_domain_grid(width, height, data_type=torch.float32): 85 | v_range = ( 86 | torch.arange(0, height) # [0 - h] 87 | .view(1, height, 1) # [1, [0 - h], 1] 88 | .expand(1, height, width) # [1, [0 - h], W] 89 | .type(data_type) # [1, H, W] 90 | ) 91 | u_range = ( 92 | torch.arange(0, width) # [0 - w] 93 | .view(1, 1, width) # [1, 1, [0 - w]] 94 | .expand(1, height, width) # [1, H, [0 - w]] 95 | .type(data_type) # [1, H, W] 96 | ) 97 | ones = ( 98 | torch.ones(1, height, width) # [1, H, W] := 1 99 | .type(data_type) 100 | ) 101 | return torch.stack((u_range, v_range, ones), dim=1) # [1, 3, H, W] 102 | 103 | def project_points_to_uvs(points, intrinsics): 104 | b, _, h, w = points.size() # [B, 3, H, W] 105 | x_coordinate3d = points[:, 0] #TODO: check if adding small value makes sense to avoid zeros? 106 | y_coordinate3d = points[:, 1] 107 | z_coordinate3d = points[:, 2].clamp(min=1e-3) 108 | x_homogeneous = x_coordinate3d / z_coordinate3d 109 | y_homogeneous = y_coordinate3d / z_coordinate3d 110 | ones = z_coordinate3d.new_ones(z_coordinate3d.size()) 111 | homogeneous_coordinates = ( # (x/z, y/z, 1.0) 112 | torch.stack([x_homogeneous, y_homogeneous, ones], dim=1) # [B, 3, H, W] 113 | .reshape(b, 3, -1) # [B, 3, H*W] 114 | ) 115 | uv_coordinates = intrinsics @ homogeneous_coordinates # [B, 3, H*W] 116 | return ( # image domain coordinates 117 | uv_coordinates[:, :2, :] # [B, 2, H*W] 118 | .reshape(b, 2, h, w) # [B, 2, H, W] 119 | ) # [B, 2, H, W] 120 | 121 | 122 | def project_single_point_to_uv(point, intrinsics): 123 | x_coordinate3d = point[0] #TODO: check if adding small value makes sense to avoid zeros? 
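    # Pinhole projection for a single point, mirroring project_points_to_uvs above:
    # divide x and y by the depth z, then map the homogeneous point through the intrinsics.
    # NOTE: unlike project_points_to_uvs, z is not clamped here, so a zero depth would divide by zero.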
124 | y_coordinate3d = point[1] 125 | z_coordinate3d = point[2] 126 | x_homogeneous = x_coordinate3d / z_coordinate3d 127 | y_homogeneous = y_coordinate3d / z_coordinate3d 128 | 129 | homogeneous_coordinates = (x_homogeneous, y_homogeneous, 1) 130 | uv_coordinates = intrinsics.numpy() @ homogeneous_coordinates # [B, 3, H*W] 131 | return uv_coordinates[:2] 132 | 133 | def normalize_uvs(uvs): 134 | _, __, h, w = uvs.size() 135 | normalized_u = 2 * uvs[:, 0, :, :] / (w - 1) - 1 136 | normalized_v = 2 * uvs[:, 1, :, :] / (h - 1) - 1 137 | return torch.stack([normalized_u, normalized_v], dim=1)\ 138 | .clamp(min=-1, max=1) #TODO: check clamping or masking /w 2s 139 | 140 | def deproject_depth_to_points(depth, grid, intrinsics_inv): 141 | b, _, h, w = depth.size() 142 | # check https://pytorch.org/docs/stable/torch.html#torch.matmul 143 | # need to return a one-dimensional tensor to use the matrix-vector product 144 | # as a result we reshape to [B, 3, H*W] in order to multiply the intrinsics matrix 145 | # with a 3x1 vector (u, v, 1) 146 | current_pixel_coords = ( # convert grid to appropriate dims for matrix multiplication 147 | grid # [1, 3, H, W] #grid[:,:,:h,:w] 148 | .expand(b, 3, h, w) # [B, 3, H, W] 149 | .reshape(b, 3, -1) # [B, 3, H*W] := [B, 3, UV1] 150 | ) 151 | # return ( # K_inv * [UV1] * depth 152 | # (intrinsics_inv @ current_pixel_coords) # [B, 3, 3] * [B, 3, UV1] 153 | # .reshape(b, 3, h, w) * # [B, 3, H, W] 154 | # depth 155 | # #.unsqueeze(1) # unsqueeze to tri-channel for element wise product 156 | # ) # [B, 3, H, W] 157 | p3d = ( # K_inv * [UV1] * depth 158 | (intrinsics_inv @ current_pixel_coords) # [B, 3, 3] * [B, 3, UV1] 159 | .reshape(b, 3, h, w) * # [B, 3, H, W] 160 | depth 161 | #.unsqueeze(1) # unsqueeze to tri-channel for element wise product 162 | ) # [B, 3, H, W] 163 | #p3d[:, 0, :, :] += 0.055 # magic3 number fixes all ! 164 | # p3d[:, 0, :, :] += 0.05 # magic number fixes all ! 165 | #p3d[:, 0, :, :] += 0.0275 # magic2 number 2 fixes all ! 
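    # At this point p3d[b, :, v, u] = depth[b, 0, v, u] * K^-1 @ [u, v, 1]^T,
    # i.e. every pixel has been back-projected along its viewing ray to its observed depth.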
166 | return p3d -------------------------------------------------------------------------------- /src/modules/lightning/models/__init__.py: -------------------------------------------------------------------------------- 1 | from src.modules.lightning.models.stacked_hourglass import StackedHourglassMod 2 | from src.modules.lightning.models.stacked_hourglass_e2e import StackedHourglassMod_e2e 3 | from src.modules.lightning.models.cmpm import CMPM 4 | from src.modules.lightning.models.cpm import CPM 5 | from src.modules.lightning.models.hrnet_mod import HRNetMod 6 | from src.modules.lightning.models.hrnet_e2e import HRNetMod_e2e 7 | from src.modules.lightning.models.hrnet_ps import HRNetModPS 8 | from src.modules.lightning.models.hopenet import HopeNet 9 | from src.modules.lightning.models.oml_dual import OmlDual 10 | 11 | __all__ = [ 12 | "StackedHourglassMod", 13 | "StackedHourglassMod_e2e", 14 | "CMPM", 15 | "CPM", 16 | "HRNetMod", 17 | "HRNetMod_e2e", 18 | "HopeNet", 19 | "HRNetModPS" 20 | "OmlDual" 21 | ] -------------------------------------------------------------------------------- /src/modules/lightning/models/cmpm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | from .dsntnn import ( 5 | flat_softmax, 6 | dsnt 7 | ) 8 | 9 | import typing 10 | 11 | 12 | class Interpolate(nn.Module): 13 | def __init__(self, size, mode): 14 | super(Interpolate, self).__init__() 15 | # size: expected size after interpolation 16 | # mode: interpolation type (e.g. bilinear, nearest) 17 | 18 | self.interp = nn.functional.interpolate 19 | self.size = size 20 | self.mode = mode 21 | 22 | def forward(self, x): 23 | out = self.interp(x, size=self.size, mode=self.mode) #, align_corners=False 24 | 25 | return out 26 | 27 | class CMPM(nn.Module): 28 | def __init__(self, 29 | ): 30 | super(CMPM, self).__init__() 31 | num_markers = 53 32 | num_joints = 19 33 | self.num_stages = 6 34 | self.num_joints = num_joints#configer.get('network', 'heatmap_out') 35 | self.num_markers = num_markers#configer.get('network', 'heatmap_out') 36 | self.out_c = num_markers + num_joints#configer.get('network', 'heatmap_out') 37 | self.pool_center_lower = nn.AvgPool2d(kernel_size=9, stride=8) 38 | self.conv1_stage1 = nn.Conv2d(1, 128, kernel_size=9, padding=4) #change input to one channel 39 | self.pool1_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 40 | self.conv2_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 41 | self.pool2_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 42 | self.conv3_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 43 | # self.pool3_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 44 | self.conv4_stage1 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 45 | self.conv5_stage1 = nn.Conv2d(32, 512, kernel_size=9, padding=4) 46 | self.conv6_stage1 = nn.Conv2d(512, 512, kernel_size=1) 47 | self.conv7_stage1 = nn.Conv2d(512, self.num_markers, kernel_size=1) 48 | 49 | self.conv1_stage2 = nn.Conv2d(1, 128, kernel_size=9, padding=4) 50 | self.pool1_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 51 | self.conv2_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 52 | self.pool2_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 53 | self.conv3_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 54 | self.pool3_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 55 | self.conv4_stage2 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 56 
| 57 | self.Mconv1_stage2 = nn.Conv2d(32 + self.num_markers, 128, kernel_size=11, padding=5) 58 | self.Mconv2_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 59 | self.Mconv3_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 60 | self.Mconv4_stage2 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 61 | self.Mconv5_stage2 = nn.Conv2d(128, self.num_markers, kernel_size=1, padding=0) 62 | 63 | self.conv1_stage3 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 64 | 65 | self.Mconv1_stage3 = nn.Conv2d(32 + self.num_markers, 128, kernel_size=11, padding=5) 66 | self.Mconv2_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 67 | self.Mconv3_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 68 | self.Mconv4_stage3 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 69 | self.Mconv5_stage3 = nn.Conv2d(128, self.num_markers, kernel_size=1, padding=0) 70 | 71 | self.conv1_stage4 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 72 | 73 | self.Mconv1_stage4 = nn.Conv2d(32 + self.num_markers, 128, kernel_size=11, padding=5) 74 | self.Mconv2_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 75 | self.Mconv3_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 76 | self.Mconv4_stage4 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 77 | self.Mconv5_stage4 = nn.Conv2d(128, self.num_joints, kernel_size=1, padding=0) 78 | 79 | self.conv1_stage5 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 80 | 81 | self.Mconv1_stage5 = nn.Conv2d(32 + self.num_joints, 128, kernel_size=11, padding=5) 82 | self.Mconv2_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 83 | self.Mconv3_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 84 | self.Mconv4_stage5 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 85 | self.Mconv5_stage5 = nn.Conv2d(128, self.num_joints, kernel_size=1, padding=0) 86 | 87 | self.conv1_stage6 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 88 | 89 | self.Mconv1_stage6 = nn.Conv2d(32 + self.num_joints, 128, kernel_size=11, padding=5) 90 | self.Mconv2_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 91 | self.Mconv3_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 92 | self.Mconv4_stage6 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 93 | self.Mconv5_stage6 = nn.Conv2d(128, self.num_joints, kernel_size=1, padding=0) 94 | 95 | # self.upsample_heatmaps_block = Interpolate((136,136), mode = "bicubic") 96 | 97 | 98 | def _stage1(self, image): 99 | """ 100 | Output result of stage 1 101 | :param image: source image with (368, 368) 102 | :return: conv7_stage1_map 103 | """ 104 | x = self.pool1_stage1(F.relu(self.conv1_stage1(image))) 105 | x = self.pool2_stage1(F.relu(self.conv2_stage1(x))) 106 | # x = self.pool3_stage1(F.relu(self.conv3_stage1(x))) 107 | x = F.relu(self.conv4_stage1(x)) 108 | x = F.relu(self.conv5_stage1(x)) 109 | x = F.relu(self.conv6_stage1(x)) 110 | # x = F.sigmoid(self.conv6_stage1(x)) 111 | x = self.conv7_stage1(x) 112 | return x 113 | 114 | def _middle(self, image): 115 | """ 116 | Compute shared pool3_stage_map for the following stage 117 | :param image: source image with (368, 368) 118 | :return: pool3_stage2_map 119 | """ 120 | x = self.pool1_stage2(F.relu(self.conv1_stage2(image))) 121 | x = self.pool2_stage2(F.relu(self.conv2_stage2(x))) 122 | # x = self.pool3_stage2(F.relu(self.conv3_stage2(x))) 123 | 124 | return x 125 | 126 | def _stage2(self, pool3_stage2_map, conv7_stage1_map): 127 | """ 128 | Output result of stage 2 129 | :param pool3_stage2_map 130 | :param conv7_stage1_map 131 | :return: Mconv5_stage2_map 132 | """ 133 | x = 
F.relu(self.conv4_stage2(pool3_stage2_map)) 134 | x = torch.cat([x, conv7_stage1_map], dim=1) 135 | x = F.relu(self.Mconv1_stage2(x)) 136 | x = F.relu(self.Mconv2_stage2(x)) 137 | x = F.relu(self.Mconv3_stage2(x)) 138 | x = F.relu(self.Mconv4_stage2(x)) 139 | # x = F.sigmoid(self.Mconv4_stage2(x)) 140 | x = self.Mconv5_stage2(x) 141 | 142 | return x 143 | 144 | def _stage3(self, pool3_stage2_map, Mconv5_stage2_map): 145 | """ 146 | Output result of stage 3 147 | :param pool3_stage2_map: 148 | :param Mconv5_stage2_map: 149 | :return: Mconv5_stage3_map 150 | """ 151 | x = F.relu(self.conv1_stage3(pool3_stage2_map)) 152 | x = torch.cat([x, Mconv5_stage2_map], dim=1) 153 | x = F.relu(self.Mconv1_stage3(x)) 154 | x = F.relu(self.Mconv2_stage3(x)) 155 | x = F.relu(self.Mconv3_stage3(x)) 156 | x = F.relu(self.Mconv4_stage3(x)) 157 | # x = F.sigmoid(self.Mconv4_stage3(x)) 158 | x = self.Mconv5_stage3(x) 159 | 160 | return x 161 | 162 | def _stage4(self, pool3_stage2_map, Mconv5_stage3_map): 163 | """ 164 | Output result of stage 4 165 | :param pool3_stage2_map: 166 | :param Mconv5_stage3_map: 167 | :return:Mconv5_stage4_map 168 | """ 169 | x = F.relu(self.conv1_stage4(pool3_stage2_map)) 170 | x = torch.cat([x, Mconv5_stage3_map], dim=1) 171 | x = F.relu(self.Mconv1_stage4(x)) 172 | x = F.relu(self.Mconv2_stage4(x)) 173 | x = F.relu(self.Mconv3_stage4(x)) 174 | x = F.relu(self.Mconv4_stage4(x)) 175 | # x = F.sigmoid(self.Mconv4_stage4(x)) 176 | x = self.Mconv5_stage4(x) 177 | 178 | return x 179 | 180 | def _stage5(self, pool3_stage2_map, Mconv5_stage4_map): 181 | """ 182 | Output result of stage 5 183 | :param pool3_stage2_map: 184 | :param Mconv5_stage4_map: 185 | :return:Mconv5_stage5_map 186 | """ 187 | x = F.relu(self.conv1_stage5(pool3_stage2_map)) 188 | x = torch.cat([x, Mconv5_stage4_map], dim=1) 189 | x = F.relu(self.Mconv1_stage5(x)) 190 | x = F.relu(self.Mconv2_stage5(x)) 191 | x = F.relu(self.Mconv3_stage5(x)) 192 | x = F.relu(self.Mconv4_stage5(x)) 193 | # x = F.sigmoid(self.Mconv4_stage5(x)) 194 | x = self.Mconv5_stage5(x) 195 | 196 | return x 197 | 198 | def _stage6(self, pool3_stage2_map, Mconv5_stage5_map): 199 | """ 200 | Output result of stage 6 201 | :param pool3_stage2_map: 202 | :param Mconv5_stage6_map: 203 | :param pool_center_lower_map: 204 | :return:Mconv5_stage6_map 205 | """ 206 | x = F.relu(self.conv1_stage6(pool3_stage2_map)) 207 | x = torch.cat([x, Mconv5_stage5_map], dim=1) 208 | x = F.relu(self.Mconv1_stage6(x)) 209 | x = F.relu(self.Mconv2_stage6(x)) 210 | x = F.relu(self.Mconv3_stage6(x)) 211 | x = F.relu(self.Mconv4_stage6(x)) 212 | # x = F.sigmoid(self.Mconv4_stage6(x)) 213 | x = self.Mconv5_stage6(x) 214 | 215 | return x 216 | 217 | 218 | def forward(self, 219 | data: torch.Tensor 220 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 221 | depth_tensor = data 222 | conv7_stage1_map = self._stage1(depth_tensor) # result of stage 1 223 | pool3_stage2_map = self._middle(depth_tensor) 224 | 225 | Mconv5_stage2_map = self._stage2(pool3_stage2_map, conv7_stage1_map) # result of stage 2 226 | Mconv5_stage3_map = self._stage3(pool3_stage2_map, Mconv5_stage2_map) # result of stage 3 227 | Mconv5_stage4_map = self._stage4(pool3_stage2_map, Mconv5_stage3_map) # result of stage 4 228 | Mconv5_stage5_map = self._stage5(pool3_stage2_map, Mconv5_stage4_map) # result of stage 5 229 | Mconv5_stage6_map = self._stage6(pool3_stage2_map, Mconv5_stage5_map) # result of stage 6 230 | ###################### customization for DSTN 231 | full_unnormalized_heatmaps_markers = 
conv7_stage1_map + Mconv5_stage2_map + Mconv5_stage3_map 232 | full_unnormalized_heatmaps_joints = Mconv5_stage4_map + Mconv5_stage5_map + Mconv5_stage6_map 233 | 234 | return full_unnormalized_heatmaps_markers, full_unnormalized_heatmaps_joints 235 | -------------------------------------------------------------------------------- /src/modules/lightning/models/cpm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | from .dsntnn import ( 5 | flat_softmax, 6 | dsnt 7 | ) 8 | 9 | import typing 10 | 11 | 12 | class Interpolate(nn.Module): 13 | def __init__(self, size, mode): 14 | super(Interpolate, self).__init__() 15 | # size: expected size after interpolation 16 | # mode: interpolation type (e.g. bilinear, nearest) 17 | 18 | self.interp = nn.functional.interpolate 19 | self.size = size 20 | self.mode = mode 21 | 22 | def forward(self, x): 23 | out = self.interp(x, size=self.size, mode=self.mode) #, align_corners=False 24 | 25 | return out 26 | 27 | class CPM(nn.Module): 28 | def __init__(self, 29 | num_markers, 30 | num_joints 31 | ): 32 | super(CPM, self).__init__() 33 | self.total_out = num_markers + num_joints 34 | self.num_stages = 6 35 | self.num_joints = num_joints#configer.get('network', 'heatmap_out') 36 | self.num_markers = num_markers#configer.get('network', 'heatmap_out') 37 | self.out_c = num_markers + num_joints#configer.get('network', 'heatmap_out') 38 | self.pool_center_lower = nn.AvgPool2d(kernel_size=9, stride=8) 39 | self.conv1_stage1 = nn.Conv2d(1, 128, kernel_size=9, padding=4) #change input to one channel 40 | self.pool1_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 41 | self.conv2_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 42 | self.pool2_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 43 | self.conv3_stage1 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 44 | # self.pool3_stage1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 45 | self.conv4_stage1 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 46 | self.conv5_stage1 = nn.Conv2d(32, 512, kernel_size=9, padding=4) 47 | self.conv6_stage1 = nn.Conv2d(512, 512, kernel_size=1) 48 | self.conv7_stage1 = nn.Conv2d(512, self.total_out, kernel_size=1) 49 | 50 | self.conv1_stage2 = nn.Conv2d(1, 128, kernel_size=9, padding=4) 51 | self.pool1_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 52 | self.conv2_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 53 | self.pool2_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 54 | self.conv3_stage2 = nn.Conv2d(128, 128, kernel_size=9, padding=4) 55 | self.pool3_stage2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 56 | self.conv4_stage2 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 57 | 58 | self.Mconv1_stage2 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 59 | self.Mconv2_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 60 | self.Mconv3_stage2 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 61 | self.Mconv4_stage2 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 62 | self.Mconv5_stage2 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 63 | 64 | self.conv1_stage3 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 65 | 66 | self.Mconv1_stage3 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 67 | self.Mconv2_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 68 | self.Mconv3_stage3 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 69 | self.Mconv4_stage3 = 
nn.Conv2d(128, 128, kernel_size=1, padding=0) 70 | self.Mconv5_stage3 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 71 | 72 | self.conv1_stage4 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 73 | 74 | self.Mconv1_stage4 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 75 | self.Mconv2_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 76 | self.Mconv3_stage4 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 77 | self.Mconv4_stage4 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 78 | self.Mconv5_stage4 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 79 | 80 | self.conv1_stage5 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 81 | 82 | self.Mconv1_stage5 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 83 | self.Mconv2_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 84 | self.Mconv3_stage5 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 85 | self.Mconv4_stage5 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 86 | self.Mconv5_stage5 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 87 | 88 | self.conv1_stage6 = nn.Conv2d(128, 32, kernel_size=5, padding=2) 89 | 90 | self.Mconv1_stage6 = nn.Conv2d(32 + self.total_out, 128, kernel_size=11, padding=5) 91 | self.Mconv2_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 92 | self.Mconv3_stage6 = nn.Conv2d(128, 128, kernel_size=11, padding=5) 93 | self.Mconv4_stage6 = nn.Conv2d(128, 128, kernel_size=1, padding=0) 94 | self.Mconv5_stage6 = nn.Conv2d(128, self.total_out, kernel_size=1, padding=0) 95 | 96 | # self.upsample_heatmaps_block = Interpolate((136,136), mode = "bicubic") 97 | 98 | 99 | def _stage1(self, image): 100 | """ 101 | Output result of stage 1 102 | :param image: source image with (368, 368) 103 | :return: conv7_stage1_map 104 | """ 105 | x = self.pool1_stage1(F.relu(self.conv1_stage1(image))) 106 | x = self.pool2_stage1(F.relu(self.conv2_stage1(x))) 107 | # x = self.pool3_stage1(F.relu(self.conv3_stage1(x))) 108 | x = F.relu(self.conv4_stage1(x)) 109 | x = F.relu(self.conv5_stage1(x)) 110 | x = F.relu(self.conv6_stage1(x)) 111 | # x = F.sigmoid(self.conv6_stage1(x)) 112 | x = self.conv7_stage1(x) 113 | return x 114 | 115 | def _middle(self, image): 116 | """ 117 | Compute shared pool3_stage_map for the following stage 118 | :param image: source image with (368, 368) 119 | :return: pool3_stage2_map 120 | """ 121 | x = self.pool1_stage2(F.relu(self.conv1_stage2(image))) 122 | x = self.pool2_stage2(F.relu(self.conv2_stage2(x))) 123 | # x = self.pool3_stage2(F.relu(self.conv3_stage2(x))) 124 | 125 | return x 126 | 127 | def _stage2(self, pool3_stage2_map, conv7_stage1_map): 128 | """ 129 | Output result of stage 2 130 | :param pool3_stage2_map 131 | :param conv7_stage1_map 132 | :return: Mconv5_stage2_map 133 | """ 134 | x = F.relu(self.conv4_stage2(pool3_stage2_map)) 135 | x = torch.cat([x, conv7_stage1_map], dim=1) 136 | x = F.relu(self.Mconv1_stage2(x)) 137 | x = F.relu(self.Mconv2_stage2(x)) 138 | x = F.relu(self.Mconv3_stage2(x)) 139 | x = F.relu(self.Mconv4_stage2(x)) 140 | # x = F.sigmoid(self.Mconv4_stage2(x)) 141 | x = self.Mconv5_stage2(x) 142 | 143 | return x 144 | 145 | def _stage3(self, pool3_stage2_map, Mconv5_stage2_map): 146 | """ 147 | Output result of stage 3 148 | :param pool3_stage2_map: 149 | :param Mconv5_stage2_map: 150 | :return: Mconv5_stage3_map 151 | """ 152 | x = F.relu(self.conv1_stage3(pool3_stage2_map)) 153 | x = torch.cat([x, Mconv5_stage2_map], dim=1) 154 | x = F.relu(self.Mconv1_stage3(x)) 155 | x = 
F.relu(self.Mconv2_stage3(x)) 156 | x = F.relu(self.Mconv3_stage3(x)) 157 | x = F.relu(self.Mconv4_stage3(x)) 158 | # x = F.sigmoid(self.Mconv4_stage3(x)) 159 | x = self.Mconv5_stage3(x) 160 | 161 | return x 162 | 163 | def _stage4(self, pool3_stage2_map, Mconv5_stage3_map): 164 | """ 165 | Output result of stage 4 166 | :param pool3_stage2_map: 167 | :param Mconv5_stage3_map: 168 | :return:Mconv5_stage4_map 169 | """ 170 | x = F.relu(self.conv1_stage4(pool3_stage2_map)) 171 | x = torch.cat([x, Mconv5_stage3_map], dim=1) 172 | x = F.relu(self.Mconv1_stage4(x)) 173 | x = F.relu(self.Mconv2_stage4(x)) 174 | x = F.relu(self.Mconv3_stage4(x)) 175 | x = F.relu(self.Mconv4_stage4(x)) 176 | # x = F.sigmoid(self.Mconv4_stage4(x)) 177 | x = self.Mconv5_stage4(x) 178 | 179 | return x 180 | 181 | def _stage5(self, pool3_stage2_map, Mconv5_stage4_map): 182 | """ 183 | Output result of stage 5 184 | :param pool3_stage2_map: 185 | :param Mconv5_stage4_map: 186 | :return:Mconv5_stage5_map 187 | """ 188 | x = F.relu(self.conv1_stage5(pool3_stage2_map)) 189 | x = torch.cat([x, Mconv5_stage4_map], dim=1) 190 | x = F.relu(self.Mconv1_stage5(x)) 191 | x = F.relu(self.Mconv2_stage5(x)) 192 | x = F.relu(self.Mconv3_stage5(x)) 193 | x = F.relu(self.Mconv4_stage5(x)) 194 | # x = F.sigmoid(self.Mconv4_stage5(x)) 195 | x = self.Mconv5_stage5(x) 196 | 197 | return x 198 | 199 | def _stage6(self, pool3_stage2_map, Mconv5_stage5_map): 200 | """ 201 | Output result of stage 6 202 | :param pool3_stage2_map: 203 | :param Mconv5_stage6_map: 204 | :param pool_center_lower_map: 205 | :return:Mconv5_stage6_map 206 | """ 207 | x = F.relu(self.conv1_stage6(pool3_stage2_map)) 208 | x = torch.cat([x, Mconv5_stage5_map], dim=1) 209 | x = F.relu(self.Mconv1_stage6(x)) 210 | x = F.relu(self.Mconv2_stage6(x)) 211 | x = F.relu(self.Mconv3_stage6(x)) 212 | x = F.relu(self.Mconv4_stage6(x)) 213 | # x = F.sigmoid(self.Mconv4_stage6(x)) 214 | x = self.Mconv5_stage6(x) 215 | 216 | return x 217 | 218 | 219 | def forward(self, 220 | data: torch.Tensor 221 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 222 | depth_tensor = data 223 | conv7_stage1_map = self._stage1(depth_tensor) # result of stage 1 224 | pool3_stage2_map = self._middle(depth_tensor) 225 | 226 | Mconv5_stage2_map = self._stage2(pool3_stage2_map, conv7_stage1_map) # result of stage 2 227 | Mconv5_stage3_map = self._stage3(pool3_stage2_map, Mconv5_stage2_map) # result of stage 3 228 | Mconv5_stage4_map = self._stage4(pool3_stage2_map, Mconv5_stage3_map) # result of stage 4 229 | Mconv5_stage5_map = self._stage5(pool3_stage2_map, Mconv5_stage4_map) # result of stage 5 230 | Mconv5_stage6_map = self._stage6(pool3_stage2_map, Mconv5_stage5_map) # result of stage 6 231 | ###################### customization for DSTN 232 | # full_unnormalized_heatmaps_markers = conv7_stage1_map + Mconv5_stage2_map + Mconv5_stage3_map 233 | full_unnormalized_heatmaps = conv7_stage1_map + Mconv5_stage2_map + Mconv5_stage3_map + Mconv5_stage4_map + Mconv5_stage5_map + Mconv5_stage6_map 234 | 235 | return full_unnormalized_heatmaps 236 | -------------------------------------------------------------------------------- /src/modules/lightning/models/dsntnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Aiden Nibali 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | DSNT (soft-argmax) operations for use in PyTorch computation graphs. 17 | """ 18 | 19 | from functools import reduce 20 | from operator import mul 21 | 22 | import torch 23 | import torch.nn.functional 24 | from torch.nn import functional as F 25 | 26 | 27 | def linear_expectation(probs, values): 28 | assert(len(values) == probs.ndimension() - 2) 29 | expectation = [] 30 | for i in range(2, probs.ndimension()): 31 | # Marginalise probabilities 32 | marg = probs 33 | for j in range(probs.ndimension() - 1, 1, -1): 34 | if i != j: 35 | marg = marg.sum(j, keepdim=False) 36 | # Calculate expectation along axis `i` 37 | expectation.append((marg * values[len(expectation)]).sum(-1, keepdim=False)) 38 | return torch.stack(expectation, -1) 39 | 40 | 41 | def normalized_linspace(length, dtype=None, device=None): 42 | """Generate a vector with values ranging from -1 to 1. 43 | 44 | Note that the values correspond to the "centre" of each cell, so 45 | -1 and 1 are always conceptually outside the bounds of the vector. 46 | For example, if length = 4, the following vector is generated: 47 | 48 | ```text 49 | [ -0.75, -0.25, 0.25, 0.75 ] 50 | ^ ^ ^ 51 | -1 0 1 52 | ``` 53 | 54 | Args: 55 | length: The length of the vector 56 | 57 | Returns: 58 | The generated vector 59 | """ 60 | if isinstance(length, torch.Tensor): 61 | length = length.to(device, dtype) 62 | first = -(length - 1.0) / length 63 | return torch.arange(length, dtype=dtype, device=device) * (2.0 / length) + first 64 | 65 | 66 | def soft_argmax(heatmaps, normalized_coordinates=True): 67 | if normalized_coordinates: 68 | values = [normalized_linspace(d, dtype=heatmaps.dtype, device=heatmaps.device) 69 | for d in heatmaps.size()[2:]] 70 | else: 71 | values = [torch.arange(0, d, dtype=heatmaps.dtype, device=heatmaps.device) 72 | for d in heatmaps.size()[2:]] 73 | coords = linear_expectation(heatmaps, values) 74 | # We flip the tensor like this instead of using `coords.flip(-1)` because aten::flip is not yet 75 | # supported by the ONNX exporter. 
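    # Splitting the last dimension and re-concatenating the pieces in reverse order is
    # equivalent to coords.flip(-1): linear_expectation yields (y, x) / (z, y, x) ordering,
    # and this reversal returns coordinates in the (x, y[, z]) convention used elsewhere (e.g. make_gauss).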
76 | coords = torch.cat(tuple(reversed(coords.split(1, -1))), -1) 77 | return coords 78 | 79 | def soft_argmax_3d(heatmaps, depth_dim_scale=3): 80 | assert isinstance(heatmaps, torch.Tensor) 81 | out_divider = 4 82 | num_of_points = heatmaps.size()[1] // depth_dim_scale 83 | out_size = [out_divider * out_divider * depth_dim_scale, heatmaps.size()[2] // out_divider, heatmaps.size()[3] // out_divider] 84 | 85 | heatmaps = heatmaps.reshape((-1, num_of_points, out_divider * out_divider * depth_dim_scale * out_size[1] * out_size[2])) 86 | heatmaps = F.softmax(heatmaps, 2) 87 | heatmaps = heatmaps.reshape((-1, num_of_points, out_size[0], out_size[1], out_size[2])) 88 | 89 | accu_x = heatmaps.sum(dim=(2,3)) 90 | accu_y = heatmaps.sum(dim=(2,4)) 91 | accu_z = heatmaps.sum(dim=(3,4)) 92 | 93 | accu_x = accu_x * torch.cuda.comm.broadcast(torch.arange(1,out_size[2]+1).type(torch.cuda.FloatTensor), devices=[accu_x.device.index])[0] 94 | accu_y = accu_y * torch.cuda.comm.broadcast(torch.arange(1,out_size[1]+1).type(torch.cuda.FloatTensor), devices=[accu_y.device.index])[0] 95 | accu_z = accu_z * torch.cuda.comm.broadcast(torch.arange(1,out_size[0]+1).type(torch.cuda.FloatTensor), devices=[accu_z.device.index])[0] 96 | 97 | accu_x = accu_x.sum(dim=2, keepdim=True) -1 98 | accu_y = accu_y.sum(dim=2, keepdim=True) -1 99 | accu_z = accu_z.sum(dim=2, keepdim=True) -1 100 | 101 | coord_out = torch.cat((accu_x, accu_y, accu_z), dim=2) 102 | 103 | return coord_out, heatmaps 104 | 105 | def dsnt(heatmaps, **kwargs): 106 | """Differentiable spatial to numerical transform. 107 | 108 | Args: 109 | heatmaps (torch.Tensor): Spatial representation of locations 110 | 111 | Returns: 112 | Numerical coordinates corresponding to the locations in the heatmaps. 113 | """ 114 | return soft_argmax(heatmaps, **kwargs) 115 | 116 | 117 | def flat_softmax(inp): 118 | """Compute the softmax with all but the first two tensor dimensions combined.""" 119 | 120 | orig_size = inp.size() 121 | # flat = inp.view(-1, reduce(mul, orig_size[2:])) 122 | flat = inp.reshape(-1, reduce(mul, orig_size[2:])) 123 | flat = torch.nn.functional.softmax(flat, -1) 124 | return flat.view(*orig_size) 125 | 126 | 127 | def euclidean_losses(actual, target): 128 | """Calculate the Euclidean losses for multi-point samples. 129 | 130 | Each sample must contain `n` points, each with `d` dimensions. For example, 131 | in the MPII human pose estimation task n=16 (16 joint locations) and 132 | d=2 (locations are 2D). 133 | 134 | Args: 135 | actual (Tensor): Predictions (B x L x D) 136 | target (Tensor): Ground truth target (B x L x D) 137 | 138 | 139 | Returns: 140 | Tensor: Losses (B x L) 141 | """ 142 | assert actual.size() == target.size(), 'input tensors must have the same size' 143 | return torch.norm(actual - target, p=2, dim=-1, keepdim=False) 144 | 145 | def squared_losses(actual, target): 146 | """Calculate the Euclidean losses for multi-point samples. 147 | 148 | Each sample must contain `n` points, each with `d` dimensions. For example, 149 | in the MPII human pose estimation task n=16 (16 joint locations) and 150 | d=2 (locations are 2D). 
151 | 152 | Args: 153 | actual (Tensor): Predictions (B x L x D) 154 | target (Tensor): Ground truth target (B x L x D) 155 | 156 | 157 | Returns: 158 | Tensor: Losses (B x L) 159 | """ 160 | assert actual.size() == target.size(), 'input tensors must have the same size' 161 | e = torch.norm(actual - target, p=2, dim=-1, keepdim=False) 162 | return e * e 163 | 164 | 165 | def l1_losses(actual, target): 166 | """Calculate the average L1 losses for multi-point samples. 167 | 168 | Args: 169 | actual (Tensor): Predictions (B x L x D) 170 | target (Tensor): Ground truth target (B x L x D) 171 | 172 | Returns: 173 | Tensor: Losses (B x L) 174 | """ 175 | assert actual.size() == target.size(), 'input tensors must have the same size' 176 | return torch.nn.functional.l1_loss(actual, target, reduction='none').mean(-1) 177 | 178 | 179 | def mse_losses(actual, target): 180 | """Calculate the average squared L2 losses for multi-point samples. 181 | 182 | Args: 183 | actual (Tensor): Predictions (B x L x D) 184 | target (Tensor): Ground truth target (B x L x D) 185 | 186 | Returns: 187 | Tensor: Losses (B x L) 188 | """ 189 | assert actual.size() == target.size(), 'input tensors must have the same size' 190 | return torch.nn.functional.mse_loss(actual, target, reduction='none').mean(-1) 191 | 192 | 193 | def make_gauss(means, size, sigma, normalize=True): 194 | """Draw Gaussians. 195 | 196 | This function is differential with respect to means. 197 | 198 | Note on ordering: `size` expects [..., depth, height, width], whereas 199 | `means` expects x, y, z, ... 200 | 201 | Args: 202 | means: coordinates containing the Gaussian means (units: normalized coordinates) 203 | size: size of the generated images (units: pixels) 204 | sigma: standard deviation of the Gaussian (units: pixels) 205 | normalize: when set to True, the returned Gaussians will be normalized 206 | """ 207 | 208 | dim_range = range(-1, -(len(size) + 1), -1) 209 | coords_list = [normalized_linspace(s, dtype=means.dtype, device=means.device) 210 | for s in reversed(size)] 211 | 212 | # PDF = exp(-(x - \mu)^2 / (2 \sigma^2)) 213 | 214 | # dists <- (x - \mu)^2 215 | dists = [(x - mean) ** 2 for x, mean in zip(coords_list, means.split(1, -1))] 216 | 217 | # ks <- -1 / (2 \sigma^2) 218 | stddevs = [2 * sigma / s for s in reversed(size)] 219 | ks = [-0.5 * (1 / stddev) ** 2 for stddev in stddevs] 220 | 221 | exps = [(dist * k).exp() for k, dist in zip(ks, dists)] 222 | 223 | # Combine dimensions of the Gaussian 224 | gauss = reduce(mul, [ 225 | reduce(lambda t, d: t.unsqueeze(d), filter(lambda d: d != dim, dim_range), dist) 226 | for dim, dist in zip(dim_range, exps) 227 | ]) 228 | 229 | if not normalize: 230 | return gauss 231 | 232 | # Normalize the Gaussians 233 | val_sum = reduce(lambda t, dim: t.sum(dim, keepdim=True), dim_range, gauss) + 1e-24 234 | return gauss / val_sum 235 | 236 | 237 | def average_loss(losses, mask=None): 238 | """Calculate the average of per-location losses. 
239 | 240 | Args: 241 | losses (Tensor): Predictions (B x L) 242 | mask (Tensor, optional): Mask of points to include in the loss calculation 243 | (B x L), defaults to including everything 244 | """ 245 | 246 | if mask is not None: 247 | assert mask.size() == losses.size(), 'mask must be the same size as losses' 248 | losses = losses * mask 249 | denom = mask.sum() 250 | else: 251 | denom = losses.numel() 252 | 253 | # Prevent division by zero 254 | if isinstance(denom, int): 255 | denom = max(denom, 1) 256 | else: 257 | denom = denom.clamp(1) 258 | 259 | return losses.sum() / denom 260 | 261 | 262 | def _kl(p, q, ndims): 263 | eps = 1e-24 264 | unsummed_kl = p * ((p + eps).log() - (q + eps).log()) 265 | kl_values = reduce(lambda t, _: t.sum(-1, keepdim=False), range(ndims), unsummed_kl) 266 | return kl_values 267 | 268 | 269 | def _js(p, q, ndims): 270 | m = 0.5 * (p + q) 271 | return 0.5 * _kl(p, m, ndims) + 0.5 * _kl(q, m, ndims) 272 | 273 | 274 | def _divergence_reg_losses(heatmaps, mu_t, sigma_t, divergence): 275 | ndims = mu_t.size(-1) 276 | assert heatmaps.dim() == ndims + 2, 'expected heatmaps to be a {}D tensor'.format(ndims + 2) 277 | assert heatmaps.size()[:-ndims] == mu_t.size()[:-1] 278 | 279 | gauss = make_gauss(mu_t, heatmaps.size()[2:], sigma_t) 280 | divergences = divergence(heatmaps, gauss, ndims) 281 | return divergences, gauss 282 | 283 | 284 | def kl_reg_losses(heatmaps, mu_t, sigma_t): 285 | """Calculate Kullback-Leibler divergences between heatmaps and target Gaussians. 286 | 287 | Args: 288 | heatmaps (torch.Tensor): Heatmaps generated by the model 289 | mu_t (torch.Tensor): Centers of the target Gaussians (in normalized units) 290 | sigma_t (float): Standard deviation of the target Gaussians (in pixels) 291 | 292 | Returns: 293 | Per-location KL divergences. 294 | """ 295 | 296 | return _divergence_reg_losses(heatmaps, mu_t, sigma_t, _kl) 297 | 298 | 299 | def js_reg_losses(heatmaps, mu_t, sigma_t): 300 | """Calculate Jensen-Shannon divergences between heatmaps and target Gaussians. 301 | 302 | Args: 303 | heatmaps (torch.Tensor): Heatmaps generated by the model 304 | mu_t (torch.Tensor): Centers of the target Gaussians (in normalized units) 305 | sigma_t (float): Standard deviation of the target Gaussians (in pixels) 306 | 307 | Returns: 308 | Per-location JS divergences. 309 | """ 310 | 311 | return _divergence_reg_losses(heatmaps, mu_t, sigma_t, _js) 312 | 313 | 314 | def variance_reg_losses(heatmaps, sigma_t): 315 | """Calculate the loss between heatmap variances and target variance. 316 | 317 | Note that this is slightly different from the version used in the 318 | DSNT paper. This version uses pixel units for variance, which 319 | produces losses that are larger by a constant factor. 320 | 321 | Args: 322 | heatmaps (torch.Tensor): Heatmaps generated by the model 323 | sigma_t (float): Target standard deviation (in pixels) 324 | 325 | Returns: 326 | Per-location sum of square errors for variance. 
327 | """ 328 | 329 | # mu = E[X] 330 | values = [normalized_linspace(d, dtype=heatmaps.dtype, device=heatmaps.device) 331 | for d in heatmaps.size()[2:]] 332 | mu = linear_expectation(heatmaps, values) 333 | # var = E[(X - mu)^2] 334 | values = [(a - b.squeeze(0)) ** 2 for a, b in zip(values, mu.split(1, -1))] 335 | var = linear_expectation(heatmaps, values) 336 | 337 | 338 | heatmap_size = torch.tensor(list(heatmaps.size()[2:]), dtype=var.dtype, device=var.device) 339 | actual_variance = var * (heatmap_size / 2) ** 2 340 | target_variance = sigma_t ** 2 341 | sq_error = (actual_variance - target_variance) ** 2 342 | 343 | return sq_error.sum(-1, keepdim=False) 344 | 345 | 346 | def normalized_to_pixel_coordinates(coords, size): 347 | """Convert from normalized coordinates to pixel coordinates. 348 | 349 | Args: 350 | coords: Coordinate tensor, where elements in the last dimension are ordered as (x, y, ...). 351 | size: Number of pixels in each spatial dimension, ordered as (..., height, width). 352 | 353 | Returns: 354 | `coords` in pixel coordinates. 355 | """ 356 | if torch.is_tensor(coords): 357 | size = coords.new_tensor(size).flip(-1) 358 | return 0.5 * ((coords + 1) * size - 1) 359 | 360 | 361 | def pixel_to_normalized_coordinates(coords, size): 362 | """Convert from pixel coordinates to normalized coordinates. 363 | 364 | Args: 365 | coords: Coordinate tensor, where elements in the last dimension are ordered as (x, y, ...). 366 | size: Number of pixels in each spatial dimension, ordered as (..., height, width). 367 | 368 | Returns: 369 | `coords` in normalized coordinates. 370 | """ 371 | if torch.is_tensor(coords): 372 | size = coords.new_tensor(size).flip(-1) 373 | return ((2 * coords + 1) / size) - 1 374 | -------------------------------------------------------------------------------- /src/modules/lightning/models/graphunet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.parameter import Parameter 5 | import numpy as np 6 | 7 | class GraphConv(nn.Module): 8 | 9 | def __init__(self, in_features, out_features, activation=nn.ReLU(inplace=True)): 10 | super(GraphConv, self).__init__() 11 | self.fc = nn.Linear(in_features=in_features, out_features=out_features) 12 | #self.adj_sq = adj_sq 13 | self.activation = activation 14 | #self.scale_identity = scale_identity 15 | #self.I = Parameter(torch.eye(number_of_nodes, requires_grad=False).unsqueeze(0)) 16 | 17 | 18 | def laplacian(self, A_hat): 19 | D_hat = (torch.sum(A_hat, 0) + 1e-5) ** (-0.5) 20 | L = D_hat * A_hat * D_hat 21 | return L 22 | 23 | 24 | def laplacian_batch(self, A_hat): 25 | #batch, N = A.shape[:2] 26 | #if self.adj_sq: 27 | # A = torch.bmm(A, A) # use A^2 to increase graph connectivity 28 | #I = torch.eye(N).unsqueeze(0).to(device) 29 | #I = self.I 30 | #if self.scale_identity: 31 | # I = 2 * I # increase weight of self connections 32 | #A_hat = A + I 33 | batch, N = A_hat.shape[:2] 34 | D_hat = (torch.sum(A_hat, 1) + 1e-5) ** (-0.5) 35 | L = D_hat.view(batch, N, 1) * A_hat * D_hat.view(batch, 1, N) 36 | return L 37 | 38 | 39 | def forward(self, X, A): 40 | batch = X.size(0) 41 | #A = self.laplacian(A) 42 | A_hat = A.unsqueeze(0).repeat(batch, 1, 1) 43 | #X = self.fc(torch.bmm(A_hat, X)) 44 | X = self.fc(torch.bmm(self.laplacian_batch(A_hat), X)) 45 | if self.activation is not None: 46 | X = self.activation(X) 47 | return X 48 | 49 | 50 | class GraphPool(nn.Module): 51 | 52 | def 
__init__(self, in_nodes, out_nodes): 53 | super(GraphPool, self).__init__() 54 | self.fc = nn.Linear(in_features=in_nodes, out_features=out_nodes) 55 | 56 | 57 | def forward(self, X): 58 | X = X.transpose(1, 2) 59 | X = self.fc(X) 60 | X = X.transpose(1, 2) 61 | return X 62 | 63 | 64 | class GraphUnpool(nn.Module): 65 | 66 | def __init__(self, in_nodes, out_nodes): 67 | super(GraphUnpool, self).__init__() 68 | self.fc = nn.Linear(in_features=in_nodes, out_features=out_nodes) 69 | 70 | 71 | def forward(self, X): 72 | X = X.transpose(1, 2) 73 | X = self.fc(X) 74 | X = X.transpose(1, 2) 75 | return X 76 | 77 | 78 | class GraphUNet(nn.Module): 79 | 80 | def __init__(self, in_features=2, out_features=3, initial_points=19): 81 | super(GraphUNet, self).__init__() 82 | 83 | a0 = initial_points 84 | a1 = (a0 + 1) // 2 85 | a2 = (a1 + 1) // 2 86 | a3 = (a2 + 1) // 2 87 | a4 = (a3 + 1) // 2 88 | a5 = (a4 + 1) // 2 89 | 90 | self.A_0 = Parameter(torch.eye(a0).float().cuda(), requires_grad=True) 91 | self.A_1 = Parameter(torch.eye(a1).float().cuda(), requires_grad=True) 92 | self.A_2 = Parameter(torch.eye(a2).float().cuda(), requires_grad=True) 93 | self.A_3 = Parameter(torch.eye(a3).float().cuda(), requires_grad=True) 94 | self.A_4 = Parameter(torch.eye(a4).float().cuda(), requires_grad=True) 95 | self.A_5 = Parameter(torch.eye(a5).float().cuda(), requires_grad=True) 96 | 97 | self.gconv1 = GraphConv(in_features, 4) # 29 = 21 H + 8 O 98 | self.pool1 = GraphPool(a0, a1) 99 | 100 | self.gconv2 = GraphConv(4, 8) # 15 = 11 H + 4 O 101 | self.pool2 = GraphPool(a1, a2) 102 | 103 | self.gconv3 = GraphConv(8, 16) # 7 = 5 H + 2 O 104 | self.pool3 = GraphPool(a2, a3) 105 | 106 | self.gconv4 = GraphConv(16, 32) # 4 = 3 H + 1 O 107 | self.pool4 = GraphPool(a3, a4) 108 | 109 | self.gconv5 = GraphConv(32, 64) # 2 = 1 H + 1 O 110 | self.pool5 = GraphPool(a4, a5) 111 | 112 | self.fc1 = nn.Linear(64, 20) 113 | 114 | self.fc2 = nn.Linear(20, 64) 115 | 116 | self.unpool6 = GraphUnpool(a5, a4) 117 | self.gconv6 = GraphConv(128, 32) 118 | 119 | self.unpool7 = GraphUnpool(a4, a3) 120 | self.gconv7 = GraphConv(64, 16) 121 | 122 | self.unpool8 = GraphUnpool(a3, a2) 123 | self.gconv8 = GraphConv(32, 8) 124 | 125 | self.unpool9 = GraphUnpool(a2, a1) 126 | self.gconv9 = GraphConv(16, 4) 127 | 128 | self.unpool10 = GraphUnpool(a1, a0) 129 | self.gconv10 = GraphConv(8, out_features, activation=None) 130 | 131 | self.ReLU = nn.ReLU() 132 | 133 | def _get_decoder_input(self, X_e, X_d): 134 | return torch.cat((X_e, X_d), 2) 135 | 136 | def forward(self, X): 137 | X_0 = self.gconv1(X, self.A_0) 138 | X_1 = self.pool1(X_0) 139 | 140 | X_1 = self.gconv2(X_1, self.A_1) 141 | X_2 = self.pool2(X_1) 142 | 143 | X_2 = self.gconv3(X_2, self.A_2) 144 | X_3 = self.pool3(X_2) 145 | 146 | X_3 = self.gconv4(X_3, self.A_3) 147 | X_4 = self.pool4(X_3) 148 | 149 | X_4 = self.gconv5(X_4, self.A_4) 150 | X_5 = self.pool5(X_4) 151 | 152 | global_features = self.ReLU(self.fc1(X_5)) 153 | global_features = self.ReLU(self.fc2(global_features)) 154 | 155 | X_6 = self.unpool6(global_features) 156 | X_6 = self.gconv6(self._get_decoder_input(X_4, X_6), self.A_4) 157 | 158 | X_7 = self.unpool7(X_6) 159 | X_7 = self.gconv7(self._get_decoder_input(X_3, X_7), self.A_3) 160 | 161 | X_8 = self.unpool8(X_7) 162 | X_8 = self.gconv8(self._get_decoder_input(X_2, X_8), self.A_2) 163 | 164 | X_9 = self.unpool9(X_8) 165 | X_9 = self.gconv9(self._get_decoder_input(X_1, X_9), self.A_1) 166 | 167 | X_10 = self.unpool10(X_9) 168 | X_10 = 
self.gconv10(self._get_decoder_input(X_0, X_10), self.A_0) 169 | 170 | return X_10 171 | 172 | 173 | class GraphNet(nn.Module): 174 | 175 | def __init__(self, in_features=2, out_features=2, initial_points=19): 176 | super(GraphNet, self).__init__() 177 | 178 | self.A_hat = Parameter(torch.eye(initial_points).float().cuda(), requires_grad=True) 179 | 180 | self.gconv1 = GraphConv(in_features, 128) 181 | self.gconv2 = GraphConv(128, 16) 182 | self.gconv3 = GraphConv(16, out_features, activation=None) 183 | 184 | 185 | def forward(self, X): 186 | X_0 = self.gconv1(X, self.A_hat) 187 | X_1 = self.gconv2(X_0, self.A_hat) 188 | X_2 = self.gconv3(X_1, self.A_hat) 189 | 190 | return X_2 191 | -------------------------------------------------------------------------------- /src/modules/lightning/models/hopenet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | from .resnet import ( 5 | resnet50, 6 | resnet10 7 | ) 8 | 9 | from .graphunet import ( 10 | GraphNet, 11 | GraphUNet 12 | ) 13 | 14 | import typing 15 | 16 | class HopeNet(nn.Module): 17 | def __init__(self): 18 | super(HopeNet, self).__init__() 19 | self.resnet = resnet50(pretrained=False, num_classes=19*2) 20 | self.graphnet = GraphNet(in_features=2050, out_features=2) 21 | self.graphunet = GraphUNet(in_features=2, out_features=3) 22 | 23 | def forward(self, 24 | data : torch.Tensor 25 | ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 26 | points2D_init, features = self.resnet(data) 27 | features = features.unsqueeze(1).repeat(1, 19, 1) 28 | # batch = points2D.shape[0] 29 | in_features = torch.cat([points2D_init, features], dim=2) 30 | points2D = self.graphnet(in_features) 31 | points3D = self.graphunet(points2D) 32 | return points2D_init, points2D, points3D 33 | -------------------------------------------------------------------------------- /src/modules/lightning/models/hrnet_e2e.py: -------------------------------------------------------------------------------- 1 | from moai.utils.arguments import ensure_string_list 2 | 3 | import moai.networks.lightning as minet 4 | import moai.nn.convolution as mic 5 | import moai.nn.residual as mires 6 | import moai.nn.sampling.spatial.downsample as mids 7 | import moai.modules.lightning as mimod 8 | import moai.nn.utils as miu 9 | 10 | import torch 11 | 12 | import hydra.utils as hyu 13 | import omegaconf.omegaconf as omegaconf 14 | import typing 15 | import logging 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | #NOTE: from https://github.com/HRNet/HRNet-Bottom-Up-Pose-Estimation/blob/master/lib/models/pose_hrnet.py 20 | #NOTE: from https://arxiv.org/pdf/1908.07919.pdf 21 | 22 | __all__ = ["HRNetMod_e2e"] 23 | 24 | class HRNetMod_e2e(torch.nn.Module): 25 | def __init__(self, 26 | configuration: omegaconf.DictConfig, 27 | modules: omegaconf.DictConfig 28 | ): 29 | super(HRNetMod_e2e, self).__init__( 30 | # data=data, parameters=parameters, 31 | # feedforward=feedforward, monads=monads, 32 | # supervision=supervision, validation=validation, 33 | # export=export, visualization=visualization, 34 | ) 35 | preproc = configuration.preproc 36 | residual = configuration.residual 37 | #NOTE: preproc = stem + layer1 38 | preproc_convs = [] 39 | prev_features = configuration.in_features 40 | self.out = configuration.out_features 41 | if not preproc == None: 42 | stem = preproc.stem 43 | for b, c, a, f, k, s, p in zip( 44 | stem.blocks, stem.convolutions, 45 | stem.activations, 
stem.features, 46 | stem.kernel_sizes, stem.strides, stem.paddings): 47 | preproc_convs.append(mic.make_conv_block( 48 | block_type=b, 49 | convolution_type=c, 50 | in_features=prev_features, 51 | out_features=f, 52 | activation_type=a, 53 | convolution_params={ 54 | "kernel_size": k, 55 | "stride": s, 56 | "padding": p, 57 | }, 58 | )) 59 | prev_features = f 60 | residual_blocks = [] 61 | for i, o, b in zip( 62 | residual.features.in_features, residual.features.out_features, 63 | residual.features.bottleneck_features, 64 | ): 65 | residual_blocks.append(mires.make_residual_block( 66 | block_type=residual.block, 67 | convolution_type=residual.convolution, 68 | out_features=o, 69 | in_features=i, 70 | bottleneck_features=b, 71 | activation_type=residual.activation, 72 | strided=False, 73 | )) 74 | self.pre = torch.nn.Sequential( 75 | *preproc_convs, *residual_blocks, 76 | ) 77 | 78 | start_transition_key = 'start_transition_standard_1' 79 | highres_key = 'highres_standard_1' 80 | stage_transition_key = 'stage_transition_standard_1' 81 | head_key = 'top_branch_1' 82 | else: 83 | start_transition_key = 'start_transition_standard_2' 84 | highres_key = 'highres_standard_2' 85 | stage_transition_key = 'stage_transition_standard_2' 86 | head_key = 'top_branch_2' 87 | 88 | branches_config = configuration.branches 89 | start_trans_config = modules[start_transition_key] 90 | self.start_trans = hyu.instantiate(start_trans_config, 91 | in_features=residual.features.out_features[-1], 92 | start_features=branches_config.start_features 93 | ) 94 | #NOTE: stages 95 | highres_module = modules[highres_key] # NOTE: outputs list of # branches outputs 96 | self.stages = torch.nn.ModuleList([ 97 | torch.nn.Sequential(*[ 98 | hyu.instantiate(highres_module, 99 | branches=i, depth=d, start_features=branches_config.start_features 100 | ) for _, d in zip(range(modules), depths) 101 | ]) for i, modules, depths in zip( 102 | range(2, configuration.stages + 1), 103 | branches_config.modules, 104 | branches_config.depths, 105 | ) 106 | ]) 107 | stage_trans_config = modules[stage_transition_key] 108 | self.stage_transitions = torch.nn.ModuleList([ 109 | hyu.instantiate(stage_trans_config, branches=i + 1, 110 | prev_branch_features=branches_config.start_features * (2 ** i), 111 | ) for i in range(1, configuration.stages - 1) 112 | ]) 113 | head_module = modules[head_key] 114 | self.head = hyu.instantiate(head_module, 115 | stages=configuration.stages, 116 | start_features=branches_config.start_features, 117 | out_features=configuration.out_features, 118 | ) 119 | # self.input = ensure_string_list(configuration.input) 120 | # self.output = ensure_string_list(configuration.output) 121 | self.output_prefix = configuration.output 122 | 123 | def forward(self, 124 | data: torch.Tensor 125 | ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 126 | x = data 127 | if hasattr(self, 'pre'): 128 | x = self.pre(x) 129 | hr_inputs = self.start_trans(x) 130 | combined_hm_preds = [] 131 | combined_hm_preds.append(hr_inputs) 132 | for stage, trans in zip(self.stages, self.stage_transitions): 133 | features = stage(hr_inputs) 134 | combined_hm_preds.append(features) 135 | hr_inputs = trans(features) 136 | 137 | combined_hm_preds.append(self.stages[-1](hr_inputs)) 138 | combined_hm_preds_final = [] 139 | for i, features in enumerate(combined_hm_preds): 140 | combined_hm_preds_final.append(self.head(features)) 141 | 142 | aggregated_hm = torch.zeros_like(combined_hm_preds_final[0]) 143 | for i, heatmap in 
enumerate(combined_hm_preds_final): 144 | aggregated_hm += heatmap 145 | 146 | 147 | return aggregated_hm[:, :53, ...], aggregated_hm[:, 53:, ...], torch.cat([x, aggregated_hm], dim=1) 148 | -------------------------------------------------------------------------------- /src/modules/lightning/models/hrnet_mod.py: -------------------------------------------------------------------------------- 1 | from moai.utils.arguments import ensure_string_list 2 | 3 | import moai.networks.lightning as minet 4 | import moai.nn.convolution as mic 5 | import moai.nn.residual as mires 6 | import moai.nn.sampling.spatial.downsample as mids 7 | import moai.modules.lightning as mimod 8 | import moai.nn.utils as miu 9 | 10 | import torch 11 | 12 | import hydra.utils as hyu 13 | import omegaconf.omegaconf as omegaconf 14 | import typing 15 | import logging 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | #NOTE: from https://github.com/HRNet/HRNet-Bottom-Up-Pose-Estimation/blob/master/lib/models/pose_hrnet.py 20 | #NOTE: from https://arxiv.org/pdf/1908.07919.pdf 21 | 22 | __all__ = ["HRNetMod"] 23 | 24 | class HRNetMod(torch.nn.Module): 25 | def __init__(self, 26 | configuration: omegaconf.DictConfig, 27 | modules: omegaconf.DictConfig 28 | ): 29 | super(HRNetMod, self).__init__( 30 | # data=data, parameters=parameters, 31 | # feedforward=feedforward, monads=monads, 32 | # supervision=supervision, validation=validation, 33 | # export=export, visualization=visualization, 34 | ) 35 | preproc = configuration.preproc 36 | residual = configuration.residual 37 | #NOTE: preproc = stem + layer1 38 | preproc_convs = [] 39 | prev_features = configuration.in_features 40 | 41 | if not preproc == None: 42 | stem = preproc.stem 43 | for b, c, a, f, k, s, p in zip( 44 | stem.blocks, stem.convolutions, 45 | stem.activations, stem.features, 46 | stem.kernel_sizes, stem.strides, stem.paddings): 47 | preproc_convs.append(mic.make_conv_block( 48 | block_type=b, 49 | convolution_type=c, 50 | in_features=prev_features, 51 | out_features=f, 52 | activation_type=a, 53 | convolution_params={ 54 | "kernel_size": k, 55 | "stride": s, 56 | "padding": p, 57 | }, 58 | )) 59 | prev_features = f 60 | residual_blocks = [] 61 | for i, o, b in zip( 62 | residual.features.in_features, residual.features.out_features, 63 | residual.features.bottleneck_features, 64 | ): 65 | residual_blocks.append(mires.make_residual_block( 66 | block_type=residual.block, 67 | convolution_type=residual.convolution, 68 | out_features=o, 69 | in_features=i, 70 | bottleneck_features=b, 71 | activation_type=residual.activation, 72 | strided=False, 73 | )) 74 | self.pre = torch.nn.Sequential( 75 | *preproc_convs, *residual_blocks, 76 | ) 77 | 78 | start_transition_key = 'start_transition_standard_1' 79 | highres_key = 'highres_standard_1' 80 | stage_transition_key = 'stage_transition_standard_1' 81 | head_key = 'top_branch_1' 82 | else: 83 | start_transition_key = 'start_transition_standard_2' 84 | highres_key = 'highres_standard_2' 85 | stage_transition_key = 'stage_transition_standard_2' 86 | head_key = 'top_branch_2' 87 | 88 | branches_config = configuration.branches 89 | start_trans_config = modules[start_transition_key] 90 | self.start_trans = hyu.instantiate(start_trans_config, 91 | in_features=residual.features.out_features[-1], 92 | start_features=branches_config.start_features 93 | ) 94 | #NOTE: stages 95 | highres_module = modules[highres_key] # NOTE: outputs list of # branches outputs 96 | self.stages = torch.nn.ModuleList([ 97 | torch.nn.Sequential(*[ 98 
| hyu.instantiate(highres_module, 99 | branches=i, depth=d, start_features=branches_config.start_features 100 | ) for _, d in zip(range(modules), depths) 101 | ]) for i, modules, depths in zip( 102 | range(2, configuration.stages + 1), 103 | branches_config.modules, 104 | branches_config.depths, 105 | ) 106 | ]) 107 | stage_trans_config = modules[stage_transition_key] 108 | self.stage_transitions = torch.nn.ModuleList([ 109 | hyu.instantiate(stage_trans_config, branches=i + 1, 110 | prev_branch_features=branches_config.start_features * (2 ** i), 111 | ) for i in range(1, configuration.stages - 1) 112 | ]) 113 | head_module = modules[head_key] 114 | self.head = hyu.instantiate(head_module, 115 | stages=configuration.stages, 116 | start_features=branches_config.start_features, 117 | out_features=configuration.out_features, 118 | ) 119 | # self.input = ensure_string_list(configuration.input) 120 | # self.output = ensure_string_list(configuration.output) 121 | self.output_prefix = configuration.output 122 | 123 | def forward(self, 124 | data: torch.Tensor 125 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 126 | x = data 127 | if hasattr(self, 'pre'): 128 | x = self.pre(x) 129 | hr_inputs = self.start_trans(x) 130 | combined_hm_preds = [] 131 | combined_hm_preds.append(hr_inputs) 132 | for stage, trans in zip(self.stages, self.stage_transitions): 133 | features = stage(hr_inputs) 134 | combined_hm_preds.append(features) 135 | hr_inputs = trans(features) 136 | 137 | combined_hm_preds.append(self.stages[-1](hr_inputs)) 138 | combined_hm_preds_final = [] 139 | for i, features in enumerate(combined_hm_preds): 140 | combined_hm_preds_final.append(self.head(features)) 141 | 142 | aggregated_hm = torch.zeros_like(combined_hm_preds_final[0]) 143 | for i, heatmap in enumerate(combined_hm_preds_final): 144 | aggregated_hm += heatmap 145 | return aggregated_hm, torch.cat([x, aggregated_hm], dim=1) -------------------------------------------------------------------------------- /src/modules/lightning/models/hrnet_ps.py: -------------------------------------------------------------------------------- 1 | from moai.utils.arguments import ensure_string_list 2 | 3 | import moai.networks.lightning as minet 4 | import moai.nn.convolution as mic 5 | import moai.nn.residual as mires 6 | import moai.nn.sampling.spatial.downsample as mids 7 | import moai.modules.lightning as mimod 8 | import moai.nn.utils as miu 9 | 10 | import torch 11 | 12 | import hydra.utils as hyu 13 | import omegaconf.omegaconf as omegaconf 14 | import typing 15 | import logging 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | #NOTE: from https://github.com/HRNet/HRNet-Bottom-Up-Pose-Estimation/blob/master/lib/models/pose_hrnet.py 20 | #NOTE: from https://arxiv.org/pdf/1908.07919.pdf 21 | 22 | __all__ = ["HRNetModPS"] 23 | 24 | class HRNetModPS(torch.nn.Module): 25 | def __init__(self, 26 | configuration: omegaconf.DictConfig, 27 | modules: omegaconf.DictConfig 28 | ): 29 | super(HRNetModPS, self).__init__( 30 | # data=data, parameters=parameters, 31 | # feedforward=feedforward, monads=monads, 32 | # supervision=supervision, validation=validation, 33 | # export=export, visualization=visualization, 34 | ) 35 | preproc = configuration.preproc 36 | residual = configuration.residual 37 | #NOTE: preproc = stem + layer1 38 | preproc_convs = [] 39 | prev_features = configuration.in_features 40 | 41 | if not preproc == None: 42 | stem = preproc.stem 43 | for b, c, a, f, k, s, p in zip( 44 | stem.blocks, stem.convolutions, 45 | stem.activations, 
stem.features, 46 | stem.kernel_sizes, stem.strides, stem.paddings): 47 | preproc_convs.append(mic.make_conv_block( 48 | block_type=b, 49 | convolution_type=c, 50 | in_features=prev_features, 51 | out_features=f, 52 | activation_type=a, 53 | convolution_params={ 54 | "kernel_size": k, 55 | "stride": s, 56 | "padding": p, 57 | }, 58 | )) 59 | prev_features = f 60 | residual_blocks = [] 61 | for i, o, b in zip( 62 | residual.features.in_features, residual.features.out_features, 63 | residual.features.bottleneck_features, 64 | ): 65 | residual_blocks.append(mires.make_residual_block( 66 | block_type=residual.block, 67 | convolution_type=residual.convolution, 68 | out_features=o, 69 | in_features=i, 70 | bottleneck_features=b, 71 | activation_type=residual.activation, 72 | strided=False, 73 | )) 74 | self.pre = torch.nn.Sequential( 75 | *preproc_convs, *residual_blocks, 76 | ) 77 | 78 | start_transition_key = 'start_transition_standard_1' 79 | highres_key = 'highres_standard_1' 80 | stage_transition_key = 'stage_transition_standard_1' 81 | head_key = 'top_branch_1' 82 | else: 83 | start_transition_key = 'start_transition_standard_2' 84 | highres_key = 'highres_standard_2' 85 | stage_transition_key = 'stage_transition_standard_2' 86 | head_key = 'top_branch_2' 87 | 88 | branches_config = configuration.branches 89 | start_trans_config = modules[start_transition_key] 90 | self.start_trans = hyu.instantiate(start_trans_config, 91 | in_features=residual.features.out_features[-1], 92 | start_features=branches_config.start_features 93 | ) 94 | #NOTE: stages 95 | highres_module = modules[highres_key] # NOTE: outputs list of # branches outputs 96 | self.stages = torch.nn.ModuleList([ 97 | torch.nn.Sequential(*[ 98 | hyu.instantiate(highres_module, 99 | branches=i, depth=d, start_features=branches_config.start_features 100 | ) for _, d in zip(range(modules), depths) 101 | ]) for i, modules, depths in zip( 102 | range(2, configuration.stages + 1), 103 | branches_config.modules, 104 | branches_config.depths, 105 | ) 106 | ]) 107 | stage_trans_config = modules[stage_transition_key] 108 | self.stage_transitions = torch.nn.ModuleList([ 109 | hyu.instantiate(stage_trans_config, branches=i + 1, 110 | prev_branch_features=branches_config.start_features * (2 ** i), 111 | ) for i in range(1, configuration.stages - 1) 112 | ]) 113 | head_module = modules[head_key] 114 | self.head = hyu.instantiate(head_module, 115 | stages=configuration.stages, 116 | start_features=branches_config.start_features, 117 | out_features=configuration.out_features, 118 | ) 119 | # self.input = ensure_string_list(configuration.input) 120 | # self.output = ensure_string_list(configuration.output) 121 | self.output_prefix = configuration.output 122 | 123 | def forward(self, 124 | data: torch.Tensor 125 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 126 | x = data 127 | if hasattr(self, 'pre'): 128 | x = self.pre(x) 129 | hr_inputs = self.start_trans(x) 130 | for stage, trans in zip(self.stages, self.stage_transitions): 131 | features = stage(hr_inputs) 132 | hr_inputs = trans(features) 133 | prediction = self.head(self.stages[-1](hr_inputs)) 134 | aggregated_hm = prediction 135 | 136 | return aggregated_hm, torch.cat([x, aggregated_hm], dim=1) -------------------------------------------------------------------------------- /src/modules/lightning/models/oml_dual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.modules as nnm 4 | 5 | import 
torch.nn.functional as F 6 | 7 | import typing 8 | 9 | """ typical 2D convolution, WxHxC => WxHxC """ 10 | def conv(in_channels, out_channels, filter, pad, dil, n_type): 11 | if (n_type == 'elu'): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels, out_channels, filter, stride=1, padding=(pad*dil), dilation=dil, bias=False), 14 | nn.ELU(inplace=False) 15 | ) 16 | else: 17 | return nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, filter, stride=1, padding=(pad*dil), dilation=dil, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=False) 21 | ) 22 | 23 | 24 | class OmlDual(nn.Module): 25 | def __init__(self, 26 | num_markers, 27 | num_joints 28 | ): 29 | super(OmlDual, self).__init__() 30 | """ 31 | Args: 32 | width: input width 33 | height: input height 34 | ndf: constant number from channels 35 | dil: dilation value - parameter for convolutional layers 36 | norma_type: normalization type (elu | batch norm) 37 | """ 38 | self.h = 136 39 | self.w = 136 40 | self.dil = 1 41 | self.type = "batch_norm" 42 | self.markers_out = num_markers 43 | self.joints_out = num_joints 44 | self.out = self.markers_out + self.joints_out 45 | 46 | 47 | # ATTENTION: this is hardcoded due to the SoA model 48 | ndf = 64 49 | """ dmc_neural_network """ 50 | self.conv1 = conv(1, ndf, 3, 0, dil=self.dil, n_type=self.type) 51 | self.conv2 = conv(ndf, ndf, 3, 0, dil=self.dil, n_type=self.type) 52 | self.pool3 = nn.MaxPool2d(2, 2, 0, self.dil, False, False) 53 | self.conv4 = conv(ndf, 2 * ndf, 3, 0, dil=self.dil, n_type=self.type) 54 | self.conv5 = conv(2 * ndf, 2 * ndf, 3, 0, dil=self.dil, n_type=self.type) 55 | self.conv6 = conv(2 * ndf, 2 * ndf, 3, 0, dil=self.dil, n_type=self.type) 56 | self.pool7 = nn.MaxPool2d(2, 2, 0, self.dil, False, False) 57 | 58 | 59 | f1d = int((((self.w-2 * 2 * self.dil) / 2 - 3 * 2 * self.dil) \ 60 | / 2 * ((self.w-2 * 2 * self.dil) / 2 - 3 * 2 * self.dil) / 2 ) * ndf * 2) 61 | 62 | #f1d = 4608 63 | self.fc_1 = nn.Linear(f1d, 2048) 64 | self.fc_2 = nn.Linear(2048, 3 * self.out) 65 | 66 | def forward(self, 67 | data: torch.Tensor 68 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 69 | out = self.conv1(data) 70 | out = self.conv2(out) 71 | out = self.pool3(out) 72 | out = self.conv4(out) 73 | out = self.conv5(out) 74 | out = self.conv6(out) 75 | out_viz = self.pool7(out) 76 | 77 | out = torch.reshape(out_viz, (out_viz.size()[0], out_viz.size()[1] * out_viz.size()[2] * out_viz.size()[3])) 78 | 79 | out = self.fc_1(out) 80 | out = nn.functional.relu(out) 81 | out_c = self.fc_2(out) 82 | out = out_c.view(-1, self.out, 3) 83 | return out[:, :self.markers_out], out[:, self.markers_out:, ...] 84 | -------------------------------------------------------------------------------- /src/modules/lightning/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the definitions of the various ResNet models. 3 | Code adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py. 
4 | Forward pass was modified to discard the last fully connected layer 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torch.utils.model_zoo as model_zoo 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=dilation, groups=groups, bias=False, dilation=dilation) 23 | 24 | 25 | def conv1x1(in_planes, out_planes, stride=1): 26 | """1x1 convolution""" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | __constants__ = ['downsample'] 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 35 | base_width=64, dilation=1, norm_layer=None): 36 | super(BasicBlock, self).__init__() 37 | if norm_layer is None: 38 | norm_layer = nn.BatchNorm2d 39 | if groups != 1 or base_width != 64: 40 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 41 | if dilation > 1: 42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 43 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 44 | self.conv1 = conv3x3(inplanes, planes, stride) 45 | self.bn1 = norm_layer(planes) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.conv2 = conv3x3(planes, planes) 48 | self.bn2 = norm_layer(planes) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | identity = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | 62 | if self.downsample is not None: 63 | identity = self.downsample(x) 64 | 65 | out += identity 66 | out = self.relu(out) 67 | 68 | return out 69 | 70 | 71 | class Bottleneck(nn.Module): 72 | expansion = 4 73 | __constants__ = ['downsample'] 74 | 75 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 76 | base_width=64, dilation=1, norm_layer=None): 77 | super(Bottleneck, self).__init__() 78 | if norm_layer is None: 79 | norm_layer = nn.BatchNorm2d 80 | width = int(planes * (base_width / 64.)) * groups 81 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 82 | self.conv1 = conv1x1(inplanes, width) 83 | self.bn1 = norm_layer(width) 84 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 85 | self.bn2 = norm_layer(width) 86 | self.conv3 = conv1x1(width, planes * self.expansion) 87 | self.bn3 = norm_layer(planes * self.expansion) 88 | self.relu = nn.ReLU(inplace=True) 89 | self.downsample = downsample 90 | self.stride = stride 91 | 92 | def forward(self, x): 93 | identity = x 94 | 95 | out = self.conv1(x) 96 | out = self.bn1(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv2(out) 100 | out = self.bn2(out) 101 | out = self.relu(out) 102 | 103 | out = self.conv3(out) 104 | out = self.bn3(out) 105 | 106 | if self.downsample is not None: 107 | identity = self.downsample(x) 108 | 109 | out += identity 110 | out = self.relu(out) 111 | 
112 | return out 113 | 114 | 115 | class ResNet(nn.Module): 116 | 117 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 118 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 119 | norm_layer=None): 120 | super(ResNet, self).__init__() 121 | if norm_layer is None: 122 | norm_layer = nn.BatchNorm2d 123 | self._norm_layer = norm_layer 124 | 125 | self.inplanes = 64 126 | self.dilation = 1 127 | if replace_stride_with_dilation is None: 128 | # each element in the tuple indicates if we should replace 129 | # the 2x2 stride with a dilated convolution instead 130 | replace_stride_with_dilation = [False, False, False] 131 | if len(replace_stride_with_dilation) != 3: 132 | raise ValueError("replace_stride_with_dilation should be None " 133 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 134 | self.groups = groups 135 | self.base_width = width_per_group 136 | self.conv1 = nn.Conv2d(1, self.inplanes, kernel_size=7, stride=2, padding=3, 137 | bias=False) 138 | self.bn1 = norm_layer(self.inplanes) 139 | self.relu = nn.ReLU(inplace=True) 140 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 141 | self.layer1 = self._make_layer(block, 64, layers[0]) 142 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 143 | dilate=replace_stride_with_dilation[0]) 144 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 145 | dilate=replace_stride_with_dilation[1]) 146 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 147 | dilate=replace_stride_with_dilation[2]) 148 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 149 | self.fc = nn.Linear(512 * block.expansion, num_classes) 150 | 151 | for m in self.modules(): 152 | if isinstance(m, nn.Conv2d): 153 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 154 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 155 | nn.init.constant_(m.weight, 1) 156 | nn.init.constant_(m.bias, 0) 157 | 158 | # Zero-initialize the last BN in each residual branch, 159 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
160 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 161 | if zero_init_residual: 162 | for m in self.modules(): 163 | if isinstance(m, Bottleneck): 164 | nn.init.constant_(m.bn3.weight, 0) 165 | elif isinstance(m, BasicBlock): 166 | nn.init.constant_(m.bn2.weight, 0) 167 | 168 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 169 | norm_layer = self._norm_layer 170 | downsample = None 171 | previous_dilation = self.dilation 172 | if dilate: 173 | self.dilation *= stride 174 | stride = 1 175 | if stride != 1 or self.inplanes != planes * block.expansion: 176 | downsample = nn.Sequential( 177 | conv1x1(self.inplanes, planes * block.expansion, stride), 178 | norm_layer(planes * block.expansion), 179 | ) 180 | 181 | layers = [] 182 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 183 | self.base_width, previous_dilation, norm_layer)) 184 | self.inplanes = planes * block.expansion 185 | for _ in range(1, blocks): 186 | layers.append(block(self.inplanes, planes, groups=self.groups, 187 | base_width=self.base_width, dilation=self.dilation, 188 | norm_layer=norm_layer)) 189 | 190 | return nn.Sequential(*layers) 191 | 192 | def forward(self, x): 193 | x = self.conv1(x) 194 | x = self.bn1(x) 195 | x = self.relu(x) 196 | x = self.maxpool(x) 197 | 198 | x = self.layer1(x) 199 | x = self.layer2(x) 200 | x = self.layer3(x) 201 | x = self.layer4(x) 202 | 203 | x = self.avgpool(x) 204 | x = torch.flatten(x, 1) 205 | out = x 206 | x = self.fc(x) 207 | 208 | return x.view(-1, 19, 2), out 209 | 210 | def resnet10(pretrained=False, num_classes=1000, **kwargs): 211 | """Constructs a ResNet-10 model. 212 | Args: 213 | pretrained (bool): If True, returns a model pre-trained on ImageNet 214 | """ 215 | model = ResNet(BasicBlock, [1, 1, 1, 1], num_classes=1000, **kwargs) 216 | # if pretrained: 217 | # model.load_state_dict(model_zoo.load_url(model_urls['resnet10'])) 218 | num_ftrs = model.fc.in_features 219 | model.fc = nn.Linear(num_ftrs, num_classes) 220 | return model 221 | 222 | def resnet18(pretrained=False, num_classes=1000, **kwargs): 223 | """Constructs a ResNet-18 model. 224 | Args: 225 | pretrained (bool): If True, returns a model pre-trained on ImageNet 226 | """ 227 | model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=1000, **kwargs) 228 | if pretrained: 229 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 230 | num_ftrs = model.fc.in_features 231 | model.fc = nn.Linear(num_ftrs, num_classes) 232 | return model 233 | 234 | def resnet50(pretrained=False, num_classes=1000, **kwargs): 235 | """Constructs a ResNet-50 model. 236 | Args: 237 | pretrained (bool): If True, returns a model pre-trained on ImageNet 238 | """ 239 | model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000, **kwargs) 240 | if pretrained: 241 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 242 | num_ftrs = model.fc.in_features 243 | model.fc = nn.Linear(num_ftrs, num_classes) 244 | return model 245 | 246 | def resnet101(pretrained=False, num_classes=1000, **kwargs): 247 | """Constructs a ResNet-101 model. 
248 | Args: 249 | pretrained (bool): If True, returns a model pre-trained on ImageNet 250 | """ 251 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=1000, **kwargs) 252 | if pretrained: 253 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 254 | num_ftrs = model.fc.in_features 255 | model.fc = nn.Linear(num_ftrs, num_classes) 256 | return model 257 | 258 | 259 | def resnet152(pretrained=False, num_classes=1000, **kwargs): 260 | """Constructs a ResNet-152 model. 261 | Args: 262 | pretrained (bool): If True, returns a model pre-trained on ImageNet 263 | """ 264 | model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=1000, **kwargs) 265 | if pretrained: 266 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 267 | num_ftrs = model.fc.in_features 268 | model.fc = nn.Linear(num_ftrs, num_classes) 269 | return model 270 | 271 | -------------------------------------------------------------------------------- /src/modules/lightning/models/stacked_hourglass.py: -------------------------------------------------------------------------------- 1 | import moai.networks.lightning as minet 2 | import moai.nn.convolution as mic 3 | import moai.nn.residual as mires 4 | import moai.nn.sampling.spatial.downsample as mids 5 | import moai.modules.lightning as mimod 6 | import moai.nn.utils as miu 7 | 8 | import torch 9 | 10 | import hydra.utils as hyu 11 | import omegaconf.omegaconf as omegaconf 12 | import typing 13 | import logging 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | #NOTE: from https://github.com/anibali/pytorch-stacked-hourglass/blob/master/src/stacked_hourglass/model.py 18 | #NOTE: from https://github.com/princeton-vl/pytorch_stacked_hourglass/blob/master/models/posenet.py 19 | 20 | __all__ = ["StackedHourglassMod"] 21 | 22 | class StackedHourglassMod(torch.nn.Module): 23 | def __init__(self, 24 | configuration: omegaconf.DictConfig, 25 | modules: omegaconf.DictConfig, 26 | # data: omegaconf.DictConfig=None, 27 | # parameters: omegaconf.DictConfig=None, 28 | # feedforward: omegaconf.DictConfig=None, 29 | # monads: omegaconf.DictConfig=None, 30 | # supervision: omegaconf.DictConfig=None, 31 | # validation: omegaconf.DictConfig=None, 32 | # visualization: omegaconf.DictConfig=None, 33 | # export: omegaconf.DictConfig=None, 34 | ): 35 | super(StackedHourglassMod, self).__init__( 36 | # data=data, parameters=parameters, 37 | # feedforward=feedforward, monads=monads, 38 | # supervision=supervision, validation=validation, 39 | # export=export, visualization=visualization, 40 | ) 41 | self.stacks = configuration.stacks 42 | preproc = configuration.preproc 43 | projection = configuration.projection 44 | prediction = configuration.prediction 45 | merge = configuration.merge 46 | hourglass = list(modules.values())[0] 47 | if not preproc == None: 48 | self.pre = torch.nn.Sequential( 49 | mic.make_conv_block( 50 | block_type=preproc.block, 51 | convolution_type=preproc.convolution, 52 | in_features=configuration.in_features, 53 | out_features=hourglass.features // 4, 54 | activation_type=preproc.activation, 55 | convolution_params={ 56 | "kernel_size": preproc.stem.kernel_size, 57 | "stride": preproc.stem.stride, 58 | "padding": preproc.stem.padding, 59 | }, 60 | ), 61 | mires.make_residual_block( 62 | block_type=preproc.residual, 63 | convolution_type=preproc.convolution, 64 | in_features=hourglass.features // 4, 65 | out_features=hourglass.features // 2, 66 | bottleneck_features=hourglass.features // 2, 67 | activation_type=preproc.activation, 68 | 
strided=False, 69 | ), 70 | mids.make_downsample( 71 | downscale_type=preproc.downscale, 72 | features=hourglass.features // 2, 73 | kernel_size=3 if preproc.downscale == 'maxpool2d_aa' else 2, 74 | ), 75 | mires.make_residual_block( 76 | block_type=preproc.residual, 77 | convolution_type=preproc.convolution, 78 | in_features=hourglass.features // 2, 79 | out_features=hourglass.features // 2, 80 | bottleneck_features=hourglass.features // 2, 81 | activation_type=preproc.activation, 82 | strided=False, 83 | ), 84 | mires.make_residual_block( 85 | block_type=preproc.residual, 86 | convolution_type=preproc.convolution, 87 | in_features=hourglass.features // 2, 88 | out_features=hourglass.features, 89 | bottleneck_features=hourglass.features, 90 | activation_type=preproc.activation, 91 | strided=False, 92 | ), 93 | ) 94 | 95 | self.hgs = torch.nn.ModuleList([ 96 | torch.nn.Sequential( 97 | hyu.instantiate(hourglass) 98 | ) for i in range(self.stacks) 99 | ] 100 | ) 101 | 102 | if not preproc == None: 103 | self.features = torch.nn.ModuleList([ 104 | torch.nn.Sequential( 105 | mires.make_residual_block( 106 | block_type=preproc.residual, 107 | convolution_type=preproc.convolution, 108 | in_features=hourglass.features, 109 | out_features=hourglass.features, 110 | bottleneck_features=hourglass.features, 111 | activation_type=preproc.activation, 112 | strided=False, 113 | ), 114 | mic.make_conv_block( 115 | block_type=projection.block, 116 | convolution_type=projection.convolution, 117 | in_features=hourglass.features, 118 | out_features=hourglass.features, 119 | activation_type=projection.activation, 120 | convolution_params={"kernel_size": 1, "padding": 0}, 121 | ) 122 | ) for i in range(self.stacks) 123 | ] 124 | ) 125 | else: 126 | self.features = torch.nn.ModuleList([ 127 | torch.nn.Sequential( 128 | mic.make_conv_block( 129 | block_type=projection.block, 130 | convolution_type=projection.convolution, 131 | in_features=hourglass.features, 132 | out_features=hourglass.features, 133 | activation_type=projection.activation, 134 | convolution_params={"kernel_size": 1, "padding": 0}, 135 | ) 136 | ) for i in range(self.stacks) 137 | ] 138 | ) 139 | 140 | self.outs = torch.nn.ModuleList([ 141 | mic.make_conv_block( 142 | block_type=prediction.block, 143 | convolution_type=prediction.convolution, 144 | in_features=hourglass.features, 145 | out_features=configuration.out_features, 146 | activation_type=prediction.activation, 147 | convolution_params={ 148 | "kernel_size": 1, "padding": 0, 149 | }, 150 | activation_params={"inplace": True} 151 | ) for i in range(self.stacks) 152 | ]) 153 | self.merge_features = torch.nn.ModuleList([ 154 | torch.nn.Sequential( 155 | mic.make_conv_1x1( 156 | convolution_type=projection.convolution, 157 | in_channels=hourglass.features, 158 | out_channels=hourglass.features 159 | ), 160 | torch.nn.Dropout2d(p=merge.dropout, inplace=True)\ 161 | if merge.dropout > 0.0 else torch.nn.Identity() 162 | ) for i in range(self.stacks-1) 163 | ]) 164 | self.merge_preds = torch.nn.ModuleList([ 165 | torch.nn.Sequential( 166 | mic.make_conv_1x1( 167 | convolution_type=projection.convolution, 168 | in_channels=configuration.out_features, 169 | out_channels=hourglass.features 170 | ), 171 | torch.nn.Dropout2d(p=prediction.dropout, inplace=False)\ 172 | if prediction.dropout > 0.0 else torch.nn.Identity() 173 | ) for i in range(self.stacks-1) 174 | ]) 175 | # self.input = configuration.input #NOTE: cannot be as required in cascase model 176 | self.output_prefix = configuration.output 
177 | 178 | def forward(self, 179 | data: torch.Tensor 180 | ) -> typing.Tuple[torch.Tensor, torch.Tensor]: 181 | x = data 182 | if hasattr(self, 'pre'): 183 | x = self.pre(x) 184 | combined_hm_preds = [] 185 | for i in range(self.stacks): 186 | hg = self.hgs[i](x) 187 | feature = self.features[i](hg) 188 | preds = self.outs[i](feature) 189 | combined_hm_preds.append(preds) 190 | if i < self.stacks - 1: 191 | x = x + self.merge_preds[i](preds) + self.merge_features[i](feature) 192 | 193 | aggregated_hm = torch.zeros_like(combined_hm_preds[0]) 194 | for i, heatmap in enumerate(combined_hm_preds): 195 | aggregated_hm += heatmap 196 | return aggregated_hm, torch.cat([x, aggregated_hm], dim=1) 197 | 198 | -------------------------------------------------------------------------------- /src/modules/lightning/models/stacked_hourglass_e2e.py: -------------------------------------------------------------------------------- 1 | import moai.networks.lightning as minet 2 | import moai.nn.convolution as mic 3 | import moai.nn.residual as mires 4 | import moai.nn.sampling.spatial.downsample as mids 5 | import moai.modules.lightning as mimod 6 | import moai.nn.utils as miu 7 | 8 | import torch 9 | 10 | import hydra.utils as hyu 11 | import omegaconf.omegaconf as omegaconf 12 | import typing 13 | import logging 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | #NOTE: from https://github.com/anibali/pytorch-stacked-hourglass/blob/master/src/stacked_hourglass/model.py 18 | #NOTE: from https://github.com/princeton-vl/pytorch_stacked_hourglass/blob/master/models/posenet.py 19 | 20 | __all__ = ["StackedHourglassMod_e2e"] 21 | 22 | class StackedHourglassMod_e2e(torch.nn.Module): 23 | def __init__(self, 24 | configuration: omegaconf.DictConfig, 25 | modules: omegaconf.DictConfig, 26 | # data: omegaconf.DictConfig=None, 27 | # parameters: omegaconf.DictConfig=None, 28 | # feedforward: omegaconf.DictConfig=None, 29 | # monads: omegaconf.DictConfig=None, 30 | # supervision: omegaconf.DictConfig=None, 31 | # validation: omegaconf.DictConfig=None, 32 | # visualization: omegaconf.DictConfig=None, 33 | # export: omegaconf.DictConfig=None, 34 | ): 35 | super(StackedHourglassMod_e2e, self).__init__( 36 | # data=data, parameters=parameters, 37 | # feedforward=feedforward, monads=monads, 38 | # supervision=supervision, validation=validation, 39 | # export=export, visualization=visualization, 40 | ) 41 | self.stacks = configuration.stacks 42 | preproc = configuration.preproc 43 | projection = configuration.projection 44 | prediction = configuration.prediction 45 | merge = configuration.merge 46 | hourglass = list(modules.values())[0] 47 | self.out = configuration.out_features 48 | if not preproc == None: 49 | self.pre = torch.nn.Sequential( 50 | mic.make_conv_block( 51 | block_type=preproc.block, 52 | convolution_type=preproc.convolution, 53 | in_features=configuration.in_features, 54 | out_features=hourglass.features // 4, 55 | activation_type=preproc.activation, 56 | convolution_params={ 57 | "kernel_size": preproc.stem.kernel_size, 58 | "stride": preproc.stem.stride, 59 | "padding": preproc.stem.padding, 60 | }, 61 | ), 62 | mires.make_residual_block( 63 | block_type=preproc.residual, 64 | convolution_type=preproc.convolution, 65 | in_features=hourglass.features // 4, 66 | out_features=hourglass.features // 2, 67 | bottleneck_features=hourglass.features // 2, 68 | activation_type=preproc.activation, 69 | strided=False, 70 | ), 71 | mids.make_downsample( 72 | downscale_type=preproc.downscale, 73 | 
features=hourglass.features // 2, 74 | kernel_size=3 if preproc.downscale == 'maxpool2d_aa' else 2, 75 | ), 76 | mires.make_residual_block( 77 | block_type=preproc.residual, 78 | convolution_type=preproc.convolution, 79 | in_features=hourglass.features // 2, 80 | out_features=hourglass.features // 2, 81 | bottleneck_features=hourglass.features // 2, 82 | activation_type=preproc.activation, 83 | strided=False, 84 | ), 85 | mires.make_residual_block( 86 | block_type=preproc.residual, 87 | convolution_type=preproc.convolution, 88 | in_features=hourglass.features // 2, 89 | out_features=hourglass.features, 90 | bottleneck_features=hourglass.features, 91 | activation_type=preproc.activation, 92 | strided=False, 93 | ), 94 | ) 95 | 96 | self.hgs = torch.nn.ModuleList([ 97 | torch.nn.Sequential( 98 | hyu.instantiate(hourglass) 99 | ) for i in range(self.stacks) 100 | ] 101 | ) 102 | 103 | if not preproc == None: 104 | self.features = torch.nn.ModuleList([ 105 | torch.nn.Sequential( 106 | mires.make_residual_block( 107 | block_type=preproc.residual, 108 | convolution_type=preproc.convolution, 109 | in_features=hourglass.features, 110 | out_features=hourglass.features, 111 | bottleneck_features=hourglass.features, 112 | activation_type=preproc.activation, 113 | strided=False, 114 | ), 115 | mic.make_conv_block( 116 | block_type=projection.block, 117 | convolution_type=projection.convolution, 118 | in_features=hourglass.features, 119 | out_features=hourglass.features, 120 | activation_type=projection.activation, 121 | convolution_params={"kernel_size": 1, "padding": 0}, 122 | ) 123 | ) for i in range(self.stacks) 124 | ] 125 | ) 126 | else: 127 | self.features = torch.nn.ModuleList([ 128 | torch.nn.Sequential( 129 | mic.make_conv_block( 130 | block_type=projection.block, 131 | convolution_type=projection.convolution, 132 | in_features=hourglass.features, 133 | out_features=hourglass.features, 134 | activation_type=projection.activation, 135 | convolution_params={"kernel_size": 1, "padding": 0}, 136 | ) 137 | ) for i in range(self.stacks) 138 | ] 139 | ) 140 | 141 | self.outs = torch.nn.ModuleList([ 142 | mic.make_conv_block( 143 | block_type=prediction.block, 144 | convolution_type=prediction.convolution, 145 | in_features=hourglass.features, 146 | out_features=configuration.out_features, 147 | activation_type=prediction.activation, 148 | convolution_params={ 149 | "kernel_size": 1, "padding": 0, 150 | }, 151 | activation_params={"inplace": True} 152 | ) for i in range(self.stacks) 153 | ]) 154 | self.merge_features = torch.nn.ModuleList([ 155 | torch.nn.Sequential( 156 | mic.make_conv_1x1( 157 | convolution_type=projection.convolution, 158 | in_channels=hourglass.features, 159 | out_channels=hourglass.features 160 | ), 161 | torch.nn.Dropout2d(p=merge.dropout, inplace=True)\ 162 | if merge.dropout > 0.0 else torch.nn.Identity() 163 | ) for i in range(self.stacks-1) 164 | ]) 165 | self.merge_preds = torch.nn.ModuleList([ 166 | torch.nn.Sequential( 167 | mic.make_conv_1x1( 168 | convolution_type=projection.convolution, 169 | in_channels=configuration.out_features, 170 | out_channels=hourglass.features 171 | ), 172 | torch.nn.Dropout2d(p=prediction.dropout, inplace=False)\ 173 | if prediction.dropout > 0.0 else torch.nn.Identity() 174 | ) for i in range(self.stacks-1) 175 | ]) 176 | # self.input = configuration.input #NOTE: cannot be as required in cascase model 177 | self.output_prefix = configuration.output 178 | 179 | def forward(self, 180 | data: torch.Tensor 181 | ) -> typing.Tuple[torch.Tensor, 
torch.Tensor, torch.Tensor]: 182 | x = data 183 | if hasattr(self, 'pre'): 184 | x = self.pre(x) 185 | combined_hm_preds = [] 186 | for i in range(self.stacks): 187 | hg = self.hgs[i](x) 188 | feature = self.features[i](hg) 189 | preds = self.outs[i](feature) 190 | combined_hm_preds.append(preds) 191 | if i < self.stacks - 1: 192 | x = x + self.merge_preds[i](preds) + self.merge_features[i](feature) 193 | 194 | aggregated_hm = torch.zeros_like(combined_hm_preds[0]) 195 | for i, heatmap in enumerate(combined_hm_preds): 196 | aggregated_hm += heatmap 197 | 198 | return aggregated_hm[:, :19, ...], aggregated_hm[:, 19:, ...], torch.cat([x, aggregated_hm], dim=1) 199 | 200 | # return aggregated_hm[:, :53, ...], aggregated_hm[:, 53:, ...] 201 | 202 | -------------------------------------------------------------------------------- /src/monads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tofis/democap/bc7f8cd27163085f78e164ac464df2336f0c6ad9/src/monads/__init__.py -------------------------------------------------------------------------------- /src/monads/distribution/zmean.py: -------------------------------------------------------------------------------- 1 | from moai.monads.utils import spatial_dim_list 2 | 3 | import torch 4 | 5 | __all__ = ["zMean"] 6 | 7 | class zMean(torch.nn.Module): 8 | def __init__(self, 9 | ): 10 | super(zMean, self).__init__() 11 | 12 | def forward(self, 13 | heatmaps: torch.Tensor, # spatial probability tensor of K keypoints 14 | ) -> torch.Tensor: 15 | hm_dims = spatial_dim_list(heatmaps) 16 | return torch.mean(heatmaps, dim=tuple(hm_dims)).unsqueeze(2) 17 | # return torch.amax(heatmaps, dim=tuple(hm_dims)).unsqueeze(2) 18 | # return torch.sum(heatmaps, dim=tuple(hm_dims)).unsqueeze(2) 19 | -------------------------------------------------------------------------------- /src/monads/keypoints/fuse_coords.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import typing 3 | 4 | __all__ = [ 5 | "FuseCoords", 6 | ] 7 | 8 | class FuseCoords(torch.nn.Module): 9 | def __init__(self, 10 | mode: str="two" # two, four 11 | ): 12 | super(FuseCoords,self).__init__() 13 | self.mode = mode 14 | 15 | def rotate_back_from_back(self, coords): 16 | coords[..., 0] = 1.0 - coords[..., 0] 17 | coords[..., 2] = 1.0 - coords[..., 2] 18 | return coords 19 | 20 | def rotate_left_from_right(self, coords): 21 | rot = torch.tensor([[ 22 | [0.0, 0.0, 1.0], 23 | [0.0, 1.0, 0.0], 24 | [-1.0, 0.0, 0.0], 25 | ]]).to(coords.device).float() 26 | xformed_t = rot @ coords.permute(0, 2, 1) 27 | xformed_t += torch.tensor([0.0, 0.0, 1.0]).to(coords.device).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 28 | return xformed_t.permute(0, 2, 1) 29 | 30 | def rotate_right_from_left(self, coords): 31 | rot = torch.tensor([[ 32 | [0.0, 0.0, -1.0], 33 | [0.0, 1.0, 0.0], 34 | [1.0, 0.0, 0.0], 35 | ]]).to(coords.device).float() 36 | xformed_t = rot @ coords.permute(0, 2, 1) 37 | xformed_t += torch.tensor([1.0, 0.0, 0.0]).to(coords.device).expand(1, xformed_t.size()[2], xformed_t.size()[1]).permute(0, 2, 1) 38 | return xformed_t.permute(0, 2, 1) 39 | 40 | def forward(self, coords: typing.List[torch.Tensor]) -> torch.Tensor: 41 | fused_coords = torch.zeros_like(coords[0]) 42 | for i, coords_i in enumerate(coords): 43 | if self.mode == "two": 44 | if i == 0: 45 | fused_coords += coords_i 46 | else: 47 | fused_coords += self.rotate_back_from_back(coords_i) 48 | 
elif self.mode == "four": 49 | if i == 0: 50 | fused_coords += coords_i 51 | elif i == 1: 52 | fused_coords += self.rotate_back_from_back(coords_i) 53 | elif i == 2: 54 | fused_coords += self.rotate_right_from_left(coords_i) 55 | elif i == 3: 56 | fused_coords += self.rotate_left_from_right(coords_i) 57 | fused_coords /= len(coords) 58 | return fused_coords -------------------------------------------------------------------------------- /src/validation/metrics/human_pose/mae.py: -------------------------------------------------------------------------------- 1 | from moai.monads.utils.common import dim_list 2 | 3 | import torch 4 | import numpy 5 | import os 6 | 7 | # from moai.validation.metrics.human_pose.temp import ( 8 | # save_ply_from_keypoints 9 | # ) 10 | 11 | 12 | class MAE(torch.nn.Module): 13 | def __init__(self): 14 | super(MAE, self).__init__() 15 | self.counter_m = 0 16 | self.counter_j = 0 17 | self.per_joint_results = numpy.zeros([0, 19]) 18 | self.per_marker_results = numpy.zeros([0, 53]) 19 | # if not os.path.exists("ply"): 20 | # os.mkdir("ply") 21 | # if not os.path.exists("csv"): 22 | # os.mkdir("csv") 23 | 24 | def forward(self, 25 | gt: torch.Tensor, 26 | pred: torch.Tensor, 27 | ) -> torch.Tensor: 28 | euc = torch.norm(gt - pred, p=2, dim=-1, keepdim=False) 29 | 30 | if gt.size()[1] == 53: 31 | self.counter_m += gt.size()[0] 32 | self.per_marker_results = numpy.vstack([self.per_marker_results, euc.cpu().numpy()]) 33 | # numpy.savetxt("csv/mae_markers.csv", self.per_marker_results, delimiter=',') 34 | else: 35 | gt[:, 14, :] = 0.0 36 | gt[:, 18, :] = 0.0 37 | pred[:, 14, :] = 0.0 38 | pred[:, 18, :] = 0.0 39 | self.counter_j += gt.size()[0] 40 | self.per_joint_results = numpy.vstack([self.per_joint_results, euc.cpu().numpy()]) 41 | # numpy.savetxt("csv/mae_joints.csv", self.per_joint_results, delimiter=',') 42 | 43 | return torch.mean(torch.mean(euc, dim=-1)) 44 | -------------------------------------------------------------------------------- /src/validation/metrics/human_pose/rmse.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from moai.monads.utils.common import dim_list 4 | 5 | import torch 6 | import numpy 7 | 8 | 9 | class RMSE(torch.nn.Module): 10 | def __init__(self): 11 | super(RMSE, self).__init__() 12 | self.counter_m = 0 13 | self.counter_j = 0 14 | self.per_joint_results = numpy.zeros([0, 19]) 15 | self.per_marker_results = numpy.zeros([0, 53]) 16 | if not os.path.exists("csv"): 17 | os.mkdir("csv") 18 | 19 | def forward(self, 20 | gt: torch.Tensor, 21 | pred: torch.Tensor 22 | ) -> torch.Tensor: 23 | diff_sq = torch.norm(gt - pred, p=2, dim=-1, keepdim=False) ** 2 24 | 25 | if gt.size()[1] == 53: 26 | self.per_marker_results = numpy.vstack([self.per_marker_results, diff_sq.cpu().numpy()]) 27 | numpy.savetxt("csv/rmse_markers.csv", self.per_marker_results, delimiter=',') 28 | else: 29 | self.per_joint_results = numpy.vstack([self.per_joint_results, diff_sq.cpu().numpy()]) 30 | numpy.savetxt("csv/rmse_joints.csv", self.per_joint_results, delimiter=',') 31 | 32 | return torch.mean(torch.sqrt(torch.mean(diff_sq, dim=-1))) --------------------------------------------------------------------------------
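Usage sketch (not part of the repository): the snippet below shows how the coordinate-regression utilities defined in src/modules/lightning/models/dsntnn.py are typically chained together — flat_softmax to turn a raw network output into per-keypoint spatial distributions, dsnt to extract differentiable normalized coordinates, and the divergence and Euclidean losses combined through average_loss. The import path, tensor shapes and loss weighting are illustrative assumptions, not values taken from the DeMoCap configurations.

import torch

# Assumed import path; adjust to however the repository is installed or added to PYTHONPATH.
from src.modules.lightning.models import dsntnn

B, K, H, W = 2, 19, 64, 64                          # batch, keypoints, heatmap resolution (illustrative)
raw = torch.randn(B, K, H, W, requires_grad=True)   # unnormalized heatmaps, e.g. a model output

heatmaps = dsntnn.flat_softmax(raw)                 # per-keypoint spatial probability maps
coords = dsntnn.dsnt(heatmaps)                      # (B, K, 2) normalized (x, y) coordinates in [-1, 1]

target = torch.rand(B, K, 2) * 2.0 - 1.0            # dummy ground-truth coordinates in normalized units

loc_losses = dsntnn.euclidean_losses(coords, target)             # (B, K) per-keypoint distances
# Note: this repository's js_reg_losses returns a (divergences, target_gaussians) tuple.
reg_losses, _ = dsntnn.js_reg_losses(heatmaps, target, sigma_t=1.0)

loss = dsntnn.average_loss(loc_losses + 1.0 * reg_losses)        # scalar, differentiable
loss.backward()

pixel_coords = dsntnn.normalized_to_pixel_coordinates(coords, (H, W))  # back to pixel units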