├── .gitignore ├── LICENSE ├── README.md ├── config.yaml ├── configs ├── ViTPose_base_coco_256x192.py ├── ViTPose_base_simple_coco_256x192.py ├── ViTPose_huge_coco_256x192.py ├── ViTPose_huge_simple_coco_256x192.py ├── ViTPose_large_coco_256x192.py └── ViTPose_large_simple_coco_256x192.py ├── datasets ├── COCO.py └── HumanPoseEstimation.py ├── examples ├── .DS_Store ├── img1.jpg └── img1_result.jpg ├── inference.py ├── models ├── __init__.py ├── backbone │ └── vit.py ├── head │ ├── topdown_heatmap_base_head.py │ └── topdown_heatmap_simple_head.py ├── losses │ ├── __init__.py │ ├── classfication_loss.py │ ├── heatmap_loss.py │ ├── mesh_loss.py │ ├── mse_loss.py │ ├── multi_loss_factory.py │ └── regression_loss.py ├── model.py └── optimizer.py ├── requirements.txt ├── to_onnx.ipynb ├── train.py └── utils ├── __init__.py ├── dist_util.py ├── logging.py ├── nms ├── __init__.py ├── cpu_nms.c ├── cpu_nms.cpython-37m-x86_64-linux-gnu.so ├── cpu_nms.cpython-39-x86_64-linux-gnu.so ├── cpu_nms.pyx ├── gpu_nms.cpp ├── gpu_nms.cpython-37m-x86_64-linux-gnu.so ├── gpu_nms.cpython-39-x86_64-linux-gnu.so ├── gpu_nms.cu ├── gpu_nms.hpp ├── gpu_nms.pyx ├── nms.py ├── nms_kernel.cu ├── nms_ori.py └── setup_linux.py ├── post_processing ├── __init__.py ├── group.py ├── nms.py ├── one_euro_filter.py └── post_transforms.py ├── top_down_eval.py ├── train_valid_fn.py ├── transform.py ├── util.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.pth 2 | **/*.pt 3 | **/__pycache__ 4 | **/coco/ 5 | *.onnx 6 | .DS_Store 7 | runs 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ViTPose (simple version w/o mmcv) 2 | An unofficial implementation of `ViTPose` [Y. Xu et al., 2022]
3 | ![result_image](./examples/img1_result.jpg "Result Image") 4 | 5 | ## Usage 6 | ### | **Inference** 7 | ``` 8 | python inference.py --image-path './examples/img1.jpg' 9 | ``` 10 | 11 | ### | **Training** 12 | ``` 13 | python train.py --config-path config.yaml --model-name 'b' 14 | ``` 15 | - `--model-name` must be one of `b`, `l`, `h` 16 | 17 | 18 | ## Note 19 | 1. Download the trained model (.pth) 20 | - [ViTPose-B-Multi-COCO.pth](https://1drv.ms/u/s!AimBgYV7JjTlgSrlMB093JzJtqq-?e=Jr5S3R) 21 | - [ViTPose-L-Multi-COCO.pth](https://1drv.ms/u/s!AimBgYV7JjTlgTBm3dCVmBUbHYT6?e=fHUrTq) 22 | - [ViTPose-H-Multi-COCO.pth](https://1drv.ms/u/s!AimBgYV7JjTlgS5rLeRAJiWobCdh?e=41GsDd) 23 | 2. Set the config according to the trained model 24 | - [ViTPose-B-COCO-256x192](configs/ViTPose_base_coco_256x192.py) 25 | - [ViTPose-L-COCO-256x192](configs/ViTPose_large_coco_256x192.py) 26 | - [ViTPose-H-COCO-256x192](configs/ViTPose_huge_coco_256x192.py) 27 | 28 | --- 29 | ## Reference 30 | All code was written with reference to [the official ViTPose repo](https://github.com/ViTAE-Transformer/ViTPose).
-------------------------------------------------------------------------------- /config.yaml: --------------------------------------------------------------------------------
1 | # Train config --------------------------------------- 2 | log_level: logging.INFO 3 | seed: 0 4 | deterministic: True # run cuDNN in deterministic mode for reproducibility 5 | cudnn_benchmark: True # enable cuDNN benchmark mode (auto-tunes convolution algorithms) 6 | resume_from: "/home/jaehyun/workspace/PoseEstimation/ViTPose_pytorch/vitpose-b-multi-coco.pth" # CKPT path to resume/fine-tune from (change to your own checkpoint) 7 | gpu_ids: [0] 8 | launcher: 'none' # launcher for distributed training, one of ['none', 'pytorch', 'slurm', 'mpi'] 9 | use_amp: True 10 | validate: True 11 | 12 | autoscale_lr: True # automatically scale lr with the number of gpus 13 | 14 | dist_params: 15 | ...
-------------------------------------------------------------------------------- /configs/ViTPose_base_coco_256x192.py: --------------------------------------------------------------------------------
1 | # _base_ = [ 2 | # '../../../../_base_/default_runtime.py', 3 | # '../../../../_base_/datasets/coco.py' 4 | # ] 5 | evaluation = dict(interval=10, metric='mAP', save_best='AP') 6 | 7 | optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1, 8 | constructor='LayerDecayOptimizerConstructor', 9 | paramwise_cfg=dict( 10 | num_layers=12, 11 | layer_decay_rate=0.75, 12 | custom_keys={ 13 | 'bias': dict(decay_mult=0.), 14 | 'pos_embed': dict(decay_mult=0.), 15 | 'relative_position_bias_table': dict(decay_mult=0.), 16 | 'norm': dict(decay_mult=0.)
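                        # NOTE (assumption, not part of the original config): 'LayerDecayOptimizerConstructor'
                        # is assumed to apply layer-wise lr decay, scaling each transformer block's lr by
                        # roughly layer_decay_rate ** (num_layers - layer_index), so earlier blocks train
                        # with a smaller lr than the head; the custom_keys above only set weight decay to
                        # zero for bias, pos_embed, relative-position and norm parameters.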
17 | } 18 | ) 19 | ) 20 | 21 | optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2)) 22 | 23 | # learning policy 24 | lr_config = dict( 25 | policy='step', 26 | warmup='linear', 27 | warmup_iters=500, 28 | warmup_ratio=0.001, 29 | step=[170, 200]) 30 | 31 | total_epochs = 210 32 | target_type = 'GaussianHeatmap' 33 | channel_cfg = dict( 34 | num_output_channels=17, 35 | dataset_joints=17, 36 | dataset_channel=[ 37 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 38 | ], 39 | inference_channel=[ 40 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 41 | ]) 42 | 43 | # model settings 44 | model = dict( 45 | type='TopDown', 46 | pretrained=None, 47 | backbone=dict( 48 | type='ViT', 49 | img_size=(256, 192), 50 | patch_size=16, 51 | embed_dim=768, 52 | depth=12, 53 | num_heads=12, 54 | ratio=1, 55 | use_checkpoint=False, 56 | mlp_ratio=4, 57 | qkv_bias=True, 58 | drop_path_rate=0.3, 59 | ), 60 | keypoint_head=dict( 61 | type='TopdownHeatmapSimpleHead', 62 | in_channels=768, 63 | num_deconv_layers=2, 64 | num_deconv_filters=(256, 256), 65 | num_deconv_kernels=(4, 4), 66 | extra=dict(final_conv_kernel=1, ), 67 | out_channels=channel_cfg['num_output_channels'], 68 | loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), 69 | train_cfg=dict(), 70 | test_cfg=dict( 71 | flip_test=True, 72 | post_process='default', 73 | shift_heatmap=False, 74 | target_type=target_type, 75 | modulate_kernel=11, 76 | use_udp=True)) 77 | 78 | data_cfg = dict( 79 | image_size=[192, 256], 80 | heatmap_size=[48, 64], 81 | num_output_channels=channel_cfg['num_output_channels'], 82 | num_joints=channel_cfg['dataset_joints'], 83 | dataset_channel=channel_cfg['dataset_channel'], 84 | inference_channel=channel_cfg['inference_channel'], 85 | soft_nms=False, 86 | nms_thr=1.0, 87 | oks_thr=0.9, 88 | vis_thr=0.2, 89 | use_gt_bbox=False, 90 | det_bbox_thr=0.0, 91 | bbox_file='data/coco/person_detection_results/' 92 | 'COCO_val2017_detections_AP_H_56_person.json', 93 | ) 94 | 95 | train_pipeline = [ 96 | dict(type='LoadImageFromFile'), 97 | dict(type='TopDownRandomFlip', flip_prob=0.5), 98 | dict( 99 | type='TopDownHalfBodyTransform', 100 | num_joints_half_body=8, 101 | prob_half_body=0.3), 102 | dict( 103 | type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), 104 | dict(type='TopDownAffine', use_udp=True), 105 | dict(type='ToTensor'), 106 | dict( 107 | type='NormalizeTensor', 108 | mean=[0.485, 0.456, 0.406], 109 | std=[0.229, 0.224, 0.225]), 110 | dict( 111 | type='TopDownGenerateTarget', 112 | sigma=2, 113 | encoding='UDP', 114 | target_type=target_type), 115 | dict( 116 | type='Collect', 117 | keys=['img', 'target', 'target_weight'], 118 | meta_keys=[ 119 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 120 | 'rotation', 'bbox_score', 'flip_pairs' 121 | ]), 122 | ] 123 | 124 | val_pipeline = [ 125 | dict(type='LoadImageFromFile'), 126 | dict(type='TopDownAffine', use_udp=True), 127 | dict(type='ToTensor'), 128 | dict( 129 | type='NormalizeTensor', 130 | mean=[0.485, 0.456, 0.406], 131 | std=[0.229, 0.224, 0.225]), 132 | dict( 133 | type='Collect', 134 | keys=['img'], 135 | meta_keys=[ 136 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 137 | 'flip_pairs' 138 | ]), 139 | ] 140 | 141 | test_pipeline = val_pipeline 142 | 143 | data_root = 'datasets/coco' 144 | data = dict( 145 | samples_per_gpu=32, 146 | workers_per_gpu=4, 147 | val_dataloader=dict(samples_per_gpu=32), 148 | test_dataloader=dict(samples_per_gpu=32), 149 | train=dict( 
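        # NOTE (assumption): the train/val/test entries below follow mmpose's TopDownCocoDataset
        # interface; with samples_per_gpu=32 above, the per-GPU training batch size is 32 and the
        # effective batch size scales with the number of GPUs used.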
150 | type='TopDownCocoDataset', 151 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 152 | img_prefix=f'{data_root}/train2017/', 153 | data_cfg=data_cfg, 154 | pipeline=train_pipeline), 155 | val=dict( 156 | type='TopDownCocoDataset', 157 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 158 | img_prefix=f'{data_root}/val2017/', 159 | data_cfg=data_cfg, 160 | pipeline=val_pipeline), 161 | test=dict( 162 | type='TopDownCocoDataset', 163 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 164 | img_prefix=f'{data_root}/val2017/', 165 | data_cfg=data_cfg, 166 | pipeline=test_pipeline) 167 | ) 168 | 169 | -------------------------------------------------------------------------------- /configs/ViTPose_base_simple_coco_256x192.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../../_base_/default_runtime.py', 3 | '../../../../_base_/datasets/coco.py' 4 | ] 5 | 6 | evaluation = dict(interval=10, metric='mAP', save_best='AP') 7 | 8 | optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1, 9 | constructor='LayerDecayOptimizerConstructor', 10 | paramwise_cfg=dict( 11 | num_layers=12, 12 | layer_decay_rate=0.75, 13 | custom_keys={ 14 | 'bias': dict(decay_multi=0.), 15 | 'pos_embed': dict(decay_mult=0.), 16 | 'relative_position_bias_table': dict(decay_mult=0.), 17 | 'norm': dict(decay_mult=0.) 18 | } 19 | ) 20 | ) 21 | 22 | optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2)) 23 | 24 | # learning policy 25 | lr_config = dict( 26 | policy='step', 27 | warmup='linear', 28 | warmup_iters=500, 29 | warmup_ratio=0.001, 30 | step=[170, 200]) 31 | total_epochs = 210 32 | target_type = 'GaussianHeatmap' 33 | channel_cfg = dict( 34 | num_output_channels=17, 35 | dataset_joints=17, 36 | dataset_channel=[ 37 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 38 | ], 39 | inference_channel=[ 40 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 41 | ]) 42 | 43 | # model settings 44 | model = dict( 45 | type='TopDown', 46 | pretrained=None, 47 | backbone=dict( 48 | type='ViT', 49 | img_size=(256, 192), 50 | patch_size=16, 51 | embed_dim=768, 52 | depth=12, 53 | num_heads=12, 54 | ratio=1, 55 | use_checkpoint=False, 56 | mlp_ratio=4, 57 | qkv_bias=True, 58 | drop_path_rate=0.3, 59 | ), 60 | keypoint_head=dict( 61 | type='TopdownHeatmapSimpleHead', 62 | in_channels=768, 63 | num_deconv_layers=0, 64 | num_deconv_filters=[], 65 | num_deconv_kernels=[], 66 | upsample=4, 67 | extra=dict(final_conv_kernel=3, ), 68 | out_channels=channel_cfg['num_output_channels'], 69 | loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), 70 | train_cfg=dict(), 71 | test_cfg=dict( 72 | flip_test=True, 73 | post_process='default', 74 | shift_heatmap=False, 75 | target_type=target_type, 76 | modulate_kernel=11, 77 | use_udp=True)) 78 | 79 | data_cfg = dict( 80 | image_size=[192, 256], 81 | heatmap_size=[48, 64], 82 | num_output_channels=channel_cfg['num_output_channels'], 83 | num_joints=channel_cfg['dataset_joints'], 84 | dataset_channel=channel_cfg['dataset_channel'], 85 | inference_channel=channel_cfg['inference_channel'], 86 | soft_nms=False, 87 | nms_thr=1.0, 88 | oks_thr=0.9, 89 | vis_thr=0.2, 90 | use_gt_bbox=False, 91 | det_bbox_thr=0.0, 92 | bbox_file='data/coco/person_detection_results/' 93 | 'COCO_val2017_detections_AP_H_56_person.json', 94 | ) 95 | 96 | train_pipeline = [ 97 | dict(type='LoadImageFromFile'), 98 | dict(type='TopDownRandomFlip', 
flip_prob=0.5), 99 | dict( 100 | type='TopDownHalfBodyTransform', 101 | num_joints_half_body=8, 102 | prob_half_body=0.3), 103 | dict( 104 | type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), 105 | dict(type='TopDownAffine', use_udp=True), 106 | dict(type='ToTensor'), 107 | dict( 108 | type='NormalizeTensor', 109 | mean=[0.485, 0.456, 0.406], 110 | std=[0.229, 0.224, 0.225]), 111 | dict( 112 | type='TopDownGenerateTarget', 113 | sigma=2, 114 | encoding='UDP', 115 | target_type=target_type), 116 | dict( 117 | type='Collect', 118 | keys=['img', 'target', 'target_weight'], 119 | meta_keys=[ 120 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 121 | 'rotation', 'bbox_score', 'flip_pairs' 122 | ]), 123 | ] 124 | 125 | val_pipeline = [ 126 | dict(type='LoadImageFromFile'), 127 | dict(type='TopDownAffine', use_udp=True), 128 | dict(type='ToTensor'), 129 | dict( 130 | type='NormalizeTensor', 131 | mean=[0.485, 0.456, 0.406], 132 | std=[0.229, 0.224, 0.225]), 133 | dict( 134 | type='Collect', 135 | keys=['img'], 136 | meta_keys=[ 137 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 138 | 'flip_pairs' 139 | ]), 140 | ] 141 | 142 | test_pipeline = val_pipeline 143 | 144 | data_root = 'datasets/coco' 145 | data = dict( 146 | samples_per_gpu=64, 147 | workers_per_gpu=4, 148 | val_dataloader=dict(samples_per_gpu=32), 149 | test_dataloader=dict(samples_per_gpu=32), 150 | train=dict( 151 | type='TopDownCocoDataset', 152 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 153 | img_prefix=f'{data_root}/train2017/', 154 | data_cfg=data_cfg, 155 | pipeline=train_pipeline, 156 | dataset_info={{_base_.dataset_info}}), 157 | val=dict( 158 | type='TopDownCocoDataset', 159 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 160 | img_prefix=f'{data_root}/val2017/', 161 | data_cfg=data_cfg, 162 | pipeline=val_pipeline, 163 | dataset_info={{_base_.dataset_info}}), 164 | test=dict( 165 | type='TopDownCocoDataset', 166 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 167 | img_prefix=f'{data_root}/val2017/', 168 | data_cfg=data_cfg, 169 | pipeline=test_pipeline, 170 | dataset_info={{_base_.dataset_info}}), 171 | ) 172 | 173 | -------------------------------------------------------------------------------- /configs/ViTPose_huge_coco_256x192.py: -------------------------------------------------------------------------------- 1 | # _base_ = [ 2 | # '../../../../_base_/default_runtime.py', 3 | # '../../../../_base_/datasets/coco.py' 4 | # ] 5 | evaluation = dict(interval=10, metric='mAP', save_best='AP') 6 | 7 | optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1, 8 | constructor='LayerDecayOptimizerConstructor', 9 | paramwise_cfg=dict( 10 | num_layers=32, 11 | layer_decay_rate=0.85, 12 | custom_keys={ 13 | 'bias': dict(decay_multi=0.), 14 | 'pos_embed': dict(decay_mult=0.), 15 | 'relative_position_bias_table': dict(decay_mult=0.), 16 | 'norm': dict(decay_mult=0.) 
17 | } 18 | ) 19 | ) 20 | 21 | optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2)) 22 | 23 | # learning policy 24 | lr_config = dict( 25 | policy='step', 26 | warmup='linear', 27 | warmup_iters=500, 28 | warmup_ratio=0.001, 29 | step=[170, 200]) 30 | total_epochs = 210 31 | target_type = 'GaussianHeatmap' 32 | channel_cfg = dict( 33 | num_output_channels=17, 34 | dataset_joints=17, 35 | dataset_channel=[ 36 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 37 | ], 38 | inference_channel=[ 39 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 40 | ]) 41 | 42 | # model settings 43 | model = dict( 44 | type='TopDown', 45 | pretrained=None, 46 | backbone=dict( 47 | type='ViT', 48 | img_size=(256, 192), 49 | patch_size=16, 50 | embed_dim=1280, 51 | depth=32, 52 | num_heads=16, 53 | ratio=1, 54 | use_checkpoint=False, 55 | mlp_ratio=4, 56 | qkv_bias=True, 57 | drop_path_rate=0.55, 58 | ), 59 | keypoint_head=dict( 60 | type='TopdownHeatmapSimpleHead', 61 | in_channels=1280, 62 | num_deconv_layers=2, 63 | num_deconv_filters=(256, 256), 64 | num_deconv_kernels=(4, 4), 65 | extra=dict(final_conv_kernel=1, ), 66 | out_channels=channel_cfg['num_output_channels'], 67 | loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), 68 | train_cfg=dict(), 69 | test_cfg=dict( 70 | flip_test=True, 71 | post_process='default', 72 | shift_heatmap=False, 73 | target_type=target_type, 74 | modulate_kernel=11, 75 | use_udp=True)) 76 | 77 | data_cfg = dict( 78 | image_size=[192, 256], 79 | heatmap_size=[48, 64], 80 | num_output_channels=channel_cfg['num_output_channels'], 81 | num_joints=channel_cfg['dataset_joints'], 82 | dataset_channel=channel_cfg['dataset_channel'], 83 | inference_channel=channel_cfg['inference_channel'], 84 | soft_nms=False, 85 | nms_thr=1.0, 86 | oks_thr=0.9, 87 | vis_thr=0.2, 88 | use_gt_bbox=False, 89 | det_bbox_thr=0.0, 90 | bbox_file='data/coco/person_detection_results/' 91 | 'COCO_val2017_detections_AP_H_56_person.json', 92 | ) 93 | 94 | train_pipeline = [ 95 | dict(type='LoadImageFromFile'), 96 | dict(type='TopDownRandomFlip', flip_prob=0.5), 97 | dict( 98 | type='TopDownHalfBodyTransform', 99 | num_joints_half_body=8, 100 | prob_half_body=0.3), 101 | dict( 102 | type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), 103 | dict(type='TopDownAffine', use_udp=True), 104 | dict(type='ToTensor'), 105 | dict( 106 | type='NormalizeTensor', 107 | mean=[0.485, 0.456, 0.406], 108 | std=[0.229, 0.224, 0.225]), 109 | dict( 110 | type='TopDownGenerateTarget', 111 | sigma=2, 112 | encoding='UDP', 113 | target_type=target_type), 114 | dict( 115 | type='Collect', 116 | keys=['img', 'target', 'target_weight'], 117 | meta_keys=[ 118 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 119 | 'rotation', 'bbox_score', 'flip_pairs' 120 | ]), 121 | ] 122 | 123 | val_pipeline = [ 124 | dict(type='LoadImageFromFile'), 125 | dict(type='TopDownAffine', use_udp=True), 126 | dict(type='ToTensor'), 127 | dict( 128 | type='NormalizeTensor', 129 | mean=[0.485, 0.456, 0.406], 130 | std=[0.229, 0.224, 0.225]), 131 | dict( 132 | type='Collect', 133 | keys=['img'], 134 | meta_keys=[ 135 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 136 | 'flip_pairs' 137 | ]), 138 | ] 139 | 140 | test_pipeline = val_pipeline 141 | 142 | data_root = 'datasets/coco' 143 | data = dict( 144 | samples_per_gpu=64, 145 | workers_per_gpu=4, 146 | val_dataloader=dict(samples_per_gpu=32), 147 | test_dataloader=dict(samples_per_gpu=32), 148 | train=dict( 149 | 
type='TopDownCocoDataset', 150 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 151 | img_prefix=f'{data_root}/train2017/', 152 | data_cfg=data_cfg, 153 | pipeline=train_pipeline), 154 | val=dict( 155 | type='TopDownCocoDataset', 156 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 157 | img_prefix=f'{data_root}/val2017/', 158 | data_cfg=data_cfg, 159 | pipeline=val_pipeline), 160 | test=dict( 161 | type='TopDownCocoDataset', 162 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 163 | img_prefix=f'{data_root}/val2017/', 164 | data_cfg=data_cfg, 165 | pipeline=test_pipeline) 166 | ) 167 | 168 | -------------------------------------------------------------------------------- /configs/ViTPose_huge_simple_coco_256x192.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../../_base_/default_runtime.py', 3 | '../../../../_base_/datasets/coco.py' 4 | ] 5 | evaluation = dict(interval=10, metric='mAP', save_best='AP') 6 | 7 | optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1, 8 | constructor='LayerDecayOptimizerConstructor', 9 | paramwise_cfg=dict( 10 | num_layers=32, 11 | layer_decay_rate=0.85, 12 | custom_keys={ 13 | 'bias': dict(decay_multi=0.), 14 | 'pos_embed': dict(decay_mult=0.), 15 | 'relative_position_bias_table': dict(decay_mult=0.), 16 | 'norm': dict(decay_mult=0.) 17 | } 18 | ) 19 | ) 20 | 21 | optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2)) 22 | 23 | # learning policy 24 | lr_config = dict( 25 | policy='step', 26 | warmup='linear', 27 | warmup_iters=500, 28 | warmup_ratio=0.001, 29 | step=[170, 200]) 30 | total_epochs = 210 31 | target_type = 'GaussianHeatmap' 32 | channel_cfg = dict( 33 | num_output_channels=17, 34 | dataset_joints=17, 35 | dataset_channel=[ 36 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 37 | ], 38 | inference_channel=[ 39 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 40 | ]) 41 | 42 | # model settings 43 | model = dict( 44 | type='TopDown', 45 | pretrained=None, 46 | backbone=dict( 47 | type='ViT', 48 | img_size=(256, 192), 49 | patch_size=16, 50 | embed_dim=1280, 51 | depth=32, 52 | num_heads=16, 53 | ratio=1, 54 | use_checkpoint=False, 55 | mlp_ratio=4, 56 | qkv_bias=True, 57 | drop_path_rate=0.55, 58 | ), 59 | keypoint_head=dict( 60 | type='TopdownHeatmapSimpleHead', 61 | in_channels=1280, 62 | num_deconv_layers=0, 63 | num_deconv_filters=[], 64 | num_deconv_kernels=[], 65 | upsample=4, 66 | extra=dict(final_conv_kernel=3, ), 67 | out_channels=channel_cfg['num_output_channels'], 68 | loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), 69 | train_cfg=dict(), 70 | test_cfg=dict( 71 | flip_test=True, 72 | post_process='default', 73 | shift_heatmap=False, 74 | target_type=target_type, 75 | modulate_kernel=11, 76 | use_udp=True)) 77 | 78 | data_cfg = dict( 79 | image_size=[192, 256], 80 | heatmap_size=[48, 64], 81 | num_output_channels=channel_cfg['num_output_channels'], 82 | num_joints=channel_cfg['dataset_joints'], 83 | dataset_channel=channel_cfg['dataset_channel'], 84 | inference_channel=channel_cfg['inference_channel'], 85 | soft_nms=False, 86 | nms_thr=1.0, 87 | oks_thr=0.9, 88 | vis_thr=0.2, 89 | use_gt_bbox=False, 90 | det_bbox_thr=0.0, 91 | bbox_file='data/coco/person_detection_results/' 92 | 'COCO_val2017_detections_AP_H_56_person.json', 93 | ) 94 | 95 | train_pipeline = [ 96 | dict(type='LoadImageFromFile'), 97 | dict(type='TopDownRandomFlip', 
flip_prob=0.5), 98 | dict( 99 | type='TopDownHalfBodyTransform', 100 | num_joints_half_body=8, 101 | prob_half_body=0.3), 102 | dict( 103 | type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), 104 | dict(type='TopDownAffine', use_udp=True), 105 | dict(type='ToTensor'), 106 | dict( 107 | type='NormalizeTensor', 108 | mean=[0.485, 0.456, 0.406], 109 | std=[0.229, 0.224, 0.225]), 110 | dict( 111 | type='TopDownGenerateTarget', 112 | sigma=2, 113 | encoding='UDP', 114 | target_type=target_type), 115 | dict( 116 | type='Collect', 117 | keys=['img', 'target', 'target_weight'], 118 | meta_keys=[ 119 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 120 | 'rotation', 'bbox_score', 'flip_pairs' 121 | ]), 122 | ] 123 | 124 | val_pipeline = [ 125 | dict(type='LoadImageFromFile'), 126 | dict(type='TopDownAffine', use_udp=True), 127 | dict(type='ToTensor'), 128 | dict( 129 | type='NormalizeTensor', 130 | mean=[0.485, 0.456, 0.406], 131 | std=[0.229, 0.224, 0.225]), 132 | dict( 133 | type='Collect', 134 | keys=['img'], 135 | meta_keys=[ 136 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 137 | 'flip_pairs' 138 | ]), 139 | ] 140 | 141 | test_pipeline = val_pipeline 142 | 143 | data_root = 'datasets/coco' 144 | data = dict( 145 | samples_per_gpu=64, 146 | workers_per_gpu=4, 147 | val_dataloader=dict(samples_per_gpu=32), 148 | test_dataloader=dict(samples_per_gpu=32), 149 | train=dict( 150 | type='TopDownCocoDataset', 151 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 152 | img_prefix=f'{data_root}/train2017/', 153 | data_cfg=data_cfg, 154 | pipeline=train_pipeline, 155 | dataset_info={{_base_.dataset_info}}), 156 | val=dict( 157 | type='TopDownCocoDataset', 158 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 159 | img_prefix=f'{data_root}/val2017/', 160 | data_cfg=data_cfg, 161 | pipeline=val_pipeline, 162 | dataset_info={{_base_.dataset_info}}), 163 | test=dict( 164 | type='TopDownCocoDataset', 165 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 166 | img_prefix=f'{data_root}/val2017/', 167 | data_cfg=data_cfg, 168 | pipeline=test_pipeline, 169 | dataset_info={{_base_.dataset_info}}), 170 | ) 171 | 172 | -------------------------------------------------------------------------------- /configs/ViTPose_large_coco_256x192.py: -------------------------------------------------------------------------------- 1 | # _base_ = [ 2 | # '../../../../_base_/default_runtime.py', 3 | # '../../../../_base_/datasets/coco.py' 4 | # ] 5 | evaluation = dict(interval=10, metric='mAP', save_best='AP') 6 | 7 | optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1, 8 | constructor='LayerDecayOptimizerConstructor', 9 | paramwise_cfg=dict( 10 | num_layers=16, 11 | layer_decay_rate=0.8, 12 | custom_keys={ 13 | 'bias': dict(decay_multi=0.), 14 | 'pos_embed': dict(decay_mult=0.), 15 | 'relative_position_bias_table': dict(decay_mult=0.), 16 | 'norm': dict(decay_mult=0.) 
17 | } 18 | ) 19 | ) 20 | 21 | optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2)) 22 | 23 | # learning policy 24 | lr_config = dict( 25 | policy='step', 26 | warmup='linear', 27 | warmup_iters=500, 28 | warmup_ratio=0.001, 29 | step=[170, 200]) 30 | total_epochs = 210 31 | target_type = 'GaussianHeatmap' 32 | channel_cfg = dict( 33 | num_output_channels=17, 34 | dataset_joints=17, 35 | dataset_channel=[ 36 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 37 | ], 38 | inference_channel=[ 39 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 40 | ]) 41 | 42 | # model settings 43 | model = dict( 44 | type='TopDown', 45 | pretrained=None, 46 | backbone=dict( 47 | type='ViT', 48 | img_size=(256, 192), 49 | patch_size=16, 50 | embed_dim=1024, 51 | depth=24, 52 | num_heads=16, 53 | ratio=1, 54 | use_checkpoint=False, 55 | mlp_ratio=4, 56 | qkv_bias=True, 57 | drop_path_rate=0.5, 58 | ), 59 | keypoint_head=dict( 60 | type='TopdownHeatmapSimpleHead', 61 | in_channels=1024, 62 | num_deconv_layers=2, 63 | num_deconv_filters=(256, 256), 64 | num_deconv_kernels=(4, 4), 65 | extra=dict(final_conv_kernel=1, ), 66 | out_channels=channel_cfg['num_output_channels'], 67 | loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), 68 | train_cfg=dict(), 69 | test_cfg=dict( 70 | flip_test=True, 71 | post_process='default', 72 | shift_heatmap=False, 73 | target_type=target_type, 74 | modulate_kernel=11, 75 | use_udp=True)) 76 | 77 | data_cfg = dict( 78 | image_size=[192, 256], 79 | heatmap_size=[48, 64], 80 | num_output_channels=channel_cfg['num_output_channels'], 81 | num_joints=channel_cfg['dataset_joints'], 82 | dataset_channel=channel_cfg['dataset_channel'], 83 | inference_channel=channel_cfg['inference_channel'], 84 | soft_nms=False, 85 | nms_thr=1.0, 86 | oks_thr=0.9, 87 | vis_thr=0.2, 88 | use_gt_bbox=False, 89 | det_bbox_thr=0.0, 90 | bbox_file='data/coco/person_detection_results/' 91 | 'COCO_val2017_detections_AP_H_56_person.json', 92 | ) 93 | 94 | train_pipeline = [ 95 | dict(type='LoadImageFromFile'), 96 | dict(type='TopDownRandomFlip', flip_prob=0.5), 97 | dict( 98 | type='TopDownHalfBodyTransform', 99 | num_joints_half_body=8, 100 | prob_half_body=0.3), 101 | dict( 102 | type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), 103 | dict(type='TopDownAffine', use_udp=True), 104 | dict(type='ToTensor'), 105 | dict( 106 | type='NormalizeTensor', 107 | mean=[0.485, 0.456, 0.406], 108 | std=[0.229, 0.224, 0.225]), 109 | dict( 110 | type='TopDownGenerateTarget', 111 | sigma=2, 112 | encoding='UDP', 113 | target_type=target_type), 114 | dict( 115 | type='Collect', 116 | keys=['img', 'target', 'target_weight'], 117 | meta_keys=[ 118 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 119 | 'rotation', 'bbox_score', 'flip_pairs' 120 | ]), 121 | ] 122 | 123 | val_pipeline = [ 124 | dict(type='LoadImageFromFile'), 125 | dict(type='TopDownAffine', use_udp=True), 126 | dict(type='ToTensor'), 127 | dict( 128 | type='NormalizeTensor', 129 | mean=[0.485, 0.456, 0.406], 130 | std=[0.229, 0.224, 0.225]), 131 | dict( 132 | type='Collect', 133 | keys=['img'], 134 | meta_keys=[ 135 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 136 | 'flip_pairs' 137 | ]), 138 | ] 139 | 140 | test_pipeline = val_pipeline 141 | 142 | data_root = 'datasets/coco' 143 | data = dict( 144 | samples_per_gpu=64, 145 | workers_per_gpu=4, 146 | val_dataloader=dict(samples_per_gpu=32), 147 | test_dataloader=dict(samples_per_gpu=32), 148 | train=dict( 149 | 
type='TopDownCocoDataset', 150 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 151 | img_prefix=f'{data_root}/train2017/', 152 | data_cfg=data_cfg, 153 | pipeline=train_pipeline), 154 | val=dict( 155 | type='TopDownCocoDataset', 156 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 157 | img_prefix=f'{data_root}/val2017/', 158 | data_cfg=data_cfg, 159 | pipeline=val_pipeline), 160 | test=dict( 161 | type='TopDownCocoDataset', 162 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 163 | img_prefix=f'{data_root}/val2017/', 164 | data_cfg=data_cfg, 165 | pipeline=test_pipeline) 166 | ) 167 | 168 | -------------------------------------------------------------------------------- /configs/ViTPose_large_simple_coco_256x192.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../../../_base_/default_runtime.py', 3 | '../../../../_base_/datasets/coco.py' 4 | ] 5 | evaluation = dict(interval=10, metric='mAP', save_best='AP') 6 | 7 | optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1, 8 | constructor='LayerDecayOptimizerConstructor', 9 | paramwise_cfg=dict( 10 | num_layers=24, 11 | layer_decay_rate=0.8, 12 | custom_keys={ 13 | 'bias': dict(decay_multi=0.), 14 | 'pos_embed': dict(decay_mult=0.), 15 | 'relative_position_bias_table': dict(decay_mult=0.), 16 | 'norm': dict(decay_mult=0.) 17 | } 18 | ) 19 | ) 20 | 21 | optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2)) 22 | 23 | # learning policy 24 | lr_config = dict( 25 | policy='step', 26 | warmup='linear', 27 | warmup_iters=500, 28 | warmup_ratio=0.001, 29 | step=[170, 200]) 30 | total_epochs = 210 31 | target_type = 'GaussianHeatmap' 32 | channel_cfg = dict( 33 | num_output_channels=17, 34 | dataset_joints=17, 35 | dataset_channel=[ 36 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 37 | ], 38 | inference_channel=[ 39 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 40 | ]) 41 | 42 | # model settings 43 | model = dict( 44 | type='TopDown', 45 | pretrained=None, 46 | backbone=dict( 47 | type='ViT', 48 | img_size=(256, 192), 49 | patch_size=16, 50 | embed_dim=1024, 51 | depth=24, 52 | num_heads=16, 53 | ratio=1, 54 | use_checkpoint=False, 55 | mlp_ratio=4, 56 | qkv_bias=True, 57 | drop_path_rate=0.5, 58 | ), 59 | keypoint_head=dict( 60 | type='TopdownHeatmapSimpleHead', 61 | in_channels=1024, 62 | num_deconv_layers=0, 63 | num_deconv_filters=[], 64 | num_deconv_kernels=[], 65 | upsample=4, 66 | extra=dict(final_conv_kernel=3, ), 67 | out_channels=channel_cfg['num_output_channels'], 68 | loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), 69 | train_cfg=dict(), 70 | test_cfg=dict( 71 | flip_test=True, 72 | post_process='default', 73 | shift_heatmap=False, 74 | target_type=target_type, 75 | modulate_kernel=11, 76 | use_udp=True)) 77 | 78 | data_cfg = dict( 79 | image_size=[192, 256], 80 | heatmap_size=[48, 64], 81 | num_output_channels=channel_cfg['num_output_channels'], 82 | num_joints=channel_cfg['dataset_joints'], 83 | dataset_channel=channel_cfg['dataset_channel'], 84 | inference_channel=channel_cfg['inference_channel'], 85 | soft_nms=False, 86 | nms_thr=1.0, 87 | oks_thr=0.9, 88 | vis_thr=0.2, 89 | use_gt_bbox=False, 90 | det_bbox_thr=0.0, 91 | bbox_file='data/coco/person_detection_results/' 92 | 'COCO_val2017_detections_AP_H_56_person.json', 93 | ) 94 | 95 | train_pipeline = [ 96 | dict(type='LoadImageFromFile'), 97 | dict(type='TopDownRandomFlip', 
flip_prob=0.5), 98 | dict( 99 | type='TopDownHalfBodyTransform', 100 | num_joints_half_body=8, 101 | prob_half_body=0.3), 102 | dict( 103 | type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5), 104 | dict(type='TopDownAffine', use_udp=True), 105 | dict(type='ToTensor'), 106 | dict( 107 | type='NormalizeTensor', 108 | mean=[0.485, 0.456, 0.406], 109 | std=[0.229, 0.224, 0.225]), 110 | dict( 111 | type='TopDownGenerateTarget', 112 | sigma=2, 113 | encoding='UDP', 114 | target_type=target_type), 115 | dict( 116 | type='Collect', 117 | keys=['img', 'target', 'target_weight'], 118 | meta_keys=[ 119 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 120 | 'rotation', 'bbox_score', 'flip_pairs' 121 | ]), 122 | ] 123 | 124 | val_pipeline = [ 125 | dict(type='LoadImageFromFile'), 126 | dict(type='TopDownAffine', use_udp=True), 127 | dict(type='ToTensor'), 128 | dict( 129 | type='NormalizeTensor', 130 | mean=[0.485, 0.456, 0.406], 131 | std=[0.229, 0.224, 0.225]), 132 | dict( 133 | type='Collect', 134 | keys=['img'], 135 | meta_keys=[ 136 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 137 | 'flip_pairs' 138 | ]), 139 | ] 140 | 141 | test_pipeline = val_pipeline 142 | 143 | data_root = 'datasets/coco' 144 | data = dict( 145 | samples_per_gpu=64, 146 | workers_per_gpu=4, 147 | val_dataloader=dict(samples_per_gpu=32), 148 | test_dataloader=dict(samples_per_gpu=32), 149 | train=dict( 150 | type='TopDownCocoDataset', 151 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 152 | img_prefix=f'{data_root}/train2017/', 153 | data_cfg=data_cfg, 154 | pipeline=train_pipeline, 155 | dataset_info={{_base_.dataset_info}}), 156 | val=dict( 157 | type='TopDownCocoDataset', 158 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 159 | img_prefix=f'{data_root}/val2017/', 160 | data_cfg=data_cfg, 161 | pipeline=val_pipeline, 162 | dataset_info={{_base_.dataset_info}}), 163 | test=dict( 164 | type='TopDownCocoDataset', 165 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 166 | img_prefix=f'{data_root}/val2017/', 167 | data_cfg=data_cfg, 168 | pipeline=test_pipeline, 169 | dataset_info={{_base_.dataset_info}}), 170 | ) 171 | 172 | -------------------------------------------------------------------------------- /datasets/HumanPoseEstimation.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | 4 | class HumanPoseEstimationDataset(Dataset): 5 | """ 6 | HumanPoseEstimationDataset class. 7 | 8 | Generic class for HPE datasets. 
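    Concrete datasets (e.g. datasets/COCO.py in this repo) are expected to subclass it
    and implement __len__ and __getitem__; the methods below are placeholder stubs.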
9 | """ 10 | def __init__(self): 11 | pass 12 | 13 | def __len__(self): 14 | pass 15 | 16 | def __getitem__(self, item): 17 | pass -------------------------------------------------------------------------------- /examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/examples/.DS_Store -------------------------------------------------------------------------------- /examples/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/examples/img1.jpg -------------------------------------------------------------------------------- /examples/img1_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/examples/img1_result.jpg -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | import torch 5 | from torch import Tensor 6 | 7 | from pathlib import Path 8 | import cv2 9 | import numpy as np 10 | 11 | 12 | from time import time 13 | from PIL import Image 14 | from torchvision.transforms import transforms 15 | 16 | from models.model import ViTPose 17 | from utils.visualization import draw_points_and_skeleton, joints_dict 18 | from utils.dist_util import get_dist_info, init_dist 19 | from utils.top_down_eval import keypoints_from_heatmaps 20 | 21 | __all__ = ['inference'] 22 | 23 | 24 | @torch.no_grad() 25 | def inference(img_path: Path, img_size: tuple[int, int], 26 | model_cfg: dict, ckpt_path: Path, device: torch.device, save_result: bool=True) -> np.ndarray: 27 | 28 | # Prepare model 29 | vit_pose = ViTPose(model_cfg) 30 | 31 | 32 | ckpt = torch.load(ckpt_path) 33 | if 'state_dict' in ckpt: 34 | vit_pose.load_state_dict(ckpt['state_dict']) 35 | else: 36 | vit_pose.load_state_dict(ckpt) 37 | vit_pose.to(device) 38 | print(f">>> Model loaded: {ckpt_path}") 39 | 40 | # Prepare input data 41 | img = Image.open(img_path) 42 | org_w, org_h = img.size 43 | print(f">>> Original image size: {org_h} X {org_w} (height X width)") 44 | print(f">>> Resized image size: {img_size[1]} X {img_size[0]} (height X width)") 45 | print(f">>> Scale change: {org_h/img_size[1]}, {org_w/img_size[0]}") 46 | img_tensor = transforms.Compose ( 47 | [transforms.Resize((img_size[1], img_size[0])), 48 | transforms.ToTensor()] 49 | )(img).unsqueeze(0).to(device) 50 | 51 | 52 | # Feed to model 53 | tic = time() 54 | heatmaps = vit_pose(img_tensor).detach().cpu().numpy() # N, 17, h/4, w/4 55 | elapsed_time = time()-tic 56 | print(f">>> Output size: {heatmaps.shape} ---> {elapsed_time:.4f} sec. 
elapsed [{elapsed_time**-1: .1f} fps]\n") 57 | 58 | # points = heatmap2coords(heatmaps=heatmaps, original_resolution=(org_h, org_w)) 59 | points, prob = keypoints_from_heatmaps(heatmaps=heatmaps, center=np.array([[org_w//2, org_h//2]]), scale=np.array([[org_w, org_h]]), 60 | unbiased=True, use_udp=True) 61 | points = np.concatenate([points[:, :, ::-1], prob], axis=2) 62 | 63 | # Visualization 64 | if save_result: 65 | for pid, point in enumerate(points): 66 | img = np.array(img)[:, :, ::-1] # RGB to BGR for cv2 modules 67 | img = draw_points_and_skeleton(img.copy(), point, joints_dict()['coco']['skeleton'], person_index=pid, 68 | points_color_palette='gist_rainbow', skeleton_color_palette='jet', 69 | points_palette_samples=10, confidence_threshold=0.4) 70 | save_name = img_path.replace(".jpg", "_result.jpg") 71 | cv2.imwrite(save_name, img) 72 | 73 | return points 74 | 75 | 76 | if __name__ == "__main__": 77 | from configs.ViTPose_base_coco_256x192 import model as model_cfg 78 | from configs.ViTPose_base_coco_256x192 import data_cfg 79 | 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument('--image-path', nargs='+', type=str, default='examples/sample.jpg', help='image path(s)') 82 | args = parser.parse_args() 83 | 84 | CUR_DIR = osp.dirname(__file__) 85 | # CKPT_PATH = f"{CUR_DIR}/vitpose-b-multi-coco.pth" 86 | CKPT_PATH = "/home/jaehyun/workspace/PoseEstimation/ViTPose_pytorch/runs/train/002/epoch010.pth" 87 | 88 | img_size = data_cfg['image_size'] 89 | if type(args.image_path) != list: 90 | args.image_path = [args.image_path] 91 | for img_path in args.image_path: 92 | print(img_path) 93 | keypoints = inference(img_path=img_path, img_size=img_size, model_cfg=model_cfg, ckpt_path=CKPT_PATH, 94 | device=torch.device("cuda") if torch.cuda.is_available() else torch.device('cpu'), 95 | save_result=True) -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path as osp 3 | 4 | sys.path.append(osp.dirname(osp.dirname(__file__))) 5 | 6 | from utils.util import load_checkpoint, resize, constant_init, normal_init 7 | from utils.top_down_eval import keypoints_from_heatmaps, pose_pck_accuracy 8 | from utils.post_processing import * -------------------------------------------------------------------------------- /models/head/topdown_heatmap_base_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import numpy as np 5 | import torch.nn as nn 6 | 7 | from .. import keypoints_from_heatmaps 8 | 9 | 10 | class TopdownHeatmapBaseHead(nn.Module): 11 | """Base class for top-down heatmap heads. 12 | 13 | All top-down heatmap heads should subclass it. 14 | All subclass should overwrite: 15 | 16 | Methods:`get_loss`, supporting to calculate loss. 17 | Methods:`get_accuracy`, supporting to calculate accuracy. 18 | Methods:`forward`, supporting to forward model. 19 | Methods:`inference_model`, supporting to inference model. 
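    The base class also provides the concrete helper `decode`, which converts predicted
    heatmaps back to image-space keypoints via `keypoints_from_heatmaps`, using the bbox
    center/scale stored in `img_metas`.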
20 | """ 21 | 22 | __metaclass__ = ABCMeta 23 | 24 | @abstractmethod 25 | def get_loss(self, **kwargs): 26 | """Gets the loss.""" 27 | 28 | @abstractmethod 29 | def get_accuracy(self, **kwargs): 30 | """Gets the accuracy.""" 31 | 32 | @abstractmethod 33 | def forward(self, **kwargs): 34 | """Forward function.""" 35 | 36 | @abstractmethod 37 | def inference_model(self, **kwargs): 38 | """Inference function.""" 39 | 40 | def decode(self, img_metas, output, **kwargs): 41 | """Decode keypoints from heatmaps. 42 | 43 | Args: 44 | img_metas (list(dict)): Information about data augmentation 45 | By default this includes: 46 | 47 | - "image_file: path to the image file 48 | - "center": center of the bbox 49 | - "scale": scale of the bbox 50 | - "rotation": rotation of the bbox 51 | - "bbox_score": score of bbox 52 | output (np.ndarray[N, K, H, W]): model predicted heatmaps. 53 | """ 54 | batch_size = len(img_metas) 55 | 56 | if 'bbox_id' in img_metas[0]: 57 | bbox_ids = [] 58 | else: 59 | bbox_ids = None 60 | 61 | c = np.zeros((batch_size, 2), dtype=np.float32) 62 | s = np.zeros((batch_size, 2), dtype=np.float32) 63 | image_paths = [] 64 | score = np.ones(batch_size) 65 | for i in range(batch_size): 66 | c[i, :] = img_metas[i]['center'] 67 | s[i, :] = img_metas[i]['scale'] 68 | image_paths.append(img_metas[i]['image_file']) 69 | 70 | if 'bbox_score' in img_metas[i]: 71 | score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1) 72 | if bbox_ids is not None: 73 | bbox_ids.append(img_metas[i]['bbox_id']) 74 | 75 | preds, maxvals = keypoints_from_heatmaps( 76 | output, 77 | c, 78 | s, 79 | unbiased=self.test_cfg.get('unbiased_decoding', False), 80 | post_process=self.test_cfg.get('post_process', 'default'), 81 | kernel=self.test_cfg.get('modulate_kernel', 11), 82 | valid_radius_factor=self.test_cfg.get('valid_radius_factor', 83 | 0.0546875), 84 | use_udp=self.test_cfg.get('use_udp', False), 85 | target_type=self.test_cfg.get('target_type', 'GaussianHeatmap')) 86 | 87 | all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32) 88 | all_boxes = np.zeros((batch_size, 6), dtype=np.float32) 89 | all_preds[:, :, 0:2] = preds[:, :, 0:2] 90 | all_preds[:, :, 2:3] = maxvals 91 | all_boxes[:, 0:2] = c[:, 0:2] 92 | all_boxes[:, 2:4] = s[:, 0:2] 93 | all_boxes[:, 4] = np.prod(s * 200.0, axis=1) 94 | all_boxes[:, 5] = score 95 | 96 | result = {} 97 | 98 | result['preds'] = all_preds 99 | result['boxes'] = all_boxes 100 | result['image_paths'] = image_paths 101 | result['bbox_ids'] = bbox_ids 102 | 103 | return result 104 | 105 | @staticmethod 106 | def _get_deconv_cfg(deconv_kernel): 107 | """Get configurations for deconv layers.""" 108 | if deconv_kernel == 4: 109 | padding = 1 110 | output_padding = 0 111 | elif deconv_kernel == 3: 112 | padding = 1 113 | output_padding = 1 114 | elif deconv_kernel == 2: 115 | padding = 0 116 | output_padding = 0 117 | else: 118 | raise ValueError(f'Not supported num_kernels ({deconv_kernel}).') 119 | 120 | return deconv_kernel, padding, output_padding 121 | -------------------------------------------------------------------------------- /models/head/topdown_heatmap_simple_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from .. import constant_init, normal_init 5 | 6 | from .. 
import pose_pck_accuracy, flip_back, resize 7 | import torch.nn.functional as F 8 | from .topdown_heatmap_base_head import TopdownHeatmapBaseHead 9 | 10 | 11 | class TopdownHeatmapSimpleHead(TopdownHeatmapBaseHead): 12 | """Top-down heatmap simple head. paper ref: Bin Xiao et al. ``Simple 13 | Baselines for Human Pose Estimation and Tracking``. 14 | 15 | TopdownHeatmapSimpleHead is consisted of (>=0) number of deconv layers 16 | and a simple conv2d layer. 17 | 18 | Args: 19 | in_channels (int): Number of input channels 20 | out_channels (int): Number of output channels 21 | num_deconv_layers (int): Number of deconv layers. 22 | num_deconv_layers should >= 0. Note that 0 means 23 | no deconv layers. 24 | num_deconv_filters (list|tuple): Number of filters. 25 | If num_deconv_layers > 0, the length of 26 | num_deconv_kernels (list|tuple): Kernel sizes. 27 | in_index (int|Sequence[int]): Input feature index. Default: 0 28 | input_transform (str|None): Transformation type of input features. 29 | Options: 'resize_concat', 'multiple_select', None. 30 | Default: None. 31 | 32 | - 'resize_concat': Multiple feature maps will be resized to the 33 | same size as the first one and then concat together. 34 | Usually used in FCN head of HRNet. 35 | - 'multiple_select': Multiple feature maps will be bundle into 36 | a list and passed into decode head. 37 | - None: Only one select feature map is allowed. 38 | align_corners (bool): align_corners argument of F.interpolate. 39 | Default: False. 40 | loss_keypoint (dict): Config for keypoint loss. Default: None. 41 | """ 42 | 43 | def __init__(self, 44 | in_channels, 45 | out_channels, 46 | num_deconv_layers=3, 47 | num_deconv_filters=(256, 256, 256), 48 | num_deconv_kernels=(4, 4, 4), 49 | extra=None, 50 | in_index=0, 51 | input_transform=None, 52 | align_corners=False, 53 | loss_keypoint=None, 54 | train_cfg=None, 55 | test_cfg=None, 56 | upsample=0,): 57 | super().__init__() 58 | 59 | self.in_channels = in_channels 60 | self.loss = loss_keypoint 61 | self.upsample = upsample 62 | 63 | self.train_cfg = {} if train_cfg is None else train_cfg 64 | self.test_cfg = {} if test_cfg is None else test_cfg 65 | self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap') 66 | 67 | self._init_inputs(in_channels, in_index, input_transform) 68 | self.in_index = in_index 69 | self.align_corners = align_corners 70 | 71 | if extra is not None and not isinstance(extra, dict): 72 | raise TypeError('extra should be dict or None.') 73 | 74 | if num_deconv_layers > 0: 75 | self.deconv_layers = self._make_deconv_layer( 76 | num_deconv_layers, 77 | num_deconv_filters, 78 | num_deconv_kernels, 79 | ) 80 | elif num_deconv_layers == 0: 81 | self.deconv_layers = nn.Identity() 82 | else: 83 | raise ValueError( 84 | f'num_deconv_layers ({num_deconv_layers}) should >= 0.') 85 | 86 | identity_final_layer = False 87 | if extra is not None and 'final_conv_kernel' in extra: 88 | assert extra['final_conv_kernel'] in [0, 1, 3] 89 | if extra['final_conv_kernel'] == 3: 90 | padding = 1 91 | elif extra['final_conv_kernel'] == 1: 92 | padding = 0 93 | else: 94 | # 0 for Identity mapping. 
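                # (final_conv_kernel == 0 therefore skips the final conv entirely and
                #  self.final_layer becomes nn.Identity(); the configs in this repo use
                #  1, a 1x1 conv after the deconv stack, or 3, a single 3x3 conv in the
                #  deconv-free "simple" heads.)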
95 | identity_final_layer = True 96 | kernel_size = extra['final_conv_kernel'] 97 | else: 98 | kernel_size = 1 99 | padding = 0 100 | 101 | if identity_final_layer: 102 | self.final_layer = nn.Identity() 103 | else: 104 | conv_channels = num_deconv_filters[ 105 | -1] if num_deconv_layers > 0 else self.in_channels 106 | 107 | layers = [] 108 | if extra is not None: 109 | num_conv_layers = extra.get('num_conv_layers', 0) 110 | num_conv_kernels = extra.get('num_conv_kernels', 111 | [1] * num_conv_layers) 112 | 113 | for i in range(num_conv_layers): 114 | layers.append( 115 | nn.Conv2d(in_channels=conv_channels, 116 | out_channels=conv_channels, 117 | kernel_size=num_conv_kernels[i], 118 | stride=1, 119 | padding=(num_conv_kernels[i] - 1) // 2) 120 | ) 121 | layers.append(nn.BatchNorm2d(conv_channels)) 122 | layers.append(nn.ReLU(inplace=True)) 123 | 124 | layers.append( 125 | nn.Conv2d(in_channels=conv_channels, 126 | out_channels=out_channels, 127 | kernel_size=kernel_size, 128 | stride=1, 129 | padding=padding) 130 | ) 131 | 132 | if len(layers) > 1: 133 | self.final_layer = nn.Sequential(*layers) 134 | else: 135 | self.final_layer = layers[0] 136 | 137 | def get_loss(self, output, target, target_weight): 138 | """Calculate top-down keypoint loss. 139 | 140 | Note: 141 | - batch_size: N 142 | - num_keypoints: K 143 | - heatmaps height: H 144 | - heatmaps weight: W 145 | 146 | Args: 147 | output (torch.Tensor[N,K,H,W]): Output heatmaps. 148 | target (torch.Tensor[N,K,H,W]): Target heatmaps. 149 | target_weight (torch.Tensor[N,K,1]): 150 | Weights across different joint types. 151 | """ 152 | 153 | losses = dict() 154 | 155 | assert not isinstance(self.loss, nn.Sequential) 156 | assert target.dim() == 4 and target_weight.dim() == 3 157 | losses['heatmap_loss'] = self.loss(output, target, target_weight) 158 | 159 | return losses 160 | 161 | def get_accuracy(self, output, target, target_weight): 162 | """Calculate accuracy for top-down keypoint loss. 163 | 164 | Note: 165 | - batch_size: N 166 | - num_keypoints: K 167 | - heatmaps height: H 168 | - heatmaps weight: W 169 | 170 | Args: 171 | output (torch.Tensor[N,K,H,W]): Output heatmaps. 172 | target (torch.Tensor[N,K,H,W]): Target heatmaps. 173 | target_weight (torch.Tensor[N,K,1]): 174 | Weights across different joint types. 175 | """ 176 | 177 | accuracy = dict() 178 | 179 | if self.target_type == 'GaussianHeatmap': 180 | _, avg_acc, _ = pose_pck_accuracy( 181 | output.detach().cpu().numpy(), 182 | target.detach().cpu().numpy(), 183 | target_weight.detach().cpu().numpy().squeeze(-1) > 0) 184 | accuracy['acc_pose'] = float(avg_acc) 185 | 186 | return accuracy 187 | 188 | def forward(self, x): 189 | """Forward function.""" 190 | x = self._transform_inputs(x) 191 | x = self.deconv_layers(x) 192 | x = self.final_layer(x) 193 | return x 194 | 195 | def inference_model(self, x, flip_pairs=None): 196 | """Inference function. 197 | 198 | Returns: 199 | output_heatmap (np.ndarray): Output heatmaps. 200 | 201 | Args: 202 | x (torch.Tensor[N,K,H,W]): Input features. 203 | flip_pairs (None | list[tuple]): 204 | Pairs of keypoints which are mirrored. 
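        Example (illustrative sketch; parameter values are hypothetical, shapes
        assume the default three stride-2 deconv layers):

            >>> head = TopdownHeatmapSimpleHead(in_channels=32, out_channels=17)
            >>> feats = torch.randn(1, 32, 16, 12)
            >>> heatmaps = head.inference_model(feats, flip_pairs=None)
            >>> heatmaps.shape
            (1, 17, 128, 96)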
205 | """ 206 | output = self.forward(x) 207 | 208 | if flip_pairs is not None: 209 | output_heatmap = flip_back( 210 | output.detach().cpu().numpy(), 211 | flip_pairs, 212 | target_type=self.target_type) 213 | # feature is not aligned, shift flipped heatmap for higher accuracy 214 | if self.test_cfg.get('shift_heatmap', False): 215 | output_heatmap[:, :, :, 1:] = output_heatmap[:, :, :, :-1] 216 | else: 217 | output_heatmap = output.detach().cpu().numpy() 218 | return output_heatmap 219 | 220 | def _init_inputs(self, in_channels, in_index, input_transform): 221 | """Check and initialize input transforms. 222 | 223 | The in_channels, in_index and input_transform must match. 224 | Specifically, when input_transform is None, only single feature map 225 | will be selected. So in_channels and in_index must be of type int. 226 | When input_transform is not None, in_channels and in_index must be 227 | list or tuple, with the same length. 228 | 229 | Args: 230 | in_channels (int|Sequence[int]): Input channels. 231 | in_index (int|Sequence[int]): Input feature index. 232 | input_transform (str|None): Transformation type of input features. 233 | Options: 'resize_concat', 'multiple_select', None. 234 | 235 | - 'resize_concat': Multiple feature maps will be resize to the 236 | same size as first one and than concat together. 237 | Usually used in FCN head of HRNet. 238 | - 'multiple_select': Multiple feature maps will be bundle into 239 | a list and passed into decode head. 240 | - None: Only one select feature map is allowed. 241 | """ 242 | 243 | if input_transform is not None: 244 | assert input_transform in ['resize_concat', 'multiple_select'] 245 | self.input_transform = input_transform 246 | self.in_index = in_index 247 | if input_transform is not None: 248 | assert isinstance(in_channels, (list, tuple)) 249 | assert isinstance(in_index, (list, tuple)) 250 | assert len(in_channels) == len(in_index) 251 | if input_transform == 'resize_concat': 252 | self.in_channels = sum(in_channels) 253 | else: 254 | self.in_channels = in_channels 255 | else: 256 | assert isinstance(in_channels, int) 257 | assert isinstance(in_index, int) 258 | self.in_channels = in_channels 259 | 260 | def _transform_inputs(self, inputs): 261 | """Transform inputs for decoder. 262 | 263 | Args: 264 | inputs (list[Tensor] | Tensor): multi-level img features. 
265 | 266 | Returns: 267 | Tensor: The transformed inputs 268 | """ 269 | if not isinstance(inputs, list): 270 | if not isinstance(inputs, list): 271 | if self.upsample > 0: 272 | inputs = resize( 273 | input=F.relu(inputs), 274 | scale_factor=self.upsample, 275 | mode='bilinear', 276 | align_corners=self.align_corners 277 | ) 278 | return inputs 279 | 280 | if self.input_transform == 'resize_concat': 281 | inputs = [inputs[i] for i in self.in_index] 282 | upsampled_inputs = [ 283 | resize( 284 | input=x, 285 | size=inputs[0].shape[2:], 286 | mode='bilinear', 287 | align_corners=self.align_corners) for x in inputs 288 | ] 289 | inputs = torch.cat(upsampled_inputs, dim=1) 290 | elif self.input_transform == 'multiple_select': 291 | inputs = [inputs[i] for i in self.in_index] 292 | else: 293 | inputs = inputs[self.in_index] 294 | 295 | return inputs 296 | 297 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 298 | """Make deconv layers.""" 299 | if num_layers != len(num_filters): 300 | error_msg = f'num_layers({num_layers}) ' \ 301 | f'!= length of num_filters({len(num_filters)})' 302 | raise ValueError(error_msg) 303 | if num_layers != len(num_kernels): 304 | error_msg = f'num_layers({num_layers}) ' \ 305 | f'!= length of num_kernels({len(num_kernels)})' 306 | raise ValueError(error_msg) 307 | 308 | layers = [] 309 | for i in range(num_layers): 310 | kernel, padding, output_padding = \ 311 | self._get_deconv_cfg(num_kernels[i]) 312 | 313 | planes = num_filters[i] 314 | layers.append( 315 | nn.ConvTranspose2d(in_channels=self.in_channels, 316 | out_channels=planes, 317 | kernel_size=kernel, 318 | stride=2, 319 | padding=padding, 320 | output_padding=output_padding, 321 | bias=False) 322 | ) 323 | layers.append(nn.BatchNorm2d(planes)) 324 | layers.append(nn.ReLU(inplace=True)) 325 | self.in_channels = planes 326 | 327 | return nn.Sequential(*layers) 328 | 329 | def init_weights(self): 330 | """Initialize model weights.""" 331 | for _, m in self.deconv_layers.named_modules(): 332 | if isinstance(m, nn.ConvTranspose2d): 333 | normal_init(m, std=0.001) 334 | elif isinstance(m, nn.BatchNorm2d): 335 | constant_init(m, 1) 336 | for m in self.final_layer.modules(): 337 | if isinstance(m, nn.Conv2d): 338 | normal_init(m, std=0.001, bias=0) 339 | elif isinstance(m, nn.BatchNorm2d): 340 | constant_init(m, 1) 341 | -------------------------------------------------------------------------------- /models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .classfication_loss import BCELoss 3 | from .heatmap_loss import AdaptiveWingLoss 4 | from .mesh_loss import GANLoss, MeshLoss 5 | from .mse_loss import JointsMSELoss, JointsOHKMMSELoss 6 | from .multi_loss_factory import AELoss, HeatmapLoss, MultiLossFactory 7 | from .regression_loss import (BoneLoss, L1Loss, MPJPELoss, MSELoss, 8 | SemiSupervisionLoss, SmoothL1Loss, SoftWingLoss, 9 | WingLoss) 10 | 11 | __all__ = [ 12 | 'JointsMSELoss', 'JointsOHKMMSELoss', 'HeatmapLoss', 'AELoss', 13 | 'MultiLossFactory', 'MeshLoss', 'GANLoss', 'SmoothL1Loss', 'WingLoss', 14 | 'MPJPELoss', 'MSELoss', 'L1Loss', 'BCELoss', 'BoneLoss', 15 | 'SemiSupervisionLoss', 'SoftWingLoss', 'AdaptiveWingLoss' 16 | ] 17 | -------------------------------------------------------------------------------- /models/losses/classfication_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | __all__ = ['BCELoss'] 7 | 8 | 9 | class BCELoss(nn.Module): 10 | """Binary Cross Entropy loss.""" 11 | 12 | def __init__(self, use_target_weight=False, loss_weight=1.): 13 | super().__init__() 14 | self.criterion = F.binary_cross_entropy 15 | self.use_target_weight = use_target_weight 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, output, target, target_weight=None): 19 | """Forward function. 20 | 21 | Note: 22 | - batch_size: N 23 | - num_labels: K 24 | 25 | Args: 26 | output (torch.Tensor[N, K]): Output classification. 27 | target (torch.Tensor[N, K]): Target classification. 28 | target_weight (torch.Tensor[N, K] or torch.Tensor[N]): 29 | Weights across different labels. 30 | """ 31 | 32 | if self.use_target_weight: 33 | assert target_weight is not None 34 | loss = self.criterion(output, target, reduction='none') 35 | if target_weight.dim() == 1: 36 | target_weight = target_weight[:, None] 37 | loss = (loss * target_weight).mean() 38 | else: 39 | loss = self.criterion(output, target) 40 | 41 | return loss * self.loss_weight 42 | -------------------------------------------------------------------------------- /models/losses/heatmap_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class AdaptiveWingLoss(nn.Module): 7 | """Adaptive wing loss. paper ref: 'Adaptive Wing Loss for Robust Face 8 | Alignment via Heatmap Regression' Wang et al. ICCV'2019. 9 | 10 | Args: 11 | alpha (float), omega (float), epsilon (float), theta (float) 12 | are hyper-parameters. 13 | use_target_weight (bool): Option to use weighted MSE loss. 14 | Different joint types may have different target weights. 15 | loss_weight (float): Weight of the loss. Default: 1.0. 16 | """ 17 | 18 | def __init__(self, 19 | alpha=2.1, 20 | omega=14, 21 | epsilon=1, 22 | theta=0.5, 23 | use_target_weight=False, 24 | loss_weight=1.): 25 | super().__init__() 26 | self.alpha = float(alpha) 27 | self.omega = float(omega) 28 | self.epsilon = float(epsilon) 29 | self.theta = float(theta) 30 | self.use_target_weight = use_target_weight 31 | self.loss_weight = loss_weight 32 | 33 | def criterion(self, pred, target): 34 | """Criterion of wingloss. 35 | 36 | Note: 37 | batch_size: N 38 | num_keypoints: K 39 | 40 | Args: 41 | pred (torch.Tensor[NxKxHxW]): Predicted heatmaps. 42 | target (torch.Tensor[NxKxHxW]): Target heatmaps. 43 | """ 44 | H, W = pred.shape[2:4] 45 | delta = (target - pred).abs() 46 | 47 | A = self.omega * ( 48 | 1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - target)) 49 | ) * (self.alpha - target) * (torch.pow( 50 | self.theta / self.epsilon, 51 | self.alpha - target - 1)) * (1 / self.epsilon) 52 | C = self.theta * A - self.omega * torch.log( 53 | 1 + torch.pow(self.theta / self.epsilon, self.alpha - target)) 54 | 55 | losses = torch.where( 56 | delta < self.theta, 57 | self.omega * 58 | torch.log(1 + 59 | torch.pow(delta / self.epsilon, self.alpha - target)), 60 | A * delta - C) 61 | 62 | return torch.mean(losses) 63 | 64 | def forward(self, output, target, target_weight): 65 | """Forward function. 66 | 67 | Note: 68 | batch_size: N 69 | num_keypoints: K 70 | 71 | Args: 72 | output (torch.Tensor[NxKxHxW]): Output heatmaps. 73 | target (torch.Tensor[NxKxHxW]): Target heatmaps. 74 | target_weight (torch.Tensor[NxKx1]): 75 | Weights across different joint types. 
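        Example (illustrative sketch with hypothetical shapes):

            >>> criterion = AdaptiveWingLoss(use_target_weight=True)
            >>> output = torch.rand(2, 17, 64, 48)
            >>> target = torch.rand(2, 17, 64, 48)
            >>> target_weight = torch.ones(2, 17, 1)
            >>> loss = criterion(output, target, target_weight)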
76 | """ 77 | if self.use_target_weight: 78 | loss = self.criterion(output * target_weight.unsqueeze(-1), 79 | target * target_weight.unsqueeze(-1)) 80 | else: 81 | loss = self.criterion(output, target) 82 | 83 | return loss * self.loss_weight 84 | -------------------------------------------------------------------------------- /models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | __all__ = ['JointsMSELoss', 'JointsOHKMMSELoss',] 7 | 8 | 9 | class JointsMSELoss(nn.Module): 10 | """MSE loss for heatmaps. 11 | 12 | Args: 13 | use_target_weight (bool): Option to use weighted MSE loss. 14 | Different joint types may have different target weights. 15 | loss_weight (float): Weight of the loss. Default: 1.0. 16 | """ 17 | 18 | def __init__(self, use_target_weight=False, loss_weight=1.): 19 | super().__init__() 20 | self.criterion = nn.MSELoss() 21 | self.use_target_weight = use_target_weight 22 | self.loss_weight = loss_weight 23 | 24 | def forward(self, output, target, target_weight): 25 | """Forward function.""" 26 | batch_size = output.size(0) 27 | num_joints = output.size(1) 28 | 29 | heatmaps_pred = output.reshape( 30 | (batch_size, num_joints, -1)).split(1, 1) 31 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 32 | 33 | loss = 0. 34 | 35 | for idx in range(num_joints): 36 | heatmap_pred = heatmaps_pred[idx].squeeze(1) 37 | heatmap_gt = heatmaps_gt[idx].squeeze(1) 38 | if self.use_target_weight: 39 | loss += self.criterion(heatmap_pred * target_weight[:, idx], 40 | heatmap_gt * target_weight[:, idx]) 41 | else: 42 | loss += self.criterion(heatmap_pred, heatmap_gt) 43 | 44 | return loss / num_joints * self.loss_weight 45 | 46 | 47 | class CombinedTargetMSELoss(nn.Module): 48 | """MSE loss for combined target. 49 | CombinedTarget: The combination of classification target 50 | (response map) and regression target (offset map). 51 | Paper ref: Huang et al. The Devil is in the Details: Delving into 52 | Unbiased Data Processing for Human Pose Estimation (CVPR 2020). 53 | 54 | Args: 55 | use_target_weight (bool): Option to use weighted MSE loss. 56 | Different joint types may have different target weights. 57 | loss_weight (float): Weight of the loss. Default: 1.0. 58 | """ 59 | 60 | def __init__(self, use_target_weight, loss_weight=1.): 61 | super().__init__() 62 | self.criterion = nn.MSELoss(reduction='mean') 63 | self.use_target_weight = use_target_weight 64 | self.loss_weight = loss_weight 65 | 66 | def forward(self, output, target, target_weight): 67 | batch_size = output.size(0) 68 | num_channels = output.size(1) 69 | heatmaps_pred = output.reshape( 70 | (batch_size, num_channels, -1)).split(1, 1) 71 | heatmaps_gt = target.reshape( 72 | (batch_size, num_channels, -1)).split(1, 1) 73 | loss = 0. 
74 | num_joints = num_channels // 3 75 | for idx in range(num_joints): 76 | heatmap_pred = heatmaps_pred[idx * 3].squeeze() 77 | heatmap_gt = heatmaps_gt[idx * 3].squeeze() 78 | offset_x_pred = heatmaps_pred[idx * 3 + 1].squeeze() 79 | offset_x_gt = heatmaps_gt[idx * 3 + 1].squeeze() 80 | offset_y_pred = heatmaps_pred[idx * 3 + 2].squeeze() 81 | offset_y_gt = heatmaps_gt[idx * 3 + 2].squeeze() 82 | if self.use_target_weight: 83 | heatmap_pred = heatmap_pred * target_weight[:, idx] 84 | heatmap_gt = heatmap_gt * target_weight[:, idx] 85 | # classification loss 86 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 87 | # regression loss 88 | loss += 0.5 * self.criterion(heatmap_gt * offset_x_pred, 89 | heatmap_gt * offset_x_gt) 90 | loss += 0.5 * self.criterion(heatmap_gt * offset_y_pred, 91 | heatmap_gt * offset_y_gt) 92 | return loss / num_joints * self.loss_weight 93 | 94 | 95 | class JointsOHKMMSELoss(nn.Module): 96 | """MSE loss with online hard keypoint mining. 97 | 98 | Args: 99 | use_target_weight (bool): Option to use weighted MSE loss. 100 | Different joint types may have different target weights. 101 | topk (int): Only top k joint losses are kept. 102 | loss_weight (float): Weight of the loss. Default: 1.0. 103 | """ 104 | 105 | def __init__(self, use_target_weight=False, topk=8, loss_weight=1.): 106 | super().__init__() 107 | assert topk > 0 108 | self.criterion = nn.MSELoss(reduction='none') 109 | self.use_target_weight = use_target_weight 110 | self.topk = topk 111 | self.loss_weight = loss_weight 112 | 113 | def _ohkm(self, loss): 114 | """Online hard keypoint mining.""" 115 | ohkm_loss = 0. 116 | N = len(loss) 117 | for i in range(N): 118 | sub_loss = loss[i] 119 | _, topk_idx = torch.topk( 120 | sub_loss, k=self.topk, dim=0, sorted=False) 121 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 122 | ohkm_loss += torch.sum(tmp_loss) / self.topk 123 | ohkm_loss /= N 124 | return ohkm_loss 125 | 126 | def forward(self, output, target, target_weight): 127 | """Forward function.""" 128 | batch_size = output.size(0) 129 | num_joints = output.size(1) 130 | if num_joints < self.topk: 131 | raise ValueError(f'topk ({self.topk}) should not ' 132 | f'larger than num_joints ({num_joints}).') 133 | heatmaps_pred = output.reshape( 134 | (batch_size, num_joints, -1)).split(1, 1) 135 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 136 | 137 | losses = [] 138 | for idx in range(num_joints): 139 | heatmap_pred = heatmaps_pred[idx].squeeze(1) 140 | heatmap_gt = heatmaps_gt[idx].squeeze(1) 141 | if self.use_target_weight: 142 | losses.append( 143 | self.criterion(heatmap_pred * target_weight[:, idx], 144 | heatmap_gt * target_weight[:, idx])) 145 | else: 146 | losses.append(self.criterion(heatmap_pred, heatmap_gt)) 147 | 148 | losses = [loss.mean(dim=1).unsqueeze(dim=1) for loss in losses] 149 | losses = torch.cat(losses, dim=1) 150 | 151 | return self._ohkm(losses) * self.loss_weight 152 | -------------------------------------------------------------------------------- /models/losses/multi_loss_factory.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation 3 | # Original licence: Copyright (c) Microsoft, under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | __all__ = ['HeatmapLoss', 'AELoss', 'MultiLossFactory'] 11 | 12 | 13 | def _make_input(t, requires_grad=False, device=torch.device('cpu')): 14 | """Make zero inputs for AE loss. 15 | 16 | Args: 17 | t (torch.Tensor): input 18 | requires_grad (bool): Option to use requires_grad. 19 | device: torch device 20 | 21 | Returns: 22 | torch.Tensor: zero input. 23 | """ 24 | inp = torch.autograd.Variable(t, requires_grad=requires_grad) 25 | inp = inp.sum() 26 | inp = inp.to(device) 27 | return inp 28 | 29 | 30 | class HeatmapLoss(nn.Module): 31 | """Accumulate the heatmap loss for each image in the batch. 32 | 33 | Args: 34 | supervise_empty (bool): Whether to supervise empty channels. 35 | """ 36 | 37 | def __init__(self, supervise_empty=True): 38 | super().__init__() 39 | self.supervise_empty = supervise_empty 40 | 41 | def forward(self, pred, gt, mask): 42 | """Forward function. 43 | 44 | Note: 45 | - batch_size: N 46 | - heatmaps weight: W 47 | - heatmaps height: H 48 | - max_num_people: M 49 | - num_keypoints: K 50 | 51 | Args: 52 | pred (torch.Tensor[N,K,H,W]):heatmap of output. 53 | gt (torch.Tensor[N,K,H,W]): target heatmap. 54 | mask (torch.Tensor[N,H,W]): mask of target. 55 | """ 56 | assert pred.size() == gt.size( 57 | ), f'pred.size() is {pred.size()}, gt.size() is {gt.size()}' 58 | 59 | if not self.supervise_empty: 60 | empty_mask = (gt.sum(dim=[2, 3], keepdim=True) > 0).float() 61 | loss = ((pred - gt)**2) * empty_mask.expand_as( 62 | pred) * mask[:, None, :, :].expand_as(pred) 63 | else: 64 | loss = ((pred - gt)**2) * mask[:, None, :, :].expand_as(pred) 65 | loss = loss.mean(dim=3).mean(dim=2).mean(dim=1) 66 | return loss 67 | 68 | 69 | class AELoss(nn.Module): 70 | """Associative Embedding loss. 71 | 72 | `Associative Embedding: End-to-End Learning for Joint Detection and 73 | Grouping `_. 74 | """ 75 | 76 | def __init__(self, loss_type): 77 | super().__init__() 78 | self.loss_type = loss_type 79 | 80 | def singleTagLoss(self, pred_tag, joints): 81 | """Associative embedding loss for one image. 82 | 83 | Note: 84 | - heatmaps weight: W 85 | - heatmaps height: H 86 | - max_num_people: M 87 | - num_keypoints: K 88 | 89 | Args: 90 | pred_tag (torch.Tensor[KxHxW,1]): tag of output for one image. 91 | joints (torch.Tensor[M,K,2]): joints information for one image. 
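        Note:
            The pull term is the mean squared deviation of each person's tags
            from that person's mean tag; the push term penalizes pairs of
            different persons whose mean tags are close, using exp(-diff**2)
            for loss_type='exp' or a hinge on (1 - |diff|) for loss_type='max'.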
92 | """ 93 | tags = [] 94 | pull = 0 95 | for joints_per_person in joints: 96 | tmp = [] 97 | for joint in joints_per_person: 98 | if joint[1] > 0: 99 | tmp.append(pred_tag[joint[0]]) 100 | if len(tmp) == 0: 101 | continue 102 | tmp = torch.stack(tmp) 103 | tags.append(torch.mean(tmp, dim=0)) 104 | pull = pull + torch.mean((tmp - tags[-1].expand_as(tmp))**2) 105 | 106 | num_tags = len(tags) 107 | if num_tags == 0: 108 | return ( 109 | _make_input(torch.zeros(1).float(), device=pred_tag.device), 110 | _make_input(torch.zeros(1).float(), device=pred_tag.device)) 111 | elif num_tags == 1: 112 | return (_make_input( 113 | torch.zeros(1).float(), device=pred_tag.device), pull) 114 | 115 | tags = torch.stack(tags) 116 | 117 | size = (num_tags, num_tags) 118 | A = tags.expand(*size) 119 | B = A.permute(1, 0) 120 | 121 | diff = A - B 122 | 123 | if self.loss_type == 'exp': 124 | diff = torch.pow(diff, 2) 125 | push = torch.exp(-diff) 126 | push = torch.sum(push) - num_tags 127 | elif self.loss_type == 'max': 128 | diff = 1 - torch.abs(diff) 129 | push = torch.clamp(diff, min=0).sum() - num_tags 130 | else: 131 | raise ValueError('Unknown ae loss type') 132 | 133 | push_loss = push / ((num_tags - 1) * num_tags) * 0.5 134 | pull_loss = pull / (num_tags) 135 | 136 | return push_loss, pull_loss 137 | 138 | def forward(self, tags, joints): 139 | """Accumulate the tag loss for each image in the batch. 140 | 141 | Note: 142 | - batch_size: N 143 | - heatmaps weight: W 144 | - heatmaps height: H 145 | - max_num_people: M 146 | - num_keypoints: K 147 | 148 | Args: 149 | tags (torch.Tensor[N,KxHxW,1]): tag channels of output. 150 | joints (torch.Tensor[N,M,K,2]): joints information. 151 | """ 152 | pushes, pulls = [], [] 153 | joints = joints.cpu().data.numpy() 154 | batch_size = tags.size(0) 155 | for i in range(batch_size): 156 | push, pull = self.singleTagLoss(tags[i], joints[i]) 157 | pushes.append(push) 158 | pulls.append(pull) 159 | return torch.stack(pushes), torch.stack(pulls) 160 | 161 | 162 | class MultiLossFactory(nn.Module): 163 | """Loss for bottom-up models. 164 | 165 | Args: 166 | num_joints (int): Number of keypoints. 167 | num_stages (int): Number of stages. 168 | ae_loss_type (str): Type of ae loss. 169 | with_ae_loss (list[bool]): Use ae loss or not in multi-heatmap. 170 | push_loss_factor (list[float]): 171 | Parameter of push loss in multi-heatmap. 172 | pull_loss_factor (list[float]): 173 | Parameter of pull loss in multi-heatmap. 174 | with_heatmap_loss (list[bool]): 175 | Use heatmap loss or not in multi-heatmap. 176 | heatmaps_loss_factor (list[float]): 177 | Parameter of heatmap loss in multi-heatmap. 178 | supervise_empty (bool): Whether to supervise empty channels. 
179 | """ 180 | 181 | def __init__(self, 182 | num_joints, 183 | num_stages, 184 | ae_loss_type, 185 | with_ae_loss, 186 | push_loss_factor, 187 | pull_loss_factor, 188 | with_heatmaps_loss, 189 | heatmaps_loss_factor, 190 | supervise_empty=True): 191 | super().__init__() 192 | 193 | assert isinstance(with_heatmaps_loss, (list, tuple)), \ 194 | 'with_heatmaps_loss should be a list or tuple' 195 | assert isinstance(heatmaps_loss_factor, (list, tuple)), \ 196 | 'heatmaps_loss_factor should be a list or tuple' 197 | assert isinstance(with_ae_loss, (list, tuple)), \ 198 | 'with_ae_loss should be a list or tuple' 199 | assert isinstance(push_loss_factor, (list, tuple)), \ 200 | 'push_loss_factor should be a list or tuple' 201 | assert isinstance(pull_loss_factor, (list, tuple)), \ 202 | 'pull_loss_factor should be a list or tuple' 203 | 204 | self.num_joints = num_joints 205 | self.num_stages = num_stages 206 | self.ae_loss_type = ae_loss_type 207 | self.with_ae_loss = with_ae_loss 208 | self.push_loss_factor = push_loss_factor 209 | self.pull_loss_factor = pull_loss_factor 210 | self.with_heatmaps_loss = with_heatmaps_loss 211 | self.heatmaps_loss_factor = heatmaps_loss_factor 212 | 213 | self.heatmaps_loss = \ 214 | nn.ModuleList( 215 | [ 216 | HeatmapLoss(supervise_empty) 217 | if with_heatmaps_loss else None 218 | for with_heatmaps_loss in self.with_heatmaps_loss 219 | ] 220 | ) 221 | 222 | self.ae_loss = \ 223 | nn.ModuleList( 224 | [ 225 | AELoss(self.ae_loss_type) if with_ae_loss else None 226 | for with_ae_loss in self.with_ae_loss 227 | ] 228 | ) 229 | 230 | def forward(self, outputs, heatmaps, masks, joints): 231 | """Forward function to calculate losses. 232 | 233 | Note: 234 | - batch_size: N 235 | - heatmaps weight: W 236 | - heatmaps height: H 237 | - max_num_people: M 238 | - num_keypoints: K 239 | - output_channel: C C=2K if use ae loss else K 240 | 241 | Args: 242 | outputs (list(torch.Tensor[N,C,H,W])): outputs of stages. 243 | heatmaps (list(torch.Tensor[N,K,H,W])): target of heatmaps. 244 | masks (list(torch.Tensor[N,H,W])): masks of heatmaps. 245 | joints (list(torch.Tensor[N,M,K,2])): joints of ae loss. 
246 | """ 247 | heatmaps_losses = [] 248 | push_losses = [] 249 | pull_losses = [] 250 | for idx in range(len(outputs)): 251 | offset_feat = 0 252 | if self.heatmaps_loss[idx]: 253 | heatmaps_pred = outputs[idx][:, :self.num_joints] 254 | offset_feat = self.num_joints 255 | heatmaps_loss = self.heatmaps_loss[idx](heatmaps_pred, 256 | heatmaps[idx], 257 | masks[idx]) 258 | heatmaps_loss = heatmaps_loss * self.heatmaps_loss_factor[idx] 259 | heatmaps_losses.append(heatmaps_loss) 260 | else: 261 | heatmaps_losses.append(None) 262 | 263 | if self.ae_loss[idx]: 264 | tags_pred = outputs[idx][:, offset_feat:] 265 | batch_size = tags_pred.size()[0] 266 | tags_pred = tags_pred.contiguous().view(batch_size, -1, 1) 267 | 268 | push_loss, pull_loss = self.ae_loss[idx](tags_pred, 269 | joints[idx]) 270 | push_loss = push_loss * self.push_loss_factor[idx] 271 | pull_loss = pull_loss * self.pull_loss_factor[idx] 272 | 273 | push_losses.append(push_loss) 274 | pull_losses.append(pull_loss) 275 | else: 276 | push_losses.append(None) 277 | pull_losses.append(None) 278 | 279 | return heatmaps_losses, push_losses, pull_losses 280 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .backbone.vit import ViT 4 | from .head.topdown_heatmap_simple_head import TopdownHeatmapSimpleHead 5 | 6 | 7 | __all__ = ['ViTPose'] 8 | 9 | 10 | class ViTPose(nn.Module): 11 | def __init__(self, cfg: dict) -> None: 12 | super(ViTPose, self).__init__() 13 | 14 | backbone_cfg = {k: v for k, v in cfg['backbone'].items() if k != 'type'} 15 | head_cfg = {k: v for k, v in cfg['keypoint_head'].items() if k != 'type'} 16 | 17 | self.backbone = ViT(**backbone_cfg) 18 | self.keypoint_head = TopdownHeatmapSimpleHead(**head_cfg) 19 | 20 | def forward_features(self, x): 21 | return self.backbone(x) 22 | 23 | def forward(self, x): 24 | return self.keypoint_head(self.backbone(x)) -------------------------------------------------------------------------------- /models/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | 3 | class LayerDecayOptimizer: 4 | def __init__(self, optimizer, layerwise_decay_rate): 5 | self.optimizer = optimizer 6 | self.layerwise_decay_rate = layerwise_decay_rate 7 | self.param_groups = optimizer.param_groups 8 | 9 | def step(self, *args, **kwargs): 10 | for i, group in enumerate(self.optimizer.param_groups): 11 | group['lr'] *= self.layerwise_decay_rate[i] 12 | self.optimizer.step(*args, **kwargs) 13 | 14 | def zero_grad(self, *args, **kwargs): 15 | self.optimizer.zero_grad(*args, **kwargs) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ffmpeg==1.4 2 | matplotlib==3.6.2 3 | munkres==1.1.4 4 | numpy==1.23.5 5 | opencv_python==4.6.0.66 6 | Pillow==9.3.0 7 | torch==1.9.0+cu111 8 | torchvision==0.10.0+cu111 9 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import copy 4 | import os 5 | import os.path as osp 6 | import time 7 | import warnings 8 | import click 9 | import yaml 10 | 11 | from glob import glob 12 | 13 | import torch 14 | import torch.distributed as dist 15 | 16 | from utils.util import init_random_seed, set_random_seed 17 | from utils.dist_util import get_dist_info, init_dist 18 | from utils.logging import get_root_logger 19 | 20 | import configs.ViTPose_base_coco_256x192 as b_cfg 21 | import configs.ViTPose_large_coco_256x192 as l_cfg 22 | import configs.ViTPose_huge_coco_256x192 as h_cfg 23 | 24 | from models.model import ViTPose 25 | from datasets.COCO import COCODataset 26 | from utils.train_valid_fn import train_model 27 | 28 | CUR_PATH = osp.dirname(__file__) 29 | 30 | @click.command() 31 | @click.option('--config-path', type=click.Path(exists=True), default='config.yaml', required=True, help='train config file path') 32 | @click.option('--model-name', type=str, default='b', required=True, help='[b: ViT-B, l: ViT-L, h: ViT-H]') 33 | def main(config_path, model_name): 34 | 35 | cfg = {'b':b_cfg, 36 | 'l':l_cfg, 37 | 'h':h_cfg}.get(model_name.lower()) 38 | # Load config.yaml 39 | with open(config_path, 'r') as f: 40 | cfg_yaml = yaml.load(f, Loader=yaml.SafeLoader) 41 | 42 | for k, v in cfg_yaml.items(): 43 | if hasattr(cfg, k): 44 | raise ValueError(f"Already exsist {k} in config") 45 | else: 46 | cfg.__setattr__(k, v) 47 | 48 | # set cudnn_benchmark 49 | if cfg.cudnn_benchmark: 50 | torch.backends.cudnn.benchmark = True 51 | 52 | # Set work directory (session-level) 53 | if not hasattr(cfg, 'work_dir'): 54 | cfg.__setattr__('work_dir', f"{CUR_PATH}/runs/train") 55 | 56 | if not osp.exists(cfg.work_dir): 57 | os.makedirs(cfg.work_dir) 58 | session_list = sorted(glob(f"{cfg.work_dir}/*")) 59 | if len(session_list) == 0: 60 | session = 1 61 | else: 62 | session = int(os.path.basename(session_list[-1])) + 1 63 | session_dir = osp.join(cfg.work_dir, str(session).zfill(3)) 64 | os.makedirs(session_dir) 65 | cfg.__setattr__('work_dir', session_dir) 66 | 67 | 68 | if cfg.autoscale_lr: 69 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 70 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 71 | 72 | # init distributed env first, since logger depends on the dist info. 
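    # cfg.launcher is expected to be 'none' for single-process training, or one of
    # 'pytorch', 'mpi', 'slurm', which are dispatched by init_dist in utils/dist_util.py.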
73 | if cfg.launcher == 'none': 74 | distributed = False 75 | if len(cfg.gpu_ids) > 1: 76 | warnings.warn( 77 | f"We treat {cfg['gpu_ids']} as gpu-ids, and reset to " 78 | f"{cfg['gpu_ids'][0:1]} as gpu-ids to avoid potential error in " 79 | "non-distribute training time.") 80 | cfg.gpu_ids = cfg.gpu_ids[0:1] 81 | else: 82 | distributed = True 83 | init_dist(cfg.launcher, **cfg.dist_params) 84 | # re-set gpu_ids with distributed training mode 85 | _, world_size = get_dist_info() 86 | cfg.gpu_ids = range(world_size) 87 | 88 | # init the logger before other steps 89 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 90 | log_file = osp.join(session_dir, f'{timestamp}.log') 91 | logger = get_root_logger(log_file=log_file) 92 | 93 | # init the meta dict to record some important information such as 94 | # environment info and seed, which will be logged 95 | meta = dict() 96 | 97 | # log some basic info 98 | logger.info(f'Distributed training: {distributed}') 99 | 100 | # set random seeds 101 | seed = init_random_seed(cfg.seed) 102 | logger.info(f"Set random seed to {seed}, " 103 | f"deterministic: {cfg.deterministic}") 104 | set_random_seed(seed, deterministic=cfg.deterministic) 105 | meta['seed'] = seed 106 | 107 | # Set model 108 | model = ViTPose(cfg.model) 109 | if cfg.resume_from: 110 | model.load_state_dict(torch.load(cfg.resume_from)['state_dict']) 111 | 112 | # Set dataset 113 | datasets_train = COCODataset( 114 | root_path=cfg.data_root, 115 | data_version="train_custom", 116 | is_train=True, 117 | use_gt_bboxes=True, 118 | image_width=192, 119 | image_height=256, 120 | scale=True, 121 | scale_factor=0.35, 122 | flip_prob=0.5, 123 | rotate_prob=0.5, 124 | rotation_factor=45., 125 | half_body_prob=0.3, 126 | use_different_joints_weight=True, 127 | heatmap_sigma=3, 128 | soft_nms=False 129 | ) 130 | 131 | datasets_valid = COCODataset( 132 | root_path=cfg.data_root, 133 | data_version="valid_custom", 134 | is_train=False, 135 | use_gt_bboxes=True, 136 | image_width=192, 137 | image_height=256, 138 | scale=False, 139 | scale_factor=0.35, 140 | flip_prob=0.5, 141 | rotate_prob=0.5, 142 | rotation_factor=45., 143 | half_body_prob=0.3, 144 | use_different_joints_weight=True, 145 | heatmap_sigma=3, 146 | soft_nms=False 147 | ) 148 | 149 | train_model( 150 | model=model, 151 | datasets_train=datasets_train, 152 | datasets_valid=datasets_valid, 153 | cfg=cfg, 154 | distributed=distributed, 155 | validate=cfg.validate, 156 | timestamp=timestamp, 157 | meta=meta 158 | ) 159 | 160 | 161 | if __name__ == '__main__': 162 | main() 163 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .util import * 2 | from .top_down_eval import * 3 | from .post_processing import * 4 | from .visualization import * 5 | from .dist_util import * 6 | from .logging import * 7 | -------------------------------------------------------------------------------- /utils/dist_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | import functools 4 | import os 5 | import socket 6 | import subprocess 7 | from collections import OrderedDict 8 | from typing import Callable, List, Optional, Tuple 9 | 10 | import torch 11 | import torch.multiprocessing as mp 12 | from torch import distributed as dist 13 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 14 | _unflatten_dense_tensors) 15 | 16 | 17 | def is_mps_available() -> bool: 18 | """Return True if mps devices exist. 19 | 20 | It's specialized for mac m1 chips and require torch version 1.12 or higher. 21 | """ 22 | try: 23 | import torch 24 | return hasattr(torch.backends, 25 | 'mps') and torch.backends.mps.is_available() 26 | except Exception: 27 | return False 28 | 29 | def _find_free_port() -> str: 30 | # Copied from https://github.com/facebookresearch/detectron2/blob/main/detectron2/engine/launch.py # noqa: E501 31 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 32 | # Binding to port 0 will cause the OS to find an available port for us 33 | sock.bind(('', 0)) 34 | port = sock.getsockname()[1] 35 | sock.close() 36 | # NOTE: there is still a chance the port could be taken by other processes. 37 | return port 38 | 39 | 40 | def _is_free_port(port: int) -> bool: 41 | ips = socket.gethostbyname_ex(socket.gethostname())[-1] 42 | ips.append('localhost') 43 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 44 | return all(s.connect_ex((ip, port)) != 0 for ip in ips) 45 | 46 | 47 | def init_dist(launcher: str, backend: str = 'nccl', **kwargs) -> None: 48 | if mp.get_start_method(allow_none=True) is None: 49 | mp.set_start_method('spawn') 50 | if launcher == 'pytorch': 51 | _init_dist_pytorch(backend, **kwargs) 52 | elif launcher == 'mpi': 53 | _init_dist_mpi(backend, **kwargs) 54 | elif launcher == 'slurm': 55 | _init_dist_slurm(backend, **kwargs) 56 | else: 57 | raise ValueError(f'Invalid launcher type: {launcher}') 58 | 59 | 60 | def _init_dist_pytorch(backend: str, **kwargs) -> None: 61 | # TODO: use local_rank instead of rank % num_gpus 62 | rank = int(os.environ['RANK']) 63 | num_gpus = torch.cuda.device_count() 64 | torch.cuda.set_device(rank % num_gpus) 65 | dist.init_process_group(backend=backend, **kwargs) 66 | 67 | 68 | def _init_dist_mpi(backend: str, **kwargs) -> None: 69 | local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK']) 70 | torch.cuda.set_device(local_rank) 71 | if 'MASTER_PORT' not in os.environ: 72 | # 29500 is torch.distributed default port 73 | os.environ['MASTER_PORT'] = '29500' 74 | if 'MASTER_ADDR' not in os.environ: 75 | raise KeyError('The environment variable MASTER_ADDR is not set') 76 | os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE'] 77 | os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK'] 78 | dist.init_process_group(backend=backend, **kwargs) 79 | 80 | 81 | def _init_dist_slurm(backend: str, port: Optional[int] = None) -> None: 82 | """Initialize slurm distributed training environment. 83 | 84 | If argument ``port`` is not specified, then the master port will be system 85 | environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system 86 | environment variable, then a default port ``29500`` will be used. 87 | 88 | Args: 89 | backend (str): Backend of torch.distributed. 90 | port (int, optional): Master port. Defaults to None. 
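    Note:
        The function reads SLURM_PROCID, SLURM_NTASKS and SLURM_NODELIST from
        the environment, then exports MASTER_PORT, MASTER_ADDR (if not already
        set), WORLD_SIZE, LOCAL_RANK and RANK before calling
        dist.init_process_group.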
91 | """ 92 | proc_id = int(os.environ['SLURM_PROCID']) 93 | ntasks = int(os.environ['SLURM_NTASKS']) 94 | node_list = os.environ['SLURM_NODELIST'] 95 | num_gpus = torch.cuda.device_count() 96 | torch.cuda.set_device(proc_id % num_gpus) 97 | addr = subprocess.getoutput( 98 | f'scontrol show hostname {node_list} | head -n1') 99 | # specify master port 100 | if port is not None: 101 | os.environ['MASTER_PORT'] = str(port) 102 | elif 'MASTER_PORT' in os.environ: 103 | pass # use MASTER_PORT in the environment variable 104 | else: 105 | # if torch.distributed default port(29500) is available 106 | # then use it, else find a free port 107 | if _is_free_port(29500): 108 | os.environ['MASTER_PORT'] = '29500' 109 | else: 110 | os.environ['MASTER_PORT'] = str(_find_free_port()) 111 | # use MASTER_ADDR in the environment variable if it already exists 112 | if 'MASTER_ADDR' not in os.environ: 113 | os.environ['MASTER_ADDR'] = addr 114 | os.environ['WORLD_SIZE'] = str(ntasks) 115 | os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) 116 | os.environ['RANK'] = str(proc_id) 117 | dist.init_process_group(backend=backend) 118 | 119 | 120 | def get_dist_info() -> Tuple[int, int]: 121 | if dist.is_available() and dist.is_initialized(): 122 | rank = dist.get_rank() 123 | world_size = dist.get_world_size() 124 | else: 125 | rank = 0 126 | world_size = 1 127 | return rank, world_size 128 | 129 | 130 | def master_only(func: Callable) -> Callable: 131 | 132 | @functools.wraps(func) 133 | def wrapper(*args, **kwargs): 134 | rank, _ = get_dist_info() 135 | if rank == 0: 136 | return func(*args, **kwargs) 137 | 138 | return wrapper 139 | 140 | 141 | def allreduce_params(params: List[torch.nn.Parameter], 142 | coalesce: bool = True, 143 | bucket_size_mb: int = -1) -> None: 144 | """Allreduce parameters. 145 | 146 | Args: 147 | params (list[torch.nn.Parameter]): List of parameters or buffers 148 | of a model. 149 | coalesce (bool, optional): Whether allreduce parameters as a whole. 150 | Defaults to True. 151 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 152 | Defaults to -1. 153 | """ 154 | _, world_size = get_dist_info() 155 | if world_size == 1: 156 | return 157 | params = [param.data for param in params] 158 | if coalesce: 159 | _allreduce_coalesced(params, world_size, bucket_size_mb) 160 | else: 161 | for tensor in params: 162 | dist.all_reduce(tensor.div_(world_size)) 163 | 164 | 165 | def allreduce_grads(params: List[torch.nn.Parameter], 166 | coalesce: bool = True, 167 | bucket_size_mb: int = -1) -> None: 168 | """Allreduce gradients. 169 | 170 | Args: 171 | params (list[torch.nn.Parameter]): List of parameters of a model. 172 | coalesce (bool, optional): Whether allreduce parameters as a whole. 173 | Defaults to True. 174 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 175 | Defaults to -1. 
176 | """ 177 | grads = [ 178 | param.grad.data for param in params 179 | if param.requires_grad and param.grad is not None 180 | ] 181 | _, world_size = get_dist_info() 182 | if world_size == 1: 183 | return 184 | if coalesce: 185 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 186 | else: 187 | for tensor in grads: 188 | dist.all_reduce(tensor.div_(world_size)) 189 | 190 | 191 | def _allreduce_coalesced(tensors: torch.Tensor, 192 | world_size: int, 193 | bucket_size_mb: int = -1) -> None: 194 | if bucket_size_mb > 0: 195 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 196 | buckets = _take_tensors(tensors, bucket_size_bytes) 197 | else: 198 | buckets = OrderedDict() 199 | for tensor in tensors: 200 | tp = tensor.type() 201 | if tp not in buckets: 202 | buckets[tp] = [] 203 | buckets[tp].append(tensor) 204 | buckets = buckets.values() 205 | 206 | for bucket in buckets: 207 | flat_tensors = _flatten_dense_tensors(bucket) 208 | dist.all_reduce(flat_tensors) 209 | flat_tensors.div_(world_size) 210 | for tensor, synced in zip( 211 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 212 | tensor.copy_(synced) 213 | -------------------------------------------------------------------------------- /utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | import torch.distributed as dist 5 | 6 | logger_initialized: dict = {} 7 | 8 | 9 | def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): 10 | """Initialize and get a logger by name. 11 | 12 | If the logger has not been initialized, this method will initialize the 13 | logger by adding one or two handlers, otherwise the initialized logger will 14 | be directly returned. During initialization, a StreamHandler will always be 15 | added. If `log_file` is specified and the process rank is 0, a FileHandler 16 | will also be added. 17 | 18 | Args: 19 | name (str): Logger name. 20 | log_file (str | None): The log filename. If specified, a FileHandler 21 | will be added to the logger. 22 | log_level (int): The logger level. Note that only the process of 23 | rank 0 is affected, and other processes will set the level to 24 | "Error" thus be silent most of the time. 25 | file_mode (str): The file mode used in opening log file. 26 | Defaults to 'w'. 27 | 28 | Returns: 29 | logging.Logger: The expected logger. 30 | """ 31 | logger = logging.getLogger(name) 32 | if name in logger_initialized: 33 | return logger 34 | # handle hierarchical names 35 | # e.g., logger "a" is initialized, then logger "a.b" will skip the 36 | # initialization since it is a child of "a". 37 | for logger_name in logger_initialized: 38 | if name.startswith(logger_name): 39 | return logger 40 | 41 | # handle duplicate logs to the console 42 | # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) 43 | # to the root logger. As logger.propagate is True by default, this root 44 | # level handler causes logging messages from rank>0 processes to 45 | # unexpectedly show up on the console, creating much unwanted clutter. 46 | # To fix this issue, we set the root logger's StreamHandler, if any, to log 47 | # at the ERROR level. 
48 | for handler in logger.root.handlers: 49 | if type(handler) is logging.StreamHandler: 50 | handler.setLevel(logging.ERROR) 51 | 52 | stream_handler = logging.StreamHandler() 53 | handlers = [stream_handler] 54 | 55 | if dist.is_available() and dist.is_initialized(): 56 | rank = dist.get_rank() 57 | else: 58 | rank = 0 59 | 60 | # only rank 0 will add a FileHandler 61 | if rank == 0 and log_file is not None: 62 | # Here, the default behaviour of the official logger is 'a'. Thus, we 63 | # provide an interface to change the file mode to the default 64 | # behaviour. 65 | file_handler = logging.FileHandler(log_file, file_mode) 66 | handlers.append(file_handler) 67 | 68 | formatter = logging.Formatter( 69 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s') 70 | for handler in handlers: 71 | handler.setFormatter(formatter) 72 | handler.setLevel(log_level) 73 | logger.addHandler(handler) 74 | 75 | if rank == 0: 76 | logger.setLevel(log_level) 77 | else: 78 | logger.setLevel(logging.ERROR) 79 | 80 | logger_initialized[name] = True 81 | 82 | return logger 83 | 84 | 85 | def print_log(msg, logger=None, level=logging.INFO): 86 | """Print a log message. 87 | 88 | Args: 89 | msg (str): The message to be logged. 90 | logger (logging.Logger | str | None): The logger to be used. 91 | Some special loggers are: 92 | 93 | - "silent": no message will be printed. 94 | - other str: the logger obtained with `get_root_logger(logger)`. 95 | - None: The `print()` method will be used to print log messages. 96 | level (int): Logging level. Only available when `logger` is a Logger 97 | object or "root". 98 | """ 99 | if logger is None: 100 | print(msg) 101 | elif isinstance(logger, logging.Logger): 102 | logger.log(level, msg) 103 | elif logger == 'silent': 104 | pass 105 | elif isinstance(logger, str): 106 | _logger = get_logger(logger) 107 | _logger.log(level, msg) 108 | else: 109 | raise TypeError( 110 | 'logger should be either a logging.Logger object, str, ' 111 | f'"silent" or None, but got {type(logger)}') 112 | 113 | 114 | def get_root_logger(log_file=None, log_level=logging.INFO): 115 | """Use `get_logger` method in mmcv to get the root logger. 116 | 117 | The logger will be initialized if it has not been initialized. By default a 118 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 119 | also be added. The name of the root logger is the top-level package name, 120 | e.g., "mmpose". 121 | 122 | Args: 123 | log_file (str | None): The log filename. If specified, a FileHandler 124 | will be added to the root logger. 125 | log_level (int): The root logger level. Note that only the process of 126 | rank 0 is affected, while other processes will set the level to 127 | "Error" and be silent most of the time. 128 | 129 | Returns: 130 | logging.Logger: The root logger. 
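    Example (illustrative; the log path is hypothetical):

        >>> logger = get_root_logger(log_file='runs/train/001/train.log')
        >>> logger.info('Distributed training: False')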
131 | """ 132 | return get_logger(__name__.split('.')[0], log_file, log_level) 133 | 134 | -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/utils/nms/__init__.py -------------------------------------------------------------------------------- /utils/nms/cpu_nms.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/utils/nms/cpu_nms.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/cpu_nms.cpython-39-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/utils/nms/cpu_nms.cpython-39-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 15 | return a if a >= b else b 16 | 17 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 18 | return a if a <= b else b 19 | 20 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 21 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 22 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 23 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 24 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 25 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 26 | 27 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 28 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 29 | 30 | cdef int ndets = dets.shape[0] 31 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 32 | np.zeros((ndets), dtype=np.int) 33 | 34 | # nominal indices 35 | cdef int _i, _j 36 | # sorted indices 37 | cdef int i, j 38 | # temp variables for box i's (the box currently under consideration) 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | # variables for computing overlap with box j (lower scoring box) 41 | cdef np.float32_t xx1, yy1, xx2, yy2 42 | cdef np.float32_t w, h 43 | cdef np.float32_t inter, ovr 44 | 45 | keep = [] 46 | for _i in range(ndets): 47 | i = order[_i] 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | ix1 = x1[i] 52 | iy1 = y1[i] 53 | ix2 = x2[i] 54 | iy2 = y2[i] 55 | iarea = areas[i] 56 | for _j in range(_i + 1, ndets): 57 | j = order[_j] 58 | if suppressed[j] == 1: 59 | continue 60 | xx1 = max(ix1, x1[j]) 61 | yy1 = max(iy1, y1[j]) 62 | xx2 = min(ix2, x2[j]) 63 | yy2 = min(iy2, y2[j]) 64 | w = max(0.0, xx2 - xx1 + 1) 65 | h = max(0.0, yy2 - yy1 + 1) 66 | 
inter = w * h 67 | ovr = inter / (iarea + areas[j] - inter) 68 | if ovr >= thresh: 69 | suppressed[j] = 1 70 | 71 | return keep 72 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/utils/nms/gpu_nms.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/gpu_nms.cpython-39-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyunnn/ViTPose_pytorch/1bd3cc3982d5b976622df54bcdc2106bb16e6d16/utils/nms/gpu_nms.cpython-39-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /utils/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /utils/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | assert sizeof(int) == sizeof(np.int32_t) 15 | 16 | cdef extern from "gpu_nms.hpp": 17 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 18 | 19 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 20 | np.int32_t device_id=0): 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int32_t, ndim=1] \ 29 | order = scores.argsort()[::-1].astype(np.int32) 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | -------------------------------------------------------------------------------- /utils/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 98 | """ 99 | greedily select boxes with high confidence and overlap with current maximum <= thresh 100 | rule out overlap >= thresh, overlap = oks 101 | :param kpts_db 102 | :param thresh: retain overlap < thresh 103 | :return: indexes to keep 104 | """ 105 | if len(kpts_db) == 0: 106 | return [] 107 | 108 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 109 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 110 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 111 | 112 | order = scores.argsort()[::-1] 113 | 114 | keep = [] 115 | while order.size > 0: 116 | i = order[0] 117 | keep.append(i) 118 | 119 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 120 | 121 | inds = np.where(oks_ovr <= thresh)[0] 122 | order = order[inds + 1] 123 | 124 | return 
keep 125 | 126 | 127 | def rescore(overlap, scores, thresh, type='gaussian'): 128 | assert overlap.shape[0] == scores.shape[0] 129 | if type == 'linear': 130 | inds = np.where(overlap >= thresh)[0] 131 | scores[inds] = scores[inds] * (1 - overlap[inds]) 132 | else: 133 | scores = scores * np.exp(- overlap**2 / thresh) 134 | 135 | return scores 136 | 137 | 138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 139 | """ 140 | greedily select boxes with high confidence and overlap with current maximum <= thresh 141 | rule out overlap >= thresh, overlap = oks 142 | :param kpts_db 143 | :param thresh: retain overlap < thresh 144 | :return: indexes to keep 145 | """ 146 | if len(kpts_db) == 0: 147 | return [] 148 | 149 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 150 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 151 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 152 | 153 | order = scores.argsort()[::-1] 154 | scores = scores[order] 155 | 156 | # max_dets = order.size 157 | max_dets = 20 158 | keep = np.zeros(max_dets, dtype=np.intp) 159 | keep_cnt = 0 160 | while order.size > 0 and keep_cnt < max_dets: 161 | i = order[0] 162 | 163 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 164 | 165 | order = order[1:] 166 | scores = rescore(oks_ovr, scores[1:], thresh) 167 | 168 | tmp = scores.argsort()[::-1] 169 | order = order[tmp] 170 | scores = scores[tmp] 171 | 172 | keep[keep_cnt] = i 173 | keep_cnt += 1 174 | 175 | keep = keep[:keep_cnt] 176 | 177 | return keep 178 | # kpts_db = kpts_db[:keep_cnt] 179 | 180 | # return kpts_db 181 | -------------------------------------------------------------------------------- /utils/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include <vector> 9 | #include <iostream> 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
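  // Each block compares one chunk of up to threadsPerBlock (64) boxes selected
  // by blockIdx.y (rows) against another chunk selected by blockIdx.x (columns).
  // threadsPerBlock equals the bit width of unsigned long long, so each thread
  // can pack its overlap decisions for the whole column chunk into the single
  // 64-bit word written to dev_mask below.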
44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(&current_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long> remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /utils/nms/nms_ori.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from cpu_nms import cpu_nms 14 | from gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 98 | """ 99 | greedily select boxes with high confidence and overlap with current maximum <= thresh 100 | rule out overlap >= thresh, overlap = oks 101 | :param kpts_db 102 | :param thresh: retain overlap < thresh 103 | :return: indexes to keep 104 | """ 105 | if len(kpts_db) == 0: 106 | return [] 107 | 108 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 109 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 110 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 111 | 112 | order = scores.argsort()[::-1] 113 | 114 | keep = [] 115 | while order.size > 0: 116 | i = 
order[0] 117 | keep.append(i) 118 | 119 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 120 | 121 | inds = np.where(oks_ovr <= thresh)[0] 122 | order = order[inds + 1] 123 | 124 | return keep 125 | 126 | 127 | def rescore(overlap, scores, thresh, type='gaussian'): 128 | assert overlap.shape[0] == scores.shape[0] 129 | if type == 'linear': 130 | inds = np.where(overlap >= thresh)[0] 131 | scores[inds] = scores[inds] * (1 - overlap[inds]) 132 | else: 133 | scores = scores * np.exp(- overlap**2 / thresh) 134 | 135 | return scores 136 | 137 | 138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 139 | """ 140 | greedily select boxes with high confidence and overlap with current maximum <= thresh 141 | rule out overlap >= thresh, overlap = oks 142 | :param kpts_db 143 | :param thresh: retain overlap < thresh 144 | :return: indexes to keep 145 | """ 146 | if len(kpts_db) == 0: 147 | return [] 148 | 149 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 150 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 151 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 152 | 153 | order = scores.argsort()[::-1] 154 | scores = scores[order] 155 | 156 | # max_dets = order.size 157 | max_dets = 20 158 | keep = np.zeros(max_dets, dtype=np.intp) 159 | keep_cnt = 0 160 | while order.size > 0 and keep_cnt < max_dets: 161 | i = order[0] 162 | 163 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 164 | 165 | order = order[1:] 166 | scores = rescore(oks_ovr, scores[1:], thresh) 167 | 168 | tmp = scores.argsort()[::-1] 169 | order = order[tmp] 170 | scores = scores[tmp] 171 | 172 | keep[keep_cnt] = i 173 | keep_cnt += 1 174 | 175 | keep = keep[:keep_cnt] 176 | 177 | return keep 178 | # kpts_db = kpts_db[:keep_cnt] 179 | 180 | # return kpts_db 181 | -------------------------------------------------------------------------------- /utils/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose.gluon 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 
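    Raises an EnvironmentError if nvcc cannot be found or if any of the
    expected sub-directories ('include', 'lib64') does not exist.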
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.items(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /utils/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .nms import oks_iou, oks_nms, soft_oks_nms 3 | from .one_euro_filter import OneEuroFilter 4 | from .post_transforms import (affine_transform, flip_back, fliplr_joints, 5 | fliplr_regression, get_affine_transform, 6 | get_warp_matrix, rotate_point, transform_preds, 7 | warp_affine_joints) 8 | 9 | __all__ = [ 10 | 'oks_nms', 'soft_oks_nms', 'affine_transform', 'rotate_point', 'flip_back', 11 | 'fliplr_joints', 'fliplr_regression', 'transform_preds', 12 | 'get_affine_transform', 'get_warp_matrix', 'warp_affine_joints', 13 | 'OneEuroFilter', 'oks_iou' 14 | ] 15 | -------------------------------------------------------------------------------- /utils/post_processing/group.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/princeton-vl/pose-ae-train/ 3 | # Original licence: Copyright (c) 2017, umich-vl, under BSD 3-Clause License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | import numpy as np 7 | import torch 8 | from munkres import Munkres 9 | 10 | from ..top_down_eval import post_dark_udp 11 | 12 | 13 | def _py_max_match(scores): 14 | """Apply munkres algorithm to get the best match. 15 | 16 | Args: 17 | scores(np.ndarray): cost matrix. 
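            Rows correspond to newly detected joints of the current keypoint
            type and columns to already grouped people (see _match_by_tag).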
18 | 19 | Returns: 20 | np.ndarray: best match. 21 | """ 22 | m = Munkres() 23 | tmp = m.compute(scores) 24 | tmp = np.array(tmp).astype(int) 25 | return tmp 26 | 27 | 28 | def _match_by_tag(inp, params): 29 | """Match joints by tags. Use Munkres algorithm to calculate the best match 30 | for keypoints grouping. 31 | 32 | Note: 33 | number of keypoints: K 34 | max number of people in an image: M (M=30 by default) 35 | dim of tags: L 36 | If use flip testing, L=2; else L=1. 37 | 38 | Args: 39 | inp(tuple): 40 | tag_k (np.ndarray[KxMxL]): tag corresponding to the 41 | top k values of feature map per keypoint. 42 | loc_k (np.ndarray[KxMx2]): top k locations of the 43 | feature maps for keypoint. 44 | val_k (np.ndarray[KxM]): top k value of the 45 | feature maps per keypoint. 46 | params(Params): class Params(). 47 | 48 | Returns: 49 | np.ndarray: result of pose groups. 50 | """ 51 | assert isinstance(params, _Params), 'params should be class _Params()' 52 | 53 | tag_k, loc_k, val_k = inp 54 | 55 | default_ = np.zeros((params.num_joints, 3 + tag_k.shape[2]), 56 | dtype=np.float32) 57 | 58 | joint_dict = {} 59 | tag_dict = {} 60 | for i in range(params.num_joints): 61 | idx = params.joint_order[i] 62 | 63 | tags = tag_k[idx] 64 | joints = np.concatenate((loc_k[idx], val_k[idx, :, None], tags), 1) 65 | mask = joints[:, 2] > params.detection_threshold 66 | tags = tags[mask] 67 | joints = joints[mask] 68 | 69 | if joints.shape[0] == 0: 70 | continue 71 | 72 | if i == 0 or len(joint_dict) == 0: 73 | for tag, joint in zip(tags, joints): 74 | key = tag[0] 75 | joint_dict.setdefault(key, np.copy(default_))[idx] = joint 76 | tag_dict[key] = [tag] 77 | else: 78 | grouped_keys = list(joint_dict.keys())[:params.max_num_people] 79 | grouped_tags = [np.mean(tag_dict[i], axis=0) for i in grouped_keys] 80 | 81 | if (params.ignore_too_much 82 | and len(grouped_keys) == params.max_num_people): 83 | continue 84 | 85 | diff = joints[:, None, 3:] - np.array(grouped_tags)[None, :, :] 86 | diff_normed = np.linalg.norm(diff, ord=2, axis=2) 87 | diff_saved = np.copy(diff_normed) 88 | 89 | if params.use_detection_val: 90 | diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3] 91 | 92 | num_added = diff.shape[0] 93 | num_grouped = diff.shape[1] 94 | 95 | if num_added > num_grouped: 96 | diff_normed = np.concatenate( 97 | (diff_normed, 98 | np.zeros((num_added, num_added - num_grouped), 99 | dtype=np.float32) + 1e10), 100 | axis=1) 101 | 102 | pairs = _py_max_match(diff_normed) 103 | for row, col in pairs: 104 | if (row < num_added and col < num_grouped 105 | and diff_saved[row][col] < params.tag_threshold): 106 | key = grouped_keys[col] 107 | joint_dict[key][idx] = joints[row] 108 | tag_dict[key].append(tags[row]) 109 | else: 110 | key = tags[row][0] 111 | joint_dict.setdefault(key, np.copy(default_))[idx] = \ 112 | joints[row] 113 | tag_dict[key] = [tags[row]] 114 | 115 | results = np.array([joint_dict[i] for i in joint_dict]).astype(np.float32) 116 | return results 117 | 118 | 119 | class _Params: 120 | """A class of parameter. 121 | 122 | Args: 123 | cfg(Config): config. 
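            Expected keys: 'num_joints', 'max_num_people',
            'detection_threshold', 'tag_threshold', 'use_detection_val'
            and 'ignore_too_much' (read in __init__ below).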
124 | """ 125 | 126 | def __init__(self, cfg): 127 | self.num_joints = cfg['num_joints'] 128 | self.max_num_people = cfg['max_num_people'] 129 | 130 | self.detection_threshold = cfg['detection_threshold'] 131 | self.tag_threshold = cfg['tag_threshold'] 132 | self.use_detection_val = cfg['use_detection_val'] 133 | self.ignore_too_much = cfg['ignore_too_much'] 134 | 135 | if self.num_joints == 17: 136 | self.joint_order = [ 137 | i - 1 for i in 138 | [1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17] 139 | ] 140 | else: 141 | self.joint_order = list(np.arange(self.num_joints)) 142 | 143 | 144 | class HeatmapParser: 145 | """The heatmap parser for post processing.""" 146 | 147 | def __init__(self, cfg): 148 | self.params = _Params(cfg) 149 | self.tag_per_joint = cfg['tag_per_joint'] 150 | self.pool = torch.nn.MaxPool2d(cfg['nms_kernel'], 1, 151 | cfg['nms_padding']) 152 | self.use_udp = cfg.get('use_udp', False) 153 | self.score_per_joint = cfg.get('score_per_joint', False) 154 | 155 | def nms(self, heatmaps): 156 | """Non-Maximum Suppression for heatmaps. 157 | 158 | Args: 159 | heatmap(torch.Tensor): Heatmaps before nms. 160 | 161 | Returns: 162 | torch.Tensor: Heatmaps after nms. 163 | """ 164 | 165 | maxm = self.pool(heatmaps) 166 | maxm = torch.eq(maxm, heatmaps).float() 167 | heatmaps = heatmaps * maxm 168 | 169 | return heatmaps 170 | 171 | def match(self, tag_k, loc_k, val_k): 172 | """Group keypoints to human poses in a batch. 173 | 174 | Args: 175 | tag_k (np.ndarray[NxKxMxL]): tag corresponding to the 176 | top k values of feature map per keypoint. 177 | loc_k (np.ndarray[NxKxMx2]): top k locations of the 178 | feature maps for keypoint. 179 | val_k (np.ndarray[NxKxM]): top k value of the 180 | feature maps per keypoint. 181 | 182 | Returns: 183 | list 184 | """ 185 | 186 | def _match(x): 187 | return _match_by_tag(x, self.params) 188 | 189 | return list(map(_match, zip(tag_k, loc_k, val_k))) 190 | 191 | def top_k(self, heatmaps, tags): 192 | """Find top_k values in an image. 193 | 194 | Note: 195 | batch size: N 196 | number of keypoints: K 197 | heatmap height: H 198 | heatmap width: W 199 | max number of people: M 200 | dim of tags: L 201 | If use flip testing, L=2; else L=1. 202 | 203 | Args: 204 | heatmaps (torch.Tensor[NxKxHxW]) 205 | tags (torch.Tensor[NxKxHxWxL]) 206 | 207 | Returns: 208 | dict: A dict containing top_k values. 209 | 210 | - tag_k (np.ndarray[NxKxMxL]): 211 | tag corresponding to the top k values of 212 | feature map per keypoint. 213 | - loc_k (np.ndarray[NxKxMx2]): 214 | top k location of feature map per keypoint. 215 | - val_k (np.ndarray[NxKxM]): 216 | top k value of feature map per keypoint. 217 | """ 218 | heatmaps = self.nms(heatmaps) 219 | N, K, H, W = heatmaps.size() 220 | heatmaps = heatmaps.view(N, K, -1) 221 | val_k, ind = heatmaps.topk(self.params.max_num_people, dim=2) 222 | 223 | tags = tags.view(tags.size(0), tags.size(1), W * H, -1) 224 | if not self.tag_per_joint: 225 | tags = tags.expand(-1, self.params.num_joints, -1, -1) 226 | 227 | tag_k = torch.stack( 228 | [torch.gather(tags[..., i], 2, ind) for i in range(tags.size(3))], 229 | dim=3) 230 | 231 | x = ind % W 232 | y = ind // W 233 | 234 | ind_k = torch.stack((x, y), dim=3) 235 | 236 | results = { 237 | 'tag_k': tag_k.cpu().numpy(), 238 | 'loc_k': ind_k.cpu().numpy(), 239 | 'val_k': val_k.cpu().numpy() 240 | } 241 | 242 | return results 243 | 244 | @staticmethod 245 | def adjust(results, heatmaps): 246 | """Adjust the coordinates for better accuracy. 
247 | 248 | Note: 249 | batch size: N 250 | number of keypoints: K 251 | heatmap height: H 252 | heatmap width: W 253 | 254 | Args: 255 | results (list(np.ndarray)): Keypoint predictions. 256 | heatmaps (torch.Tensor[NxKxHxW]): Heatmaps. 257 | """ 258 | _, _, H, W = heatmaps.shape 259 | for batch_id, people in enumerate(results): 260 | for people_id, people_i in enumerate(people): 261 | for joint_id, joint in enumerate(people_i): 262 | if joint[2] > 0: 263 | x, y = joint[0:2] 264 | xx, yy = int(x), int(y) 265 | tmp = heatmaps[batch_id][joint_id] 266 | if tmp[min(H - 1, yy + 1), xx] > tmp[max(0, yy - 1), 267 | xx]: 268 | y += 0.25 269 | else: 270 | y -= 0.25 271 | 272 | if tmp[yy, min(W - 1, xx + 1)] > tmp[yy, 273 | max(0, xx - 1)]: 274 | x += 0.25 275 | else: 276 | x -= 0.25 277 | results[batch_id][people_id, joint_id, 278 | 0:2] = (x + 0.5, y + 0.5) 279 | return results 280 | 281 | @staticmethod 282 | def refine(heatmap, tag, keypoints, use_udp=False): 283 | """Given initial keypoint predictions, we identify missing joints. 284 | 285 | Note: 286 | number of keypoints: K 287 | heatmap height: H 288 | heatmap width: W 289 | dim of tags: L 290 | If use flip testing, L=2; else L=1. 291 | 292 | Args: 293 | heatmap: np.ndarray(K, H, W). 294 | tag: np.ndarray(K, H, W) | np.ndarray(K, H, W, L) 295 | keypoints: np.ndarray of size (K, 3 + L) 296 | last dim is (x, y, score, tag). 297 | use_udp: bool-unbiased data processing 298 | 299 | Returns: 300 | np.ndarray: The refined keypoints. 301 | """ 302 | 303 | K, H, W = heatmap.shape 304 | if len(tag.shape) == 3: 305 | tag = tag[..., None] 306 | 307 | tags = [] 308 | for i in range(K): 309 | if keypoints[i, 2] > 0: 310 | # save tag value of detected keypoint 311 | x, y = keypoints[i][:2].astype(int) 312 | x = np.clip(x, 0, W - 1) 313 | y = np.clip(y, 0, H - 1) 314 | tags.append(tag[i, y, x]) 315 | 316 | # mean tag of current detected people 317 | prev_tag = np.mean(tags, axis=0) 318 | results = [] 319 | 320 | for _heatmap, _tag in zip(heatmap, tag): 321 | # distance of all tag values with mean tag of 322 | # current detected people 323 | distance_tag = (((_tag - 324 | prev_tag[None, None, :])**2).sum(axis=2)**0.5) 325 | norm_heatmap = _heatmap - np.round(distance_tag) 326 | 327 | # find maximum position 328 | y, x = np.unravel_index(np.argmax(norm_heatmap), _heatmap.shape) 329 | xx = x.copy() 330 | yy = y.copy() 331 | # detection score at maximum position 332 | val = _heatmap[y, x] 333 | if not use_udp: 334 | # offset by 0.5 335 | x += 0.5 336 | y += 0.5 337 | 338 | # add a quarter offset 339 | if _heatmap[yy, min(W - 1, xx + 1)] > _heatmap[yy, max(0, xx - 1)]: 340 | x += 0.25 341 | else: 342 | x -= 0.25 343 | 344 | if _heatmap[min(H - 1, yy + 1), xx] > _heatmap[max(0, yy - 1), xx]: 345 | y += 0.25 346 | else: 347 | y -= 0.25 348 | 349 | results.append((x, y, val)) 350 | results = np.array(results) 351 | 352 | if results is not None: 353 | for i in range(K): 354 | # add keypoint if it is not detected 355 | if results[i, 2] > 0 and keypoints[i, 2] == 0: 356 | keypoints[i, :3] = results[i, :3] 357 | 358 | return keypoints 359 | 360 | def parse(self, heatmaps, tags, adjust=True, refine=True): 361 | """Group keypoints into poses given heatmap and tag. 362 | 363 | Note: 364 | batch size: N 365 | number of keypoints: K 366 | heatmap height: H 367 | heatmap width: W 368 | dim of tags: L 369 | If use flip testing, L=2; else L=1. 370 | 371 | Args: 372 | heatmaps (torch.Tensor[NxKxHxW]): model output heatmaps. 
373 | tags (torch.Tensor[NxKxHxWxL]): model output tagmaps. 374 | 375 | Returns: 376 | tuple: A tuple containing keypoint grouping results. 377 | 378 | - results (list(np.ndarray)): Pose results. 379 | - scores (list/list(np.ndarray)): Score of people. 380 | """ 381 | results = self.match(**self.top_k(heatmaps, tags)) 382 | 383 | if adjust: 384 | if self.use_udp: 385 | for i in range(len(results)): 386 | if results[i].shape[0] > 0: 387 | results[i][..., :2] = post_dark_udp( 388 | results[i][..., :2].copy(), heatmaps[i:i + 1, :]) 389 | else: 390 | results = self.adjust(results, heatmaps) 391 | 392 | if self.score_per_joint: 393 | scores = [i[:, 2] for i in results[0]] 394 | else: 395 | scores = [i[:, 2].mean() for i in results[0]] 396 | 397 | if refine: 398 | results = results[0] 399 | # for every detected person 400 | for i in range(len(results)): 401 | heatmap_numpy = heatmaps[0].cpu().numpy() 402 | tag_numpy = tags[0].cpu().numpy() 403 | if not self.tag_per_joint: 404 | tag_numpy = np.tile(tag_numpy, 405 | (self.params.num_joints, 1, 1, 1)) 406 | results[i] = self.refine( 407 | heatmap_numpy, tag_numpy, results[i], use_udp=self.use_udp) 408 | results = [results] 409 | 410 | return results, scores 411 | -------------------------------------------------------------------------------- /utils/post_processing/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch 3 | # Original licence: Copyright (c) Microsoft, under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | import numpy as np 7 | 8 | 9 | def nms(dets, thr): 10 | """Greedily select boxes with high confidence and overlap <= thr. 11 | 12 | Args: 13 | dets: [[x1, y1, x2, y2, score]]. 14 | thr: Retain overlap < thr. 15 | 16 | Returns: 17 | list: Indexes to keep. 18 | """ 19 | if len(dets) == 0: 20 | return [] 21 | 22 | x1 = dets[:, 0] 23 | y1 = dets[:, 1] 24 | x2 = dets[:, 2] 25 | y2 = dets[:, 3] 26 | scores = dets[:, 4] 27 | 28 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 29 | order = scores.argsort()[::-1] 30 | 31 | keep = [] 32 | while len(order) > 0: 33 | i = order[0] 34 | keep.append(i) 35 | xx1 = np.maximum(x1[i], x1[order[1:]]) 36 | yy1 = np.maximum(y1[i], y1[order[1:]]) 37 | xx2 = np.minimum(x2[i], x2[order[1:]]) 38 | yy2 = np.minimum(y2[i], y2[order[1:]]) 39 | 40 | w = np.maximum(0.0, xx2 - xx1 + 1) 41 | h = np.maximum(0.0, yy2 - yy1 + 1) 42 | inter = w * h 43 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 44 | 45 | inds = np.where(ovr <= thr)[0] 46 | order = order[inds + 1] 47 | 48 | return keep 49 | 50 | 51 | def oks_iou(g, d, a_g, a_d, sigmas=None, vis_thr=None): 52 | """Calculate oks ious. 53 | 54 | Args: 55 | g: Ground truth keypoints. 56 | d: Detected keypoints. 57 | a_g: Area of the ground truth object. 58 | a_d: Area of the detected object. 59 | sigmas: standard deviation of keypoint labelling. 60 | vis_thr: threshold of the keypoint visibility. 61 | 62 | Returns: 63 | list: The oks ious. 
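    Note:
        For each detection the OKS is the mean of exp(-e_i) over the
        (optionally visibility-filtered) keypoints, where
        e_i = d_i^2 / (2 * vars_i * s^2), vars_i = (2 * sigma_i)^2 and
        s^2 = (a_g + a_d) / 2, with d_i the distance between matching
        keypoints.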
64 | """ 65 | if sigmas is None: 66 | sigmas = np.array([ 67 | .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, 68 | .87, .87, .89, .89 69 | ]) / 10.0 70 | vars = (sigmas * 2)**2 71 | xg = g[0::3] 72 | yg = g[1::3] 73 | vg = g[2::3] 74 | ious = np.zeros(len(d), dtype=np.float32) 75 | for n_d in range(0, len(d)): 76 | xd = d[n_d, 0::3] 77 | yd = d[n_d, 1::3] 78 | vd = d[n_d, 2::3] 79 | dx = xd - xg 80 | dy = yd - yg 81 | e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 82 | if vis_thr is not None: 83 | ind = list(vg > vis_thr) and list(vd > vis_thr) 84 | e = e[ind] 85 | ious[n_d] = np.sum(np.exp(-e)) / len(e) if len(e) != 0 else 0.0 86 | return ious 87 | 88 | 89 | def oks_nms(kpts_db, thr, sigmas=None, vis_thr=None, score_per_joint=False): 90 | """OKS NMS implementations. 91 | 92 | Args: 93 | kpts_db: keypoints. 94 | thr: Retain overlap < thr. 95 | sigmas: standard deviation of keypoint labelling. 96 | vis_thr: threshold of the keypoint visibility. 97 | score_per_joint: the input scores (in kpts_db) are per joint scores 98 | 99 | Returns: 100 | np.ndarray: indexes to keep. 101 | """ 102 | if len(kpts_db) == 0: 103 | return [] 104 | 105 | if score_per_joint: 106 | scores = np.array([k['score'].mean() for k in kpts_db]) 107 | else: 108 | scores = np.array([k['score'] for k in kpts_db]) 109 | 110 | kpts = np.array([k['keypoints'].flatten() for k in kpts_db]) 111 | areas = np.array([k['area'] for k in kpts_db]) 112 | 113 | order = scores.argsort()[::-1] 114 | 115 | keep = [] 116 | while len(order) > 0: 117 | i = order[0] 118 | keep.append(i) 119 | 120 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], 121 | sigmas, vis_thr) 122 | 123 | inds = np.where(oks_ovr <= thr)[0] 124 | order = order[inds + 1] 125 | 126 | keep = np.array(keep) 127 | 128 | return keep 129 | 130 | 131 | def _rescore(overlap, scores, thr, type='gaussian'): 132 | """Rescoring mechanism gaussian or linear. 133 | 134 | Args: 135 | overlap: calculated ious 136 | scores: target scores. 137 | thr: retain oks overlap < thr. 138 | type: 'gaussian' or 'linear' 139 | 140 | Returns: 141 | np.ndarray: indexes to keep 142 | """ 143 | assert len(overlap) == len(scores) 144 | assert type in ['gaussian', 'linear'] 145 | 146 | if type == 'linear': 147 | inds = np.where(overlap >= thr)[0] 148 | scores[inds] = scores[inds] * (1 - overlap[inds]) 149 | else: 150 | scores = scores * np.exp(-overlap**2 / thr) 151 | 152 | return scores 153 | 154 | 155 | def soft_oks_nms(kpts_db, 156 | thr, 157 | max_dets=20, 158 | sigmas=None, 159 | vis_thr=None, 160 | score_per_joint=False): 161 | """Soft OKS NMS implementations. 162 | 163 | Args: 164 | kpts_db 165 | thr: retain oks overlap < thr. 166 | max_dets: max number of detections to keep. 167 | sigmas: Keypoint labelling uncertainty. 168 | score_per_joint: the input scores (in kpts_db) are per joint scores 169 | 170 | Returns: 171 | np.ndarray: indexes to keep. 
172 | """ 173 | if len(kpts_db) == 0: 174 | return [] 175 | 176 | if score_per_joint: 177 | scores = np.array([k['score'].mean() for k in kpts_db]) 178 | else: 179 | scores = np.array([k['score'] for k in kpts_db]) 180 | 181 | kpts = np.array([k['keypoints'].flatten() for k in kpts_db]) 182 | areas = np.array([k['area'] for k in kpts_db]) 183 | 184 | order = scores.argsort()[::-1] 185 | scores = scores[order] 186 | 187 | keep = np.zeros(max_dets, dtype=np.intp) 188 | keep_cnt = 0 189 | while len(order) > 0 and keep_cnt < max_dets: 190 | i = order[0] 191 | 192 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], 193 | sigmas, vis_thr) 194 | 195 | order = order[1:] 196 | scores = _rescore(oks_ovr, scores[1:], thr) 197 | 198 | tmp = scores.argsort()[::-1] 199 | order = order[tmp] 200 | scores = scores[tmp] 201 | 202 | keep[keep_cnt] = i 203 | keep_cnt += 1 204 | 205 | keep = keep[:keep_cnt] 206 | 207 | return keep 208 | -------------------------------------------------------------------------------- /utils/post_processing/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/HoBeom/OneEuroFilter-Numpy 3 | # Original licence: Copyright (c) HoBeom Jeon, under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | from time import time 6 | 7 | import numpy as np 8 | 9 | 10 | def smoothing_factor(t_e, cutoff): 11 | r = 2 * np.pi * cutoff * t_e 12 | return r / (r + 1) 13 | 14 | 15 | def exponential_smoothing(a, x, x_prev): 16 | return a * x + (1 - a) * x_prev 17 | 18 | 19 | class OneEuroFilter: 20 | 21 | def __init__(self, 22 | x0, 23 | dx0=0.0, 24 | min_cutoff=1.7, 25 | beta=0.3, 26 | d_cutoff=30.0, 27 | fps=None): 28 | """One Euro Filter for keypoints smoothing. 29 | 30 | Args: 31 | x0 (np.ndarray[K, 2]): Initialize keypoints value 32 | dx0 (float): 0.0 33 | min_cutoff (float): parameter for one euro filter 34 | beta (float): parameter for one euro filter 35 | d_cutoff (float): Input data FPS 36 | fps (float): Video FPS for video inference 37 | """ 38 | 39 | # The parameters. 40 | self.data_shape = x0.shape 41 | self.min_cutoff = np.full(x0.shape, min_cutoff) 42 | self.beta = np.full(x0.shape, beta) 43 | self.d_cutoff = np.full(x0.shape, d_cutoff) 44 | # Previous values. 45 | self.x_prev = x0.astype(np.float32) 46 | self.dx_prev = np.full(x0.shape, dx0) 47 | self.mask_prev = np.ma.masked_where(x0 <= 0, x0) 48 | self.realtime = True 49 | if fps is None: 50 | # Using in realtime inference 51 | self.t_e = None 52 | self.skip_frame_factor = d_cutoff 53 | else: 54 | # fps using video inference 55 | self.realtime = False 56 | self.d_cutoff = np.full(x0.shape, float(fps)) 57 | self.t_prev = time() 58 | 59 | def __call__(self, x, t_e=1.0): 60 | """Compute the filtered signal. 61 | 62 | Hyper-parameters (cutoff, beta) are from `VNect 63 | `__ . 64 | 65 | Realtime Camera fps (d_cutoff) default 30.0 66 | 67 | Args: 68 | x (np.ndarray[K, 2]): keypoints results in frame 69 | t_e (Optional): video skip frame count for posetrack 70 | evaluation 71 | """ 72 | assert x.shape == self.data_shape 73 | 74 | t = 0 75 | if self.realtime: 76 | t = time() 77 | t_e = (t - self.t_prev) * self.skip_frame_factor 78 | t_e = np.full(x.shape, t_e) 79 | 80 | # missing keypoints mask 81 | mask = np.ma.masked_where(x <= 0, x) 82 | 83 | # The filtered derivative of the signal. 
84 | a_d = smoothing_factor(t_e, self.d_cutoff) 85 | dx = (x - self.x_prev) / t_e 86 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 87 | 88 | # The filtered signal. 89 | cutoff = self.min_cutoff + self.beta * np.abs(dx_hat) 90 | a = smoothing_factor(t_e, cutoff) 91 | x_hat = exponential_smoothing(a, x, self.x_prev) 92 | 93 | # missing keypoints remove 94 | np.copyto(x_hat, -10, where=mask.mask) 95 | 96 | # Memorize the previous values. 97 | self.x_prev = x_hat 98 | self.dx_prev = dx_hat 99 | self.t_prev = t 100 | self.mask_prev = mask 101 | 102 | return x_hat 103 | -------------------------------------------------------------------------------- /utils/post_processing/post_transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch 3 | # Original licence: Copyright (c) Microsoft, under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | import math 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | 12 | 13 | def fliplr_joints(joints_3d, joints_3d_visible, img_width, flip_pairs): 14 | """Flip human joints horizontally. 15 | 16 | Note: 17 | - num_keypoints: K 18 | 19 | Args: 20 | joints_3d (np.ndarray([K, 3])): Coordinates of keypoints. 21 | joints_3d_visible (np.ndarray([K, 1])): Visibility of keypoints. 22 | img_width (int): Image width. 23 | flip_pairs (list[tuple]): Pairs of keypoints which are mirrored 24 | (for example, left ear and right ear). 25 | 26 | Returns: 27 | tuple: Flipped human joints. 28 | 29 | - joints_3d_flipped (np.ndarray([K, 3])): Flipped joints. 30 | - joints_3d_visible_flipped (np.ndarray([K, 1])): Joint visibility. 31 | """ 32 | 33 | assert len(joints_3d) == len(joints_3d_visible) 34 | assert img_width > 0 35 | 36 | joints_3d_flipped = joints_3d.copy() 37 | joints_3d_visible_flipped = joints_3d_visible.copy() 38 | 39 | # Swap left-right parts 40 | for left, right in flip_pairs: 41 | joints_3d_flipped[left, :] = joints_3d[right, :] 42 | joints_3d_flipped[right, :] = joints_3d[left, :] 43 | 44 | joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :] 45 | joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :] 46 | 47 | # Flip horizontally 48 | joints_3d_flipped[:, 0] = img_width - 1 - joints_3d_flipped[:, 0] 49 | joints_3d_flipped = joints_3d_flipped * joints_3d_visible_flipped 50 | 51 | return joints_3d_flipped, joints_3d_visible_flipped 52 | 53 | 54 | def fliplr_regression(regression, 55 | flip_pairs, 56 | center_mode='static', 57 | center_x=0.5, 58 | center_index=0): 59 | """Flip human joints horizontally. 60 | 61 | Note: 62 | - batch_size: N 63 | - num_keypoint: K 64 | 65 | Args: 66 | regression (np.ndarray([..., K, C])): Coordinates of keypoints, where K 67 | is the joint number and C is the dimension. Example shapes are: 68 | 69 | - [N, K, C]: a batch of keypoints where N is the batch size. 70 | - [N, T, K, C]: a batch of pose sequences, where T is the frame 71 | number. 72 | flip_pairs (list[tuple()]): Pairs of keypoints which are mirrored 73 | (for example, left ear -- right ear). 74 | center_mode (str): The mode to set the center location on the x-axis 75 | to flip around. 
Options are: 76 | 77 | - static: use a static x value (see center_x also) 78 | - root: use a root joint (see center_index also) 79 | center_x (float): Set the x-axis location of the flip center. Only used 80 | when center_mode=static. 81 | center_index (int): Set the index of the root joint, whose x location 82 | will be used as the flip center. Only used when center_mode=root. 83 | 84 | Returns: 85 | np.ndarray([..., K, C]): Flipped joints. 86 | """ 87 | assert regression.ndim >= 2, f'Invalid pose shape {regression.shape}' 88 | 89 | allowed_center_mode = {'static', 'root'} 90 | assert center_mode in allowed_center_mode, 'Get invalid center_mode ' \ 91 | f'{center_mode}, allowed choices are {allowed_center_mode}' 92 | 93 | if center_mode == 'static': 94 | x_c = center_x 95 | elif center_mode == 'root': 96 | assert regression.shape[-2] > center_index 97 | x_c = regression[..., center_index:center_index + 1, 0] 98 | 99 | regression_flipped = regression.copy() 100 | # Swap left-right parts 101 | for left, right in flip_pairs: 102 | regression_flipped[..., left, :] = regression[..., right, :] 103 | regression_flipped[..., right, :] = regression[..., left, :] 104 | 105 | # Flip horizontally 106 | regression_flipped[..., 0] = x_c * 2 - regression_flipped[..., 0] 107 | return regression_flipped 108 | 109 | 110 | def flip_back(output_flipped, flip_pairs, target_type='GaussianHeatmap'): 111 | """Flip the flipped heatmaps back to the original form. 112 | 113 | Note: 114 | - batch_size: N 115 | - num_keypoints: K 116 | - heatmap height: H 117 | - heatmap width: W 118 | 119 | Args: 120 | output_flipped (np.ndarray[N, K, H, W]): The output heatmaps obtained 121 | from the flipped images. 122 | flip_pairs (list[tuple()): Pairs of keypoints which are mirrored 123 | (for example, left ear -- right ear). 124 | target_type (str): GaussianHeatmap or CombinedTarget 125 | 126 | Returns: 127 | np.ndarray: heatmaps that flipped back to the original image 128 | """ 129 | assert output_flipped.ndim == 4, \ 130 | 'output_flipped should be [batch_size, num_keypoints, height, width]' 131 | shape_ori = output_flipped.shape 132 | channels = 1 133 | if target_type.lower() == 'CombinedTarget'.lower(): 134 | channels = 3 135 | output_flipped[:, 1::3, ...] = -output_flipped[:, 1::3, ...] 136 | output_flipped = output_flipped.reshape(shape_ori[0], -1, channels, 137 | shape_ori[2], shape_ori[3]) 138 | output_flipped_back = output_flipped.copy() 139 | 140 | # Swap left-right parts 141 | for left, right in flip_pairs: 142 | output_flipped_back[:, left, ...] = output_flipped[:, right, ...] 143 | output_flipped_back[:, right, ...] = output_flipped[:, left, ...] 144 | output_flipped_back = output_flipped_back.reshape(shape_ori) 145 | # Flip horizontally 146 | output_flipped_back = output_flipped_back[..., ::-1] 147 | return output_flipped_back 148 | 149 | 150 | def transform_preds(coords, center, scale, output_size, use_udp=False): 151 | """Get final keypoint predictions from heatmaps and apply scaling and 152 | translation to map them back to the image. 153 | 154 | Note: 155 | num_keypoints: K 156 | 157 | Args: 158 | coords (np.ndarray[K, ndims]): 159 | 160 | * If ndims=2, corrds are predicted keypoint location. 161 | * If ndims=4, corrds are composed of (x, y, scores, tags) 162 | * If ndims=5, corrds are composed of (x, y, scores, tags, 163 | flipped_tags) 164 | 165 | center (np.ndarray[2, ]): Center of the bounding box (x, y). 166 | scale (np.ndarray[2, ]): Scale of the bounding box 167 | wrt [width, height]. 
168 | output_size (np.ndarray[2, ] | list(2,)): Size of the 169 | destination heatmaps. 170 | use_udp (bool): Use unbiased data processing 171 | 172 | Returns: 173 | np.ndarray: Predicted coordinates in the images. 174 | """ 175 | assert coords.shape[1] in (2, 4, 5) 176 | assert len(center) == 2 177 | assert len(scale) == 2 178 | assert len(output_size) == 2 179 | 180 | # Recover the scale which is normalized by a factor of 200. 181 | # scale = scale * 200.0 182 | 183 | if use_udp: 184 | scale_x = scale[0] / (output_size[0] - 1.0) 185 | scale_y = scale[1] / (output_size[1] - 1.0) 186 | else: 187 | scale_x = scale[0] / output_size[0] 188 | scale_y = scale[1] / output_size[1] 189 | 190 | target_coords = np.ones_like(coords) 191 | target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5 192 | target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5 193 | 194 | return target_coords 195 | 196 | 197 | def get_affine_transform(center, 198 | scale, 199 | rot, 200 | output_size, 201 | shift=(0., 0.), 202 | inv=False): 203 | """Get the affine transform matrix, given the center/scale/rot/output_size. 204 | 205 | Args: 206 | center (np.ndarray[2, ]): Center of the bounding box (x, y). 207 | scale (np.ndarray[2, ]): Scale of the bounding box 208 | wrt [width, height]. 209 | rot (float): Rotation angle (degree). 210 | output_size (np.ndarray[2, ] | list(2,)): Size of the 211 | destination heatmaps. 212 | shift (0-100%): Shift translation ratio wrt the width/height. 213 | Default (0., 0.). 214 | inv (bool): Option to inverse the affine transform direction. 215 | (inv=False: src->dst or inv=True: dst->src) 216 | 217 | Returns: 218 | np.ndarray: The transform matrix. 219 | """ 220 | assert len(center) == 2 221 | assert len(scale) == 2 222 | assert len(output_size) == 2 223 | assert len(shift) == 2 224 | 225 | # pixel_std is 200. 226 | scale_tmp = scale * 200.0 227 | 228 | shift = np.array(shift) 229 | src_w = scale_tmp[0] 230 | dst_w = output_size[0] 231 | dst_h = output_size[1] 232 | 233 | rot_rad = np.pi * rot / 180 234 | src_dir = rotate_point([0., src_w * -0.5], rot_rad) 235 | dst_dir = np.array([0., dst_w * -0.5]) 236 | 237 | src = np.zeros((3, 2), dtype=np.float32) 238 | src[0, :] = center + scale_tmp * shift 239 | src[1, :] = center + src_dir + scale_tmp * shift 240 | src[2, :] = _get_3rd_point(src[0, :], src[1, :]) 241 | 242 | dst = np.zeros((3, 2), dtype=np.float32) 243 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 244 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 245 | dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :]) 246 | 247 | if inv: 248 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 249 | else: 250 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 251 | 252 | return trans 253 | 254 | 255 | def affine_transform(pt, trans_mat): 256 | """Apply an affine transformation to the points. 257 | 258 | Args: 259 | pt (np.ndarray): a 2 dimensional point to be transformed 260 | trans_mat (np.ndarray): 2x3 matrix of an affine transform 261 | 262 | Returns: 263 | np.ndarray: Transformed points. 264 | """ 265 | assert len(pt) == 2 266 | new_pt = np.array(trans_mat) @ np.array([pt[0], pt[1], 1.]) 267 | 268 | return new_pt 269 | 270 | 271 | def _get_3rd_point(a, b): 272 | """To calculate the affine matrix, three pairs of points are required. This 273 | function is used to get the 3rd point, given 2D points a & b. 
274 | 275 | The 3rd point is defined by rotating vector `a - b` by 90 degrees 276 | anticlockwise, using b as the rotation center. 277 | 278 | Args: 279 | a (np.ndarray): point(x,y) 280 | b (np.ndarray): point(x,y) 281 | 282 | Returns: 283 | np.ndarray: The 3rd point. 284 | """ 285 | assert len(a) == 2 286 | assert len(b) == 2 287 | direction = a - b 288 | third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32) 289 | 290 | return third_pt 291 | 292 | 293 | def rotate_point(pt, angle_rad): 294 | """Rotate a point by an angle. 295 | 296 | Args: 297 | pt (list[float]): 2 dimensional point to be rotated 298 | angle_rad (float): rotation angle by radian 299 | 300 | Returns: 301 | list[float]: Rotated point. 302 | """ 303 | assert len(pt) == 2 304 | sn, cs = np.sin(angle_rad), np.cos(angle_rad) 305 | new_x = pt[0] * cs - pt[1] * sn 306 | new_y = pt[0] * sn + pt[1] * cs 307 | rotated_pt = [new_x, new_y] 308 | 309 | return rotated_pt 310 | 311 | 312 | def get_warp_matrix(theta, size_input, size_dst, size_target): 313 | """Calculate the transformation matrix under the constraint of unbiased. 314 | Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased 315 | Data Processing for Human Pose Estimation (CVPR 2020). 316 | 317 | Args: 318 | theta (float): Rotation angle in degrees. 319 | size_input (np.ndarray): Size of input image [w, h]. 320 | size_dst (np.ndarray): Size of output image [w, h]. 321 | size_target (np.ndarray): Size of ROI in input plane [w, h]. 322 | 323 | Returns: 324 | np.ndarray: A matrix for transformation. 325 | """ 326 | theta = np.deg2rad(theta) 327 | matrix = np.zeros((2, 3), dtype=np.float32) 328 | scale_x = size_dst[0] / size_target[0] 329 | scale_y = size_dst[1] / size_target[1] 330 | matrix[0, 0] = math.cos(theta) * scale_x 331 | matrix[0, 1] = -math.sin(theta) * scale_x 332 | matrix[0, 2] = scale_x * (-0.5 * size_input[0] * math.cos(theta) + 333 | 0.5 * size_input[1] * math.sin(theta) + 334 | 0.5 * size_target[0]) 335 | matrix[1, 0] = math.sin(theta) * scale_y 336 | matrix[1, 1] = math.cos(theta) * scale_y 337 | matrix[1, 2] = scale_y * (-0.5 * size_input[0] * math.sin(theta) - 338 | 0.5 * size_input[1] * math.cos(theta) + 339 | 0.5 * size_target[1]) 340 | return matrix 341 | 342 | 343 | def warp_affine_joints(joints, mat): 344 | """Apply affine transformation defined by the transform matrix on the 345 | joints. 346 | 347 | Args: 348 | joints (np.ndarray[..., 2]): Origin coordinate of joints. 349 | mat (np.ndarray[3, 2]): The affine matrix. 350 | 351 | Returns: 352 | np.ndarray[..., 2]: Result coordinate of joints. 
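    Note:
        The joints are flattened to (N, 2), extended with a homogeneous
        coordinate and multiplied by `mat.T`, so a 2x3 affine matrix as
        returned by `get_warp_matrix` or `cv2.getAffineTransform` can be
        passed directly.

    Example (illustrative):
        >>> import numpy as np
        >>> mat = np.array([[1., 0., 10.], [0., 1., 20.]])  # pure translation
        >>> warp_affine_joints(np.zeros((17, 2)), mat)[0]
        array([10., 20.])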
353 | """ 354 | joints = np.array(joints) 355 | shape = joints.shape 356 | joints = joints.reshape(-1, 2) 357 | return np.dot( 358 | np.concatenate((joints, joints[:, 0:1] * 0 + 1), axis=1), 359 | mat.T).reshape(shape) 360 | 361 | 362 | def affine_transform_torch(pts, t): 363 | npts = pts.shape[0] 364 | pts_homo = torch.cat([pts, torch.ones(npts, 1, device=pts.device)], dim=1) 365 | out = torch.mm(t, torch.t(pts_homo)) 366 | return torch.t(out[:2, :]) 367 | -------------------------------------------------------------------------------- /utils/train_valid_fn.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from models.losses import JointsMSELoss 7 | from models.optimizer import LayerDecayOptimizer 8 | 9 | from torch.nn.parallel import DataParallel, DistributedDataParallel 10 | from torch.nn.utils import clip_grad_norm_ 11 | from torch.optim import AdamW 12 | from torch.optim.lr_scheduler import LambdaLR, MultiStepLR 13 | from torch.utils.data import DataLoader, Dataset 14 | from torch.utils.data.distributed import DistributedSampler 15 | from torch.cuda.amp import autocast, GradScaler 16 | from tqdm import tqdm 17 | from time import time 18 | 19 | from utils.dist_util import get_dist_info, init_dist 20 | from utils.logging import get_root_logger 21 | 22 | @torch.no_grad() 23 | def valid_model(model: nn.Module, dataloaders: DataLoader, criterion: nn.Module, cfg: dict) -> None: 24 | total_loss = 0 25 | total_metric = 0 26 | model.eval() 27 | for dataloader in dataloaders: 28 | for batch_idx, batch in enumerate(dataloader): 29 | images, targets, target_weights, __ = batch 30 | images = images.to('cuda') 31 | targets = targets.to('cuda') 32 | target_weights = target_weights.to('cuda') 33 | 34 | outputs = model(images) 35 | loss = criterion(outputs, targets, target_weights) 36 | total_loss += loss.item() 37 | 38 | avg_loss = total_loss/(len(dataloader)*len(dataloaders)) 39 | return avg_loss 40 | 41 | def train_model(model: nn.Module, datasets_train: Dataset, datasets_valid: Dataset, cfg: dict, distributed: bool, validate: bool, timestamp: str, meta: dict) -> None: 42 | logger = get_root_logger() 43 | 44 | # Prepare data loaders 45 | datasets_train = datasets_train if isinstance(datasets_train, (list, tuple)) else [datasets_train] 46 | datasets_valid = datasets_valid if isinstance(datasets_valid, (list, tuple)) else [datasets_valid] 47 | 48 | if distributed: 49 | samplers_train = [DistributedSampler(ds, num_replicas=len(cfg.gpu_ids), rank=torch.cuda.current_device(), shuffle=True, drop_last=False) for ds in datasets_train] 50 | samplers_valid = [DistributedSampler(ds, num_replicas=len(cfg.gpu_ids), rank=torch.cuda.current_device(), shuffle=False, drop_last=False) for ds in datasets_valid] 51 | else: 52 | samplers_train = [None for ds in datasets_train] 53 | samplers_valid = [None for ds in datasets_valid] 54 | 55 | dataloaders_train = [DataLoader(ds, batch_size=cfg.data['samples_per_gpu'], shuffle=True, sampler=sampler, num_workers=cfg.data['workers_per_gpu'], pin_memory=False) for ds, sampler in zip(datasets_train, samplers_train)] 56 | dataloaders_valid = [DataLoader(ds, batch_size=cfg.data['samples_per_gpu'], shuffle=False, sampler=sampler, num_workers=cfg.data['workers_per_gpu'], pin_memory=False) for ds, sampler in zip(datasets_valid, samplers_valid)] 57 | 58 | # put model on gpus 59 | if distributed: 60 | find_unused_parameters = cfg.get('find_unused_parameters', False) 61 | 
# Sets the `find_unused_parameters` parameter in 62 | # torch.nn.parallel.DistributedDataParallel 63 | 64 | model = DistributedDataParallel( 65 | module=model, 66 | device_ids=[torch.cuda.current_device()], 67 | broadcast_buffers=False, 68 | find_unused_parameters=find_unused_parameters) 69 | else: 70 | model = DataParallel(model, device_ids=cfg.gpu_ids) 71 | 72 | # Loss function 73 | criterion = JointsMSELoss(use_target_weight=cfg.model['keypoint_head']['loss_keypoint']['use_target_weight']) 74 | 75 | # Optimizer 76 | optimizer = AdamW(model.parameters(), lr=cfg.optimizer['lr'], betas=cfg.optimizer['betas'], weight_decay=cfg.optimizer['weight_decay']) 77 | 78 | # Layer-wise learning rate decay 79 | lr_mult = [cfg.optimizer['paramwise_cfg']['layer_decay_rate']] * cfg.optimizer['paramwise_cfg']['num_layers'] 80 | layerwise_optimizer = LayerDecayOptimizer(optimizer, lr_mult) 81 | 82 | 83 | # Learning rate scheduler (MultiStepLR) 84 | milestones = cfg.lr_config['step'] 85 | gamma = 0.1 86 | scheduler = MultiStepLR(optimizer, milestones, gamma) 87 | 88 | # Warm-up scheduler 89 | num_warmup_steps = cfg.lr_config['warmup_iters'] # Number of warm-up steps 90 | warmup_factor = cfg.lr_config['warmup_ratio'] # Initial learning rate = warmup_factor * learning_rate 91 | warmup_scheduler = LambdaLR( 92 | optimizer, 93 | lr_lambda=lambda step: warmup_factor + (1.0 - warmup_factor) * step / num_warmup_steps 94 | ) 95 | 96 | # AMP setting 97 | if cfg.use_amp: 98 | logger.info("Using Automatic Mixed Precision (AMP) training...") 99 | # Create a GradScaler object for FP16 training 100 | scaler = GradScaler() 101 | 102 | # Logging config 103 | total_params = sum(p.numel() for p in model.parameters() if p.requires_grad) 104 | logger.info(f'''\n 105 | #========= [Train Configs] =========# 106 | # - Num GPUs: {len(cfg.gpu_ids)} 107 | # - Batch size (per gpu): {cfg.data['samples_per_gpu']} 108 | # - LR: {cfg.optimizer['lr']: .6f} 109 | # - Num params: {total_params:,d} 110 | # - AMP: {cfg.use_amp} 111 | #===================================# 112 | ''') 113 | 114 | global_step = 0 115 | for dataloader in dataloaders_train: 116 | for epoch in range(cfg.total_epochs): 117 | model.train() 118 | train_pbar = tqdm(dataloader) 119 | total_loss = 0 120 | tic = time() 121 | for batch_idx, batch in enumerate(train_pbar): 122 | layerwise_optimizer.zero_grad() 123 | 124 | images, targets, target_weights, __ = batch 125 | images = images.to('cuda') 126 | targets = targets.to('cuda') 127 | target_weights = target_weights.to('cuda') 128 | 129 | if cfg.use_amp: 130 | with autocast(): 131 | outputs = model(images) 132 | loss = criterion(outputs, targets, target_weights) 133 | scaler.scale(loss).backward() 134 | clip_grad_norm_(model.parameters(), **cfg.optimizer_config['grad_clip']) 135 | scaler.step(layerwise_optimizer) 136 | scaler.update() 137 | else: 138 | outputs = model(images) 139 | loss = criterion(outputs, targets, target_weights) 140 | loss.backward() 141 | clip_grad_norm_(model.parameters(), **cfg.optimizer_config['grad_clip']) 142 | layerwise_optimizer.step() 143 | 144 | if global_step < num_warmup_steps: 145 | warmup_scheduler.step() 146 | global_step += 1 147 | 148 | total_loss += loss.item() 149 | train_pbar.set_description(f"🏋️> Epoch [{str(epoch).zfill(3)}/{str(cfg.total_epochs).zfill(3)}] | Loss {loss.item():.4f} | LR {optimizer.param_groups[0]['lr']:.6f} | Step") 150 | scheduler.step() 151 | 152 | avg_loss_train = total_loss/len(dataloader) 153 | logger.info(f"[Summary-train] Epoch 
[{str(epoch).zfill(3)}/{str(cfg.total_epochs).zfill(3)}] | Average Loss (train) {avg_loss_train:.4f} --- {time()-tic:.5f} sec. elapsed") 154 | ckpt_name = f"epoch{str(epoch).zfill(3)}.pth" 155 | ckpt_path = osp.join(cfg.work_dir, ckpt_name) 156 | torch.save(model.module.state_dict(), ckpt_path) 157 | 158 | # validation 159 | if validate: 160 | tic2 = time() 161 | avg_loss_valid = valid_model(model, dataloaders_valid, criterion, cfg) 162 | logger.info(f"[Summary-valid] Epoch [{str(epoch).zfill(3)}/{str(cfg.total_epochs).zfill(3)}] | Average Loss (valid) {avg_loss_valid:.4f} --- {time()-tic2:.5f} sec. elapsed") 163 | -------------------------------------------------------------------------------- /utils/transform.py: -------------------------------------------------------------------------------- 1 | import math 2 | import cv2 3 | import munkres 4 | import numpy as np 5 | import torch 6 | 7 | 8 | # solution proposed in https://github.com/pytorch/pytorch/issues/229#issuecomment-299424875 9 | def flip_tensor(tensor, dim=0): 10 | """ 11 | flip the tensor on the dimension dim 12 | """ 13 | inv_idx = torch.arange(tensor.shape[dim] - 1, -1, -1).to(tensor.device) 14 | return tensor.index_select(dim, inv_idx) 15 | 16 | 17 | # 18 | # derived from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch 19 | def flip_back(output_flipped, matched_parts): 20 | assert len(output_flipped.shape) == 4, 'output_flipped has to be [batch_size, num_joints, height, width]' 21 | 22 | output_flipped = flip_tensor(output_flipped, dim=-1) 23 | 24 | for pair in matched_parts: 25 | tmp = output_flipped[:, pair[0]].clone() 26 | output_flipped[:, pair[0]] = output_flipped[:, pair[1]] 27 | output_flipped[:, pair[1]] = tmp 28 | 29 | return output_flipped 30 | 31 | 32 | def fliplr_joints(joints, joints_vis, width, matched_parts): 33 | # Flip horizontal 34 | joints[:, 0] = width - joints[:, 0] - 1 35 | 36 | # Change left-right parts 37 | for pair in matched_parts: 38 | joints[pair[0], :], joints[pair[1], :] = \ 39 | joints[pair[1], :], joints[pair[0], :].copy() 40 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 41 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 42 | 43 | return joints * joints_vis, joints_vis 44 | 45 | 46 | def get_affine_transform(center, scale, pixel_std, rot, output_size, shift=np.array([0, 0], dtype=np.float32), inv=0): 47 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 48 | print(scale) 49 | scale = np.array([scale, scale]) 50 | 51 | scale_tmp = scale * 1.0 * pixel_std # It was scale_tmp = scale * 200.0 52 | src_w = scale_tmp[0] 53 | dst_w = output_size[0] 54 | dst_h = output_size[1] 55 | 56 | rot_rad = np.pi * rot / 180 57 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 58 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 59 | 60 | src = np.zeros((3, 2), dtype=np.float32) 61 | dst = np.zeros((3, 2), dtype=np.float32) 62 | src[0, :] = center + scale_tmp * shift 63 | src[1, :] = center + src_dir + scale_tmp * shift 64 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 65 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 66 | 67 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 68 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 69 | 70 | if inv: 71 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 72 | else: 73 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 74 | 75 | return trans 76 | 77 | 78 | def affine_transform(pt, t): 79 | new_pt = np.array([pt[0], pt[1], 1.]).T 80 | new_pt = np.dot(t, new_pt) 81 | return 
new_pt[:2] 82 | 83 | 84 | def get_3rd_point(a, b): 85 | direct = a - b 86 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 87 | 88 | 89 | def get_dir(src_point, rot_rad): 90 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 91 | 92 | src_result = [0, 0] 93 | src_result[0] = src_point[0] * cs - src_point[1] * sn 94 | src_result[1] = src_point[0] * sn + src_point[1] * cs 95 | 96 | return src_result -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | import random 4 | import numpy as np 5 | 6 | from collections import OrderedDict 7 | import os.path as osp 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from torch import distributed as dist 13 | from torch.nn.parallel import DataParallel, DistributedDataParallel 14 | 15 | from .dist_util import get_dist_info 16 | 17 | MODULE_WRAPPERS = [DataParallel, DistributedDataParallel] 18 | 19 | 20 | def init_random_seed(seed=None, device='cuda'): 21 | """Initialize random seed. 22 | 23 | If the seed is not set, the seed will be automatically randomized, 24 | and then broadcast to all processes to prevent some potential bugs. 25 | 26 | Args: 27 | seed (int, Optional): The seed. Default to None. 28 | device (str): The device where the seed will be put on. 29 | Default to 'cuda'. 30 | 31 | Returns: 32 | int: Seed to be used. 33 | """ 34 | if seed is not None: 35 | return seed 36 | 37 | # Make sure all ranks share the same random seed to prevent 38 | # some potential bugs. Please refer to 39 | # https://github.com/open-mmlab/mmdetection/issues/6339 40 | rank, world_size = get_dist_info() 41 | seed = np.random.randint(2**31) 42 | if world_size == 1: 43 | return seed 44 | 45 | if rank == 0: 46 | random_num = torch.tensor(seed, dtype=torch.int32, device=device) 47 | else: 48 | random_num = torch.tensor(0, dtype=torch.int32, device=device) 49 | dist.broadcast(random_num, src=0) 50 | return random_num.item() 51 | 52 | 53 | def set_random_seed(seed: int, 54 | deterministic: bool = False, 55 | use_rank_shift: bool = False) -> None: 56 | """Set random seed. 57 | 58 | Args: 59 | seed (int): Seed to be used. 60 | deterministic (bool): Whether to set the deterministic option for 61 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` 62 | to True and `torch.backends.cudnn.benchmark` to False. 63 | Default: False. 64 | rank_shift (bool): Whether to add rank number to the random seed to 65 | have different random seed in different threads. Default: False. 
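    Example (illustrative only; assumes any distributed process group has
    already been initialized before seeding):
        >>> seed = init_random_seed(None)
        >>> set_random_seed(seed, deterministic=False, use_rank_shift=True)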
66 | """ 67 | if use_rank_shift: 68 | rank, _ = get_dist_info() 69 | seed += rank 70 | random.seed(seed) 71 | np.random.seed(seed) 72 | torch.manual_seed(seed) 73 | torch.cuda.manual_seed(seed) 74 | torch.cuda.manual_seed_all(seed) 75 | os.environ['PYTHONHASHSEED'] = str(seed) 76 | if deterministic: 77 | torch.backends.cudnn.deterministic = True 78 | torch.backends.cudnn.benchmark = False 79 | 80 | def is_module_wrapper(module: nn.Module) -> bool: 81 | """ Check if module wrrapper exists recursively """ 82 | def is_module_in_wrapper(module, module_wrapper): 83 | module_wrappers = tuple(module_wrapper.module_dict.values()) 84 | if isinstance(module, module_wrappers): 85 | return True 86 | for child in module_wrapper.children.values(): 87 | if is_module_in_wrapper(module, child): 88 | return True 89 | return is_module_in_wrapper(module, MODULE_WRAPPERS) 90 | 91 | 92 | def load_state_dict(module, state_dict, strict=False, logger=None): 93 | """Load state_dict to a module. 94 | 95 | This method is modified from :meth:`torch.nn.Module.load_state_dict`. 96 | Default value for ``strict`` is set to ``False`` and the message for 97 | param mismatch will be shown even if strict is False. 98 | 99 | Args: 100 | module (Module): Module that receives the state_dict. 101 | state_dict (OrderedDict): Weights. 102 | strict (bool): whether to strictly enforce that the keys 103 | in :attr:`state_dict` match the keys returned by this module's 104 | :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. 105 | logger (:obj:`logging.Logger`, optional): Logger to log the error 106 | message. If not specified, print function will be used. 107 | """ 108 | unexpected_keys = [] 109 | all_missing_keys = [] 110 | err_msg = [] 111 | 112 | metadata = getattr(state_dict, '_metadata', None) 113 | state_dict = state_dict.copy() 114 | if metadata is not None: 115 | state_dict._metadata = metadata 116 | 117 | # use _load_from_state_dict to enable checkpoint version control 118 | def load(module, prefix=''): 119 | # recursively check parallel module in case that the model has a 120 | # complicated structure, e.g., nn.Module(nn.Module(DDP)) 121 | if is_module_wrapper(module): 122 | module = module.module 123 | local_metadata = {} if metadata is None else metadata.get( 124 | prefix[:-1], {}) 125 | module._load_from_state_dict(state_dict, prefix, local_metadata, True, 126 | all_missing_keys, unexpected_keys, 127 | err_msg) 128 | for name, child in module._modules.items(): 129 | if child is not None: 130 | load(child, prefix + name + '.') 131 | 132 | load(module) 133 | load = None # break load->load reference cycle 134 | 135 | # ignore "num_batches_tracked" of BN layers 136 | missing_keys = [ 137 | key for key in all_missing_keys if 'num_batches_tracked' not in key 138 | ] 139 | 140 | if unexpected_keys: 141 | err_msg.append('unexpected key in source ' 142 | f'state_dict: {", ".join(unexpected_keys)}\n') 143 | if missing_keys: 144 | err_msg.append( 145 | f'missing keys in source state_dict: {", ".join(missing_keys)}\n') 146 | 147 | rank, _ = get_dist_info() 148 | if len(err_msg) > 0 and rank == 0: 149 | err_msg.insert( 150 | 0, 'The model and loaded state dict do not match exactly\n') 151 | err_msg = '\n'.join(err_msg) 152 | if strict: 153 | raise RuntimeError(err_msg) 154 | elif logger is not None: 155 | logger.warning(err_msg) 156 | else: 157 | print(err_msg) 158 | 159 | 160 | def load_checkpoint(model, 161 | filename, 162 | map_location='cpu', 163 | strict=False, 164 | logger=None): 165 | """Load checkpoint from a 
file or URI. 166 | 167 | Args: 168 | model (Module): Module to load checkpoint. 169 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 170 | ``open-mmlab://xxx``. 171 | map_location (str): Same as :func:`torch.load`. 172 | strict (bool): Whether to allow different params for the model and 173 | checkpoint. 174 | logger (:mod:`logging.Logger` or None): The logger for error message. 175 | 176 | Returns: 177 | dict or OrderedDict: The loaded checkpoint. 178 | """ 179 | checkpoint = torch.load(filename, map_location=map_location) 180 | # OrderedDict is a subclass of dict 181 | if not isinstance(checkpoint, dict): 182 | raise RuntimeError( 183 | f'No state_dict found in checkpoint file {filename}') 184 | # get state_dict from checkpoint 185 | if 'state_dict' in checkpoint: 186 | state_dict_tmp = checkpoint['state_dict'] 187 | else: 188 | state_dict_tmp = checkpoint 189 | 190 | state_dict = OrderedDict() 191 | # strip prefix of state_dict 192 | for k, v in state_dict_tmp.items(): 193 | if k.startswith('module.backbone.'): 194 | state_dict[k[16:]] = v 195 | elif k.startswith('module.'): 196 | state_dict[k[7:]] = v 197 | elif k.startswith('backbone.'): 198 | state_dict[k[9:]] = v 199 | else: 200 | state_dict[k] = v 201 | # load state_dict 202 | load_state_dict(model, state_dict, strict, logger) 203 | return checkpoint 204 | 205 | 206 | def resize(input, 207 | size=None, 208 | scale_factor=None, 209 | mode='nearest', 210 | align_corners=None, 211 | warning=True): 212 | if warning: 213 | if size is not None and align_corners: 214 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 215 | output_h, output_w = tuple(int(x) for x in size) 216 | if output_h > input_h or output_w > output_h: 217 | if ((output_h > 1 and output_w > 1 and input_h > 1 218 | and input_w > 1) and (output_h - 1) % (input_h - 1) 219 | and (output_w - 1) % (input_w - 1)): 220 | warnings.warn( 221 | f'When align_corners={align_corners}, ' 222 | 'the output would more aligned if ' 223 | f'input size {(input_h, input_w)} is `x+1` and ' 224 | f'out size {(output_h, output_w)} is `nx+1`') 225 | if isinstance(size, torch.Size): 226 | size = tuple(int(x) for x in size) 227 | 228 | def constant_init(module: nn.Module, val: float, bias: float = 0) -> None: 229 | if hasattr(module, 'weight') and module.weight is not None: 230 | nn.init.constant_(module.weight, val) 231 | if hasattr(module, 'bias') and module.bias is not None: 232 | nn.init.constant_(module.bias, bias) 233 | 234 | def normal_init(module: nn.Module, 235 | mean: float = 0, 236 | std: float = 1, 237 | bias: float = 0) -> None: 238 | if hasattr(module, 'weight') and module.weight is not None: 239 | nn.init.normal_(module.weight, mean, std) 240 | if hasattr(module, 'bias') and module.bias is not None: 241 | nn.init.constant_(module.bias, bias) -------------------------------------------------------------------------------- /utils/visualization.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import torch 5 | import torchvision 6 | import ffmpeg 7 | 8 | 9 | __all__ = ["joints_dict", "draw_points_and_skeleton"] 10 | 11 | 12 | def joints_dict(): 13 | joints = { 14 | "coco": { 15 | "keypoints": { 16 | 0: "nose", 17 | 1: "left_eye", 18 | 2: "right_eye", 19 | 3: "left_ear", 20 | 4: "right_ear", 21 | 5: "left_shoulder", 22 | 6: "right_shoulder", 23 | 7: "left_elbow", 24 | 8: "right_elbow", 25 | 9: "left_wrist", 26 | 10: "right_wrist", 27 | 11: 
"left_hip", 28 | 12: "right_hip", 29 | 13: "left_knee", 30 | 14: "right_knee", 31 | 15: "left_ankle", 32 | 16: "right_ankle" 33 | }, 34 | "skeleton": [ 35 | # # [16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], 36 | # # [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7] 37 | # [15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], 38 | # [6, 8], [7, 9], [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], [3, 5], [4, 6] 39 | [15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], 40 | [6, 8], [7, 9], [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], # [3, 5], [4, 6] 41 | [0, 5], [0, 6] 42 | ] 43 | }, 44 | "mpii": { 45 | "keypoints": { 46 | 0: "right_ankle", 47 | 1: "right_knee", 48 | 2: "right_hip", 49 | 3: "left_hip", 50 | 4: "left_knee", 51 | 5: "left_ankle", 52 | 6: "pelvis", 53 | 7: "thorax", 54 | 8: "upper_neck", 55 | 9: "head top", 56 | 10: "right_wrist", 57 | 11: "right_elbow", 58 | 12: "right_shoulder", 59 | 13: "left_shoulder", 60 | 14: "left_elbow", 61 | 15: "left_wrist" 62 | }, 63 | "skeleton": [ 64 | # [5, 4], [4, 3], [0, 1], [1, 2], [3, 2], [13, 3], [12, 2], [13, 12], [13, 14], 65 | # [12, 11], [14, 15], [11, 10], # [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7] 66 | [5, 4], [4, 3], [0, 1], [1, 2], [3, 2], [3, 6], [2, 6], [6, 7], [7, 8], [8, 9], 67 | [13, 7], [12, 7], [13, 14], [12, 11], [14, 15], [11, 10], 68 | ] 69 | }, 70 | } 71 | return joints 72 | 73 | 74 | def draw_points(image, points, color_palette='tab20', palette_samples=16, confidence_threshold=0.5): 75 | """ 76 | Draws `points` on `image`. 77 | 78 | Args: 79 | image: image in opencv format 80 | points: list of points to be drawn. 81 | Shape: (nof_points, 3) 82 | Format: each point should contain (y, x, confidence) 83 | color_palette: name of a matplotlib color palette 84 | Default: 'tab20' 85 | palette_samples: number of different colors sampled from the `color_palette` 86 | Default: 16 87 | confidence_threshold: only points with a confidence higher than this threshold will be drawn. Range: [0, 1] 88 | Default: 0.5 89 | 90 | Returns: 91 | A new image with overlaid points 92 | 93 | """ 94 | try: 95 | colors = np.round( 96 | np.array(plt.get_cmap(color_palette).colors) * 255 97 | ).astype(np.uint8)[:, ::-1].tolist() 98 | except AttributeError: # if palette has not pre-defined colors 99 | colors = np.round( 100 | np.array(plt.get_cmap(color_palette)(np.linspace(0, 1, palette_samples))) * 255 101 | ).astype(np.uint8)[:, -2::-1].tolist() 102 | 103 | circle_size = max(1, min(image.shape[:2]) // 150) # ToDo Shape it taking into account the size of the detection 104 | # circle_size = max(2, int(np.sqrt(np.max(np.max(points, axis=0) - np.min(points, axis=0)) // 16))) 105 | 106 | for i, pt in enumerate(points): 107 | if pt[2] > confidence_threshold: 108 | image = cv2.circle(image, (int(pt[1]), int(pt[0])), circle_size, tuple(colors[i % len(colors)]), -1) 109 | 110 | return image 111 | 112 | 113 | def draw_skeleton(image, points, skeleton, color_palette='Set2', palette_samples=8, person_index=0, 114 | confidence_threshold=0.5): 115 | """ 116 | Draws a `skeleton` on `image`. 117 | 118 | Args: 119 | image: image in opencv format 120 | points: list of points to be drawn. 
121 | Shape: (nof_points, 3) 122 | Format: each point should contain (y, x, confidence) 123 | skeleton: list of joints to be drawn 124 | Shape: (nof_joints, 2) 125 | Format: each joint should contain (point_a, point_b) where `point_a` and `point_b` are an index in `points` 126 | color_palette: name of a matplotlib color palette 127 | Default: 'Set2' 128 | palette_samples: number of different colors sampled from the `color_palette` 129 | Default: 8 130 | person_index: index of the person in `image` 131 | Default: 0 132 | confidence_threshold: only points with a confidence higher than this threshold will be drawn. Range: [0, 1] 133 | Default: 0.5 134 | 135 | Returns: 136 | A new image with overlaid joints 137 | 138 | """ 139 | try: 140 | colors = np.round( 141 | np.array(plt.get_cmap(color_palette).colors) * 255 142 | ).astype(np.uint8)[:, ::-1].tolist() 143 | except AttributeError: # if palette has not pre-defined colors 144 | colors = np.round( 145 | np.array(plt.get_cmap(color_palette)(np.linspace(0, 1, palette_samples))) * 255 146 | ).astype(np.uint8)[:, -2::-1].tolist() 147 | 148 | for i, joint in enumerate(skeleton): 149 | pt1, pt2 = points[joint] 150 | if pt1[2] > confidence_threshold and pt2[2] > confidence_threshold: 151 | image = cv2.line( 152 | image, (int(pt1[1]), int(pt1[0])), (int(pt2[1]), int(pt2[0])), 153 | tuple(colors[person_index % len(colors)]), 2 154 | ) 155 | 156 | return image 157 | 158 | 159 | def draw_points_and_skeleton(image, points, skeleton, points_color_palette='tab20', points_palette_samples=16, 160 | skeleton_color_palette='Set2', skeleton_palette_samples=8, person_index=0, 161 | confidence_threshold=0.5): 162 | """ 163 | Draws `points` and `skeleton` on `image`. 164 | 165 | Args: 166 | image: image in opencv format 167 | points: list of points to be drawn. 168 | Shape: (nof_points, 3) 169 | Format: each point should contain (y, x, confidence) 170 | skeleton: list of joints to be drawn 171 | Shape: (nof_joints, 2) 172 | Format: each joint should contain (point_a, point_b) where `point_a` and `point_b` are an index in `points` 173 | points_color_palette: name of a matplotlib color palette 174 | Default: 'tab20' 175 | points_palette_samples: number of different colors sampled from the `color_palette` 176 | Default: 16 177 | skeleton_color_palette: name of a matplotlib color palette 178 | Default: 'Set2' 179 | skeleton_palette_samples: number of different colors sampled from the `color_palette` 180 | Default: 8 181 | person_index: index of the person in `image` 182 | Default: 0 183 | confidence_threshold: only points with a confidence higher than this threshold will be drawn. Range: [0, 1] 184 | Default: 0.5 185 | 186 | Returns: 187 | A new image with overlaid joints 188 | 189 | """ 190 | image = draw_skeleton(image, points, skeleton, color_palette=skeleton_color_palette, 191 | palette_samples=skeleton_palette_samples, person_index=person_index, 192 | confidence_threshold=confidence_threshold) 193 | image = draw_points(image, points, color_palette=points_color_palette, palette_samples=points_palette_samples, 194 | confidence_threshold=confidence_threshold) 195 | return image 196 | 197 | 198 | def save_images(images, target, joint_target, output, joint_output, joint_visibility, summary_writer=None, step=0, 199 | prefix=''): 200 | """ 201 | Creates a grid of images with gt joints and a grid with predicted joints. 202 | This is a basic function for debugging purposes only. 
203 | 204 | If summary_writer is not None, the grid will be written in that SummaryWriter with name "{prefix}_images" and 205 | "{prefix}_predictions". 206 | 207 | Args: 208 | images (torch.Tensor): a tensor of images with shape (batch x channels x height x width). 209 | target (torch.Tensor): a tensor of gt heatmaps with shape (batch x channels x height x width). 210 | joint_target (torch.Tensor): a tensor of gt joints with shape (batch x joints x 2). 211 | output (torch.Tensor): a tensor of predicted heatmaps with shape (batch x channels x height x width). 212 | joint_output (torch.Tensor): a tensor of predicted joints with shape (batch x joints x 2). 213 | joint_visibility (torch.Tensor): a tensor of joint visibility with shape (batch x joints). 214 | summary_writer (tb.SummaryWriter): a SummaryWriter where write the grids. 215 | Default: None 216 | step (int): summary_writer step. 217 | Default: 0 218 | prefix (str): summary_writer name prefix. 219 | Default: "" 220 | 221 | Returns: 222 | A pair of images which are built from torchvision.utils.make_grid 223 | """ 224 | # Input images with gt 225 | images_ok = images.detach().clone() 226 | images_ok[:, 0].mul_(0.229).add_(0.485) 227 | images_ok[:, 1].mul_(0.224).add_(0.456) 228 | images_ok[:, 2].mul_(0.225).add_(0.406) 229 | for i in range(images.shape[0]): 230 | joints = joint_target[i] * 4. 231 | joints_vis = joint_visibility[i] 232 | 233 | for joint, joint_vis in zip(joints, joints_vis): 234 | if joint_vis[0]: 235 | a = int(joint[1].item()) 236 | b = int(joint[0].item()) 237 | # images_ok[i][:, a-1:a+1, b-1:b+1] = torch.tensor([1, 0, 0]) 238 | images_ok[i][0, a - 1:a + 1, b - 1:b + 1] = 1 239 | images_ok[i][1:, a - 1:a + 1, b - 1:b + 1] = 0 240 | grid_gt = torchvision.utils.make_grid(images_ok, nrow=int(images_ok.shape[0] ** 0.5), padding=2, normalize=False) 241 | if summary_writer is not None: 242 | summary_writer.add_image(prefix + 'images', grid_gt, global_step=step) 243 | 244 | # Input images with prediction 245 | images_ok = images.detach().clone() 246 | images_ok[:, 0].mul_(0.229).add_(0.485) 247 | images_ok[:, 1].mul_(0.224).add_(0.456) 248 | images_ok[:, 2].mul_(0.225).add_(0.406) 249 | for i in range(images.shape[0]): 250 | joints = joint_output[i] * 4. 
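        # The factor of 4 above maps joint coordinates from heatmap space back to
        # input-image pixels, assuming the usual 4x heatmap stride (e.g. a 256x192
        # input with a 64x48 heatmap). The per-channel mul_/add_ calls earlier in
        # this block undo the ImageNet normalization (std 0.229/0.224/0.225,
        # mean 0.485/0.456/0.406) so the grid renders as a viewable image, and the
        # slice assignments below paint a small red marker (R=1, G=B=0) at every
        # joint whose visibility flag is set.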
251 | joints_vis = joint_visibility[i] 252 | 253 | for joint, joint_vis in zip(joints, joints_vis): 254 | if joint_vis[0]: 255 | a = int(joint[1].item()) 256 | b = int(joint[0].item()) 257 | # images_ok[i][:, a-1:a+1, b-1:b+1] = torch.tensor([1, 0, 0]) 258 | images_ok[i][0, a - 1:a + 1, b - 1:b + 1] = 1 259 | images_ok[i][1:, a - 1:a + 1, b - 1:b + 1] = 0 260 | grid_pred = torchvision.utils.make_grid(images_ok, nrow=int(images_ok.shape[0] ** 0.5), padding=2, normalize=False) 261 | if summary_writer is not None: 262 | summary_writer.add_image(prefix + 'predictions', grid_pred, global_step=step) 263 | 264 | # Heatmaps 265 | # ToDo 266 | # for h in range(0,17): 267 | # heatmap = torchvision.utils.make_grid(output[h].detach(), nrow=int(np.sqrt(output.shape[0])), 268 | # padding=2, normalize=True, range=(0, 1)) 269 | # summary_writer.add_image('train_heatmap_%d' % h, heatmap, global_step=step + epoch*len_dl_train) 270 | 271 | return grid_gt, grid_pred 272 | 273 | 274 | def check_video_rotation(filename): 275 | # thanks to 276 | # https://stackoverflow.com/questions/53097092/frame-from-video-is-upside-down-after-extracting/55747773#55747773 277 | 278 | # this returns meta-data of the video file in form of a dictionary 279 | meta_dict = ffmpeg.probe(filename) 280 | 281 | # from the dictionary, meta_dict['streams'][0]['tags']['rotate'] is the key 282 | # we are looking for 283 | rotation_code = None 284 | try: 285 | if int(meta_dict['streams'][0]['tags']['rotate']) == 90: 286 | rotation_code = cv2.ROTATE_90_CLOCKWISE 287 | elif int(meta_dict['streams'][0]['tags']['rotate']) == 180: 288 | rotation_code = cv2.ROTATE_180 289 | elif int(meta_dict['streams'][0]['tags']['rotate']) == 270: 290 | rotation_code = cv2.ROTATE_90_COUNTERCLOCKWISE 291 | else: 292 | raise ValueError 293 | except KeyError: 294 | pass 295 | 296 | return rotation_code 297 | --------------------------------------------------------------------------------
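To tie the helpers in utils/visualization.py together, the following is a minimal usage sketch (not part of the repository, written for illustration): it assumes the repository root is on PYTHONPATH, reuses the bundled examples/img1.jpg, and fills the keypoint array with dummy values where real predictions from the model's decoded heatmaps would go. Points are expected in (y, x, confidence) order, matching the docstrings above.

import cv2
import numpy as np

from utils.visualization import joints_dict, draw_points_and_skeleton

# Hypothetical keypoints for one person: 17 COCO joints as (y, x, confidence).
points = np.zeros((17, 3), dtype=np.float32)
points[0] = [80.0, 120.0, 0.9]  # nose, dummy values for illustration
# ... the remaining rows would be filled from the model's decoded heatmaps ...

image = cv2.imread('examples/img1.jpg')        # sample image shipped with the repo
skeleton = joints_dict()['coco']['skeleton']   # COCO limb connectivity

overlay = draw_points_and_skeleton(
    image, points, skeleton,
    points_color_palette='tab20', points_palette_samples=16,
    skeleton_color_palette='Set2', skeleton_palette_samples=8,
    person_index=0, confidence_threshold=0.5)

cv2.imwrite('examples/img1_overlay.jpg', overlay)

Only joints whose confidence exceeds confidence_threshold are drawn, and a skeleton edge is drawn only when both of its endpoints pass that threshold, so low-confidence detections simply drop out of the overlay.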