├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── experiments ├── coco │ ├── resnet101 │ │ ├── 256x192_d256x3_adam_lr1e-3.yaml │ │ ├── 256x192_d256x3_adam_lr1e-3_caffe.yaml │ │ └── 384x288_d256x3_adam_lr1e-3.yaml │ ├── resnet152 │ │ ├── 256x192_d256x3_adam_lr1e-3.yaml │ │ ├── 256x192_d256x3_adam_lr1e-3_caffe.yaml │ │ └── 384x288_d256x3_adam_lr1e-3.yaml │ └── resnet50 │ │ ├── 256x192_d256x3_adam_lr1e-3.yaml │ │ ├── 256x192_d256x3_adam_lr1e-3_caffe.yaml │ │ └── 384x288_d256x3_adam_lr1e-3.yaml └── mpii │ ├── resnet101 │ ├── 256x256_d256x3_adam_lr1e-3.yaml │ └── 384x384_d256x3_adam_lr1e-3.yaml │ ├── resnet152 │ ├── 256x256_d256x3_adam_lr1e-3.yaml │ └── 384x384_d256x3_adam_lr1e-3.yaml │ └── resnet50 │ ├── 256x256_d256x3_adam_lr1e-3.yaml │ └── 384x384_d256x3_adam_lr1e-3.yaml ├── lib ├── Makefile ├── core │ ├── config.py │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── coco.py │ └── mpii.py ├── models │ ├── __init__.py │ └── pose_resnet.py ├── nms │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── pose_estimation ├── _init_paths.py ├── train.py └── valid.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | .hypothesis/ 46 | .pytest_cache/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | db.sqlite3 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # Environments 83 | .env 84 | .venv 85 | env/ 86 | venv/ 87 | ENV/ 88 | env.bak/ 89 | venv.bak/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | /data 105 | /output 106 | /models 107 | /log 108 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing 3 | 4 | This project welcomes contributions and suggestions. 
Most contributions require you to agree to a 5 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 6 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 9 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 10 | provided by the bot. You will only need to do this once across all repos using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 14 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple Baselines for Human Pose Estimation and Tracking 2 | 3 | ## News 4 | - Our new work [High-Resolution Representations for Labeling Pixels and Regions](https://arxiv.org/abs/1904.04514) is available at [HRNet](https://github.com/HRNet). Our HRNet has been applied to a wide range of vision tasks, such as [image classification](https://github.com/HRNet/HRNet-Image-Classification), [object detection](https://github.com/HRNet/HRNet-Object-Detection), [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation) and [facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection). 5 | - Our new work [Deep High-Resolution Representation Learning for Human Pose Estimation](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch) has already been released. The best single HRNet can obtain an **AP of 77.0** on the COCO test-dev2017 dataset and **92.3% PCKh@0.5** on the MPII test set. The new repository also supports the SimpleBaseline method, and you are welcome to try it.
6 | - Our entry using this repo won the [PoseTrack2018 Multi-person Pose Tracking Challenge](https://posetrack.net/workshops/eccv2018/posetrack_eccv_2018_results.html)!
7 | - Our entry using this repo ranked 2nd in the [keypoint detection task of COCO 2018](http://cocodataset.org/#keypoints-leaderboard)! 8 | 9 | ## Introduction 10 | This is an official pytorch implementation of [*Simple Baselines for Human Pose Estimation and Tracking*](https://arxiv.org/abs/1804.06208). This work provides baseline methods that are surprisingly simple and effective, and thus helpful for inspiring and evaluating new ideas in the field. State-of-the-art results are achieved on challenging benchmarks. On the COCO keypoint validation set, our best **single model** achieves **74.3 mAP**. You can reproduce our results using this repo. All models are provided for research purposes.
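The pipeline is intentionally minimal: a ResNet backbone, a small stack of deconvolutional layers, and a 1x1 convolution that predicts one heatmap per joint. The snippet below is only an illustrative sketch, assuming a torchvision ResNet-50 backbone and a hypothetical class name; the actual model is `lib/models/pose_resnet.py`, and the numbers used here (3 deconv layers, 256 filters, 4x4 kernels, 1x1 final conv, 17 COCO joints) come from the `MODEL`/`EXTRA` fields of the yaml files in `experiments/`.

```
# Illustrative sketch only (hypothetical class); the repository model is lib/models/pose_resnet.py.
import torch
import torch.nn as nn
import torchvision

class SimpleBaselineSketch(nn.Module):
    def __init__(self, num_joints=17, deconv_filters=256, num_deconv_layers=3):
        super(SimpleBaselineSketch, self).__init__()
        resnet = torchvision.models.resnet50(pretrained=False)
        # keep the convolutional stages, drop avgpool/fc (output stride 32, 2048 channels)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        layers, in_channels = [], 2048
        for _ in range(num_deconv_layers):  # EXTRA.NUM_DECONV_LAYERS
            layers += [nn.ConvTranspose2d(in_channels, deconv_filters, kernel_size=4,
                                          stride=2, padding=1, bias=False),
                       nn.BatchNorm2d(deconv_filters),
                       nn.ReLU(inplace=True)]
            in_channels = deconv_filters
        self.deconv_layers = nn.Sequential(*layers)
        # EXTRA.FINAL_CONV_KERNEL: 1 -> one heatmap per joint
        self.final_layer = nn.Conv2d(deconv_filters, num_joints, kernel_size=1)

    def forward(self, x):
        return self.final_layer(self.deconv_layers(self.backbone(x)))

# A 256x192 crop gives 8x6 backbone features and 64x48 heatmaps, matching HEATMAP_SIZE in the configs.
print(SimpleBaselineSketch()(torch.randn(1, 3, 256, 192)).shape)  # torch.Size([1, 17, 64, 48])
```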
11 | 12 | ## Main Results 13 | ### Results on MPII val 14 | | Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1| 15 | |---|---|---|---|---|---|---|---|---|---| 16 | | 256x256_pose_resnet_50_d256d256d256 | 96.351 | 95.329 | 88.989 | 83.176 | 88.420 | 83.960 | 79.594 | 88.532 | 33.911 | 17 | | 384x384_pose_resnet_50_d256d256d256 | 96.658 | 95.754 | 89.790 | 84.614 | 88.523 | 84.666 | 79.287 | 89.066 | 38.046 | 18 | | 256x256_pose_resnet_101_d256d256d256 | 96.862 | 95.873 | 89.518 | 84.376 | 88.437 | 84.486 | 80.703 | 89.131 | 34.020 | 19 | | 384x384_pose_resnet_101_d256d256d256 | 96.965 | 95.907 | 90.268 | 85.780 | 89.597 | 85.935 | 82.098 | 90.003 | 38.860 | 20 | | 256x256_pose_resnet_152_d256d256d256 | 97.033 | 95.941 | 90.046 | 84.976 | 89.164 | 85.311 | 81.271 | 89.620 | 35.025 | 21 | | 384x384_pose_resnet_152_d256d256d256 | 96.794 | 95.618 | 90.080 | 86.225 | 89.700 | 86.862 | 82.853 | 90.200 | 39.433 | 22 | 23 | ### Note: 24 | - Flip test is used. 25 | 26 | ### Results on COCO val2017, using a detector with human AP of 56.4 on COCO val2017 27 | | Arch | AP | AP .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 28 | |---|---|---|---|---|---|---|---|---|---|---| 29 | | 256x192_pose_resnet_50_d256d256d256 | 0.704 | 0.886 | 0.783 | 0.671 | 0.772 | 0.763 | 0.929 | 0.834 | 0.721 | 0.824 | 30 | | 384x288_pose_resnet_50_d256d256d256 | 0.722 | 0.893 | 0.789 | 0.681 | 0.797 | 0.776 | 0.932 | 0.838 | 0.728 | 0.846 | 31 | | 256x192_pose_resnet_101_d256d256d256 | 0.714 | 0.893 | 0.793 | 0.681 | 0.781 | 0.771 | 0.934 | 0.840 | 0.730 | 0.832 | 32 | | 384x288_pose_resnet_101_d256d256d256 | 0.736 | 0.896 | 0.803 | 0.699 | 0.811 | 0.791 | 0.936 | 0.851 | 0.745 | 0.858 | 33 | | 256x192_pose_resnet_152_d256d256d256 | 0.720 | 0.893 | 0.798 | 0.687 | 0.789 | 0.778 | 0.934 | 0.846 | 0.736 | 0.839 | 34 | | 384x288_pose_resnet_152_d256d256d256 | 0.743 | 0.896 | 0.811 | 0.705 | 0.816 | 0.797 | 0.937 | 0.858 | 0.751 | 0.863 | 35 | 36 | 37 | ### Results on *Caffe-style* ResNet 38 | | Arch | AP | AP .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 39 | |---|---|---|---|---|---|---|---|---|---|---| 40 | | 256x192_pose_resnet_50_*caffe*_d256d256d256 | 0.704 | 0.914 | 0.782 | 0.677 | 0.744 | 0.735 | 0.921 | 0.805 | 0.704 | 0.783 | 41 | | 256x192_pose_resnet_101_*caffe*_d256d256d256 | 0.720 | 0.915 | 0.803 | 0.693 | 0.764 | 0.753 | 0.928 | 0.821 | 0.720 | 0.802 | 42 | | 256x192_pose_resnet_152_*caffe*_d256d256d256 | 0.728 | 0.925 | 0.804 | 0.702 | 0.766 | 0.760 | 0.931 | 0.828 | 0.729 | 0.806 | 43 | 44 | 45 | ### Note: 46 | - Flip test is used. 47 | - The person detector has a human AP of 56.4 on the COCO val2017 dataset. 48 | - The difference between *PyTorch-style* and *Caffe-style* ResNet is the position of the stride-2 convolution (see the sketch below). 49 |
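To make that last note concrete, here is a small hypothetical sketch; it is not the repository code (the real bottleneck blocks live in `lib/models/pose_resnet.py`, and the style is selected via `STYLE: 'caffe'` in the `*_caffe.yaml` configs). Both styles build the same 1x1 / 3x3 / 1x1 bottleneck; they differ only in which convolution carries the stride-2 downsampling.

```
# Hypothetical illustration only -- the real bottlenecks are in lib/models/pose_resnet.py.
import torch.nn as nn

def bottleneck_convs(inplanes, planes, stride=2, style='pytorch'):
    # PyTorch-style ResNet puts the stride on the 3x3 convolution;
    # Caffe-style ResNet puts it on the first 1x1 convolution.
    s1, s3 = (stride, 1) if style == 'caffe' else (1, stride)
    return nn.Sequential(
        nn.Conv2d(inplanes, planes, kernel_size=1, stride=s1, bias=False),
        nn.BatchNorm2d(planes), nn.ReLU(inplace=True),
        nn.Conv2d(planes, planes, kernel_size=3, stride=s3, padding=1, bias=False),
        nn.BatchNorm2d(planes), nn.ReLU(inplace=True),
        nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False),
        nn.BatchNorm2d(planes * 4),
    )
```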
50 | ## Environment 51 | The code is developed using python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 4 NVIDIA P100 GPU cards. Other platforms or GPU cards are not fully tested. 52 | 53 | ## Quick start 54 | ### Installation 55 | 1. Install pytorch >= v0.4.0 following the [official instructions](https://pytorch.org/). 56 | 2. Disable cudnn for batch_norm: 57 | ``` 58 | # PYTORCH=/path/to/pytorch 59 | # for pytorch v0.4.0 60 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 61 | # for pytorch v0.4.1 62 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 63 | ``` 64 | Note that instructions like # PYTORCH=/path/to/pytorch indicate that you should pick a path where you'd like to have pytorch installed and then set an environment variable (PYTORCH in this case) accordingly. 65 | 3. Clone this repo; we'll refer to the directory that you cloned as ${POSE_ROOT}. 66 | 4. Install dependencies: 67 | ``` 68 | pip install -r requirements.txt 69 | ``` 70 | 5. Make libs: 71 | ``` 72 | cd ${POSE_ROOT}/lib 73 | make 74 | ``` 75 | 6. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 76 | ``` 77 | # COCOAPI=/path/to/clone/cocoapi 78 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 79 | cd $COCOAPI/PythonAPI 80 | # Install into global site-packages 81 | make install 82 | # Alternatively, if you do not have permissions or prefer 83 | # not to install the COCO API into global site-packages 84 | python3 setup.py install --user 85 | ``` 86 | Note that instructions like # COCOAPI=/path/to/clone/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. 87 | 7. Download pytorch imagenet pretrained models from [pytorch model zoo](https://pytorch.org/docs/stable/model_zoo.html#module-torch.utils.model_zoo) and caffe-style pretrained models from [GoogleDrive](https://drive.google.com/drive/folders/1yJMSFOnmzwhA4YYQS71Uy7X1Kl_xq9fN?usp=sharing). 88 | 8. Download mpii and coco pretrained models from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW0D5ZE4ArK9wk_fvw) or [GoogleDrive](https://drive.google.com/drive/folders/13_wJ6nC7my1KKouMkQMqyr9r1ZnLnukP?usp=sharing). Please download them under ${POSE_ROOT}/models/pytorch, and make them look like this: 89 | 90 | ``` 91 | ${POSE_ROOT} 92 | `-- models 93 | `-- pytorch 94 | |-- imagenet 95 | | |-- resnet50-19c8e357.pth 96 | | |-- resnet50-caffe.pth.tar 97 | | |-- resnet101-5d3b4d8f.pth 98 | | |-- resnet101-caffe.pth.tar 99 | | |-- resnet152-b121ed2d.pth 100 | | `-- resnet152-caffe.pth.tar 101 | |-- pose_coco 102 | | |-- pose_resnet_101_256x192.pth.tar 103 | | |-- pose_resnet_101_384x288.pth.tar 104 | | |-- pose_resnet_152_256x192.pth.tar 105 | | |-- pose_resnet_152_384x288.pth.tar 106 | | |-- pose_resnet_50_256x192.pth.tar 107 | | `-- pose_resnet_50_384x288.pth.tar 108 | `-- pose_mpii 109 | |-- pose_resnet_101_256x256.pth.tar 110 | |-- pose_resnet_101_384x384.pth.tar 111 | |-- pose_resnet_152_256x256.pth.tar 112 | |-- pose_resnet_152_384x384.pth.tar 113 | |-- pose_resnet_50_256x256.pth.tar 114 | `-- pose_resnet_50_384x384.pth.tar 115 | 116 | ``` 117 | 118 | 9. Init the output (training model output) and log (tensorboard log) directories: 119 | 120 | ``` 121 | mkdir output 122 | mkdir log 123 | ``` 124 | 125 | Your directory tree should look like this: 126 | 127 | ``` 128 | ${POSE_ROOT} 129 | ├── data 130 | ├── experiments 131 | ├── lib 132 | ├── log 133 | ├── models 134 | ├── output 135 | ├── pose_estimation 136 | ├── README.md 137 | └── requirements.txt 138 | ``` 139 | 140 | ### Data preparation 141 | **For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). The original annotation files are in MATLAB format.
We have converted them into json format, you also need to download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW00SqrairNetmeVu4) or [GoogleDrive](https://drive.google.com/drive/folders/1En_VqmStnsXMdldXA6qpqEyDQulnmS3a?usp=sharing). 142 | Extract them under {POSE_ROOT}/data, and make them look like this: 143 | ``` 144 | ${POSE_ROOT} 145 | |-- data 146 | `-- |-- mpii 147 | `-- |-- annot 148 | | |-- gt_valid.mat 149 | | |-- test.json 150 | | |-- train.json 151 | | |-- trainval.json 152 | | `-- valid.json 153 | `-- images 154 | |-- 000001163.jpg 155 | |-- 000003072.jpg 156 | ``` 157 | 158 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. We also provide person detection result of COCO val2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing). 159 | Download and extract them under {POSE_ROOT}/data, and make them look like this: 160 | ``` 161 | ${POSE_ROOT} 162 | |-- data 163 | `-- |-- coco 164 | `-- |-- annotations 165 | | |-- person_keypoints_train2017.json 166 | | `-- person_keypoints_val2017.json 167 | |-- person_detection_results 168 | | |-- COCO_val2017_detections_AP_H_56_person.json 169 | `-- images 170 | |-- train2017 171 | | |-- 000000000009.jpg 172 | | |-- 000000000025.jpg 173 | | |-- 000000000030.jpg 174 | | |-- ... 175 | `-- val2017 176 | |-- 000000000139.jpg 177 | |-- 000000000285.jpg 178 | |-- 000000000632.jpg 179 | |-- ... 180 | ``` 181 | 182 | ### Valid on MPII using pretrained models 183 | 184 | ``` 185 | python pose_estimation/valid.py \ 186 | --cfg experiments/mpii/resnet50/256x256_d256x3_adam_lr1e-3.yaml \ 187 | --flip-test \ 188 | --model-file models/pytorch/pose_mpii/pose_resnet_50_256x256.pth.tar 189 | ``` 190 | 191 | ### Training on MPII 192 | 193 | ``` 194 | python pose_estimation/train.py \ 195 | --cfg experiments/mpii/resnet50/256x256_d256x3_adam_lr1e-3.yaml 196 | ``` 197 | 198 | ### Valid on COCO val2017 using pretrained models 199 | 200 | ``` 201 | python pose_estimation/valid.py \ 202 | --cfg experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml \ 203 | --flip-test \ 204 | --model-file models/pytorch/pose_coco/pose_resnet_50_256x192.pth.tar 205 | ``` 206 | 207 | ### Training on COCO train2017 208 | 209 | ``` 210 | python pose_estimation/train.py \ 211 | --cfg experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml 212 | ``` 213 | 214 | ### Other Implementations 215 | - TensorFlow [[Version1](https://github.com/mks0601/TF-SimpleHumanPose)] 216 | - PaddlePaddle [[Version1](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/human_pose_estimation)] 217 | - Gluon [[Version1](https://gluon-cv.mxnet.io/model_zoo/pose.html)] 218 | 219 | ### Citation 220 | If you use our code or models in your research, please cite with: 221 | ``` 222 | @inproceedings{xiao2018simple, 223 | author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, 224 | title={Simple Baselines for Human Pose Estimation and Tracking}, 225 | booktitle = {European Conference on Computer Vision (ECCV)}, 226 | year = {2018} 227 | } 228 | ``` 229 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our 
software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /experiments/coco/resnet101/256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 19 | IMAGE_SIZE: 20 | - 192 21 | - 256 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 48 27 | - 64 28 | SIGMA: 2 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 101 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet101/256x192_d256x3_adam_lr1e-3_caffe.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-caffe.pth.tar' 19 | STYLE: 'caffe' 20 | IMAGE_SIZE: 21 | - 192 22 | - 256 23 | NUM_JOINTS: 17 24 | EXTRA: 25 | TARGET_TYPE: 'gaussian' 26 | HEATMAP_SIZE: 27 | - 48 28 | - 64 29 | SIGMA: 2 30 | FINAL_CONV_KERNEL: 1 31 | DECONV_WITH_BIAS: false 32 | NUM_DECONV_LAYERS: 3 33 | NUM_DECONV_FILTERS: 34 | - 256 35 | - 256 36 | - 256 37 | NUM_DECONV_KERNELS: 38 | - 4 39 | - 4 40 | - 4 41 | NUM_LAYERS: 101 42 | LOSS: 43 | USE_TARGET_WEIGHT: true 44 | TRAIN: 45 | BATCH_SIZE: 32 46 | SHUFFLE: true 47 | BEGIN_EPOCH: 0 48 | END_EPOCH: 140 49 | RESUME: false 50 | OPTIMIZER: 'adam' 51 | LR: 0.001 52 | LR_FACTOR: 0.1 53 | LR_STEP: 54 | - 90 55 | - 120 56 | WD: 0.0001 57 | GAMMA1: 0.99 58 | GAMMA2: 0.0 59 | MOMENTUM: 0.9 60 | NESTEROV: false 61 | TEST: 62 | BATCH_SIZE: 32 63 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 64 | BBOX_THRE: 1.0 65 | FLIP_TEST: false 66 | IMAGE_THRE: 0.0 67 | IN_VIS_THRE: 0.2 68 | MODEL_FILE: '' 69 | NMS_THRE: 1.0 70 | OKS_THRE: 0.9 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | 
SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet101/384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 19 | IMAGE_SIZE: 20 | - 288 21 | - 384 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 72 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 101 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet152/256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 19 | IMAGE_SIZE: 20 | - 192 21 | - 256 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 48 27 | - 64 28 | SIGMA: 2 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 152 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 
77 | -------------------------------------------------------------------------------- /experiments/coco/resnet152/256x192_d256x3_adam_lr1e-3_caffe.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet152-caffe.pth.tar' 19 | STYLE: 'caffe' 20 | IMAGE_SIZE: 21 | - 192 22 | - 256 23 | NUM_JOINTS: 17 24 | EXTRA: 25 | TARGET_TYPE: 'gaussian' 26 | HEATMAP_SIZE: 27 | - 48 28 | - 64 29 | SIGMA: 2 30 | FINAL_CONV_KERNEL: 1 31 | DECONV_WITH_BIAS: false 32 | NUM_DECONV_LAYERS: 3 33 | NUM_DECONV_FILTERS: 34 | - 256 35 | - 256 36 | - 256 37 | NUM_DECONV_KERNELS: 38 | - 4 39 | - 4 40 | - 4 41 | NUM_LAYERS: 152 42 | LOSS: 43 | USE_TARGET_WEIGHT: true 44 | TRAIN: 45 | BATCH_SIZE: 32 46 | SHUFFLE: true 47 | BEGIN_EPOCH: 0 48 | END_EPOCH: 140 49 | RESUME: false 50 | OPTIMIZER: 'adam' 51 | LR: 0.001 52 | LR_FACTOR: 0.1 53 | LR_STEP: 54 | - 90 55 | - 120 56 | WD: 0.0001 57 | GAMMA1: 0.99 58 | GAMMA2: 0.0 59 | MOMENTUM: 0.9 60 | NESTEROV: false 61 | TEST: 62 | BATCH_SIZE: 32 63 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 64 | BBOX_THRE: 1.0 65 | FLIP_TEST: false 66 | IMAGE_THRE: 0.0 67 | IN_VIS_THRE: 0.2 68 | MODEL_FILE: '' 69 | NMS_THRE: 1.0 70 | OKS_THRE: 0.9 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet152/384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 19 | IMAGE_SIZE: 20 | - 288 21 | - 384 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 72 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 152 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: 
true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 19 | IMAGE_SIZE: 20 | - 192 21 | - 256 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 48 27 | - 64 28 | SIGMA: 2 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 50 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3_caffe.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet50-caffe.pth.tar' 19 | STYLE: 'caffe' 20 | IMAGE_SIZE: 21 | - 192 22 | - 256 23 | NUM_JOINTS: 17 24 | EXTRA: 25 | TARGET_TYPE: 'gaussian' 26 | HEATMAP_SIZE: 27 | - 48 28 | - 64 29 | SIGMA: 2 30 | FINAL_CONV_KERNEL: 1 31 | DECONV_WITH_BIAS: false 32 | NUM_DECONV_LAYERS: 3 33 | NUM_DECONV_FILTERS: 34 | - 256 35 | - 256 36 | - 256 37 | NUM_DECONV_KERNELS: 38 | - 4 39 | - 4 40 | - 4 41 | NUM_LAYERS: 50 42 | LOSS: 43 | USE_TARGET_WEIGHT: true 44 | TRAIN: 45 | BATCH_SIZE: 32 46 | SHUFFLE: true 47 | BEGIN_EPOCH: 0 48 | END_EPOCH: 140 49 | RESUME: false 50 | OPTIMIZER: 'adam' 51 | LR: 0.001 52 | LR_FACTOR: 0.1 53 | LR_STEP: 54 | - 90 55 | - 120 56 | WD: 0.0001 57 | GAMMA1: 0.99 58 | GAMMA2: 0.0 59 | MOMENTUM: 0.9 60 | NESTEROV: false 61 | TEST: 62 | BATCH_SIZE: 32 63 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 64 | BBOX_THRE: 1.0 65 | FLIP_TEST: false 66 | IMAGE_THRE: 0.0 67 | IN_VIS_THRE: 0.2 68 | MODEL_FILE: '' 69 | NMS_THRE: 1.0 70 | OKS_THRE: 0.9 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: 
true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet50/384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 19 | IMAGE_SIZE: 20 | - 288 21 | - 384 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 72 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 50 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/mpii/resnet101/256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 2 29 | HEATMAP_SIZE: 30 | - 64 31 | - 64 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 101 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet101/384x384_d256x3_adam_lr1e-3.yaml: 
-------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: mpii 10 | ROOT: 'data/mpii/' 11 | TEST_SET: valid 12 | TRAIN_SET: train 13 | FLIP: true 14 | ROT_FACTOR: 30 15 | SCALE_FACTOR: 0.25 16 | MODEL: 17 | NAME: pose_resnet 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 19 | IMAGE_SIZE: 20 | - 384 21 | - 384 22 | NUM_JOINTS: 16 23 | EXTRA: 24 | TARGET_TYPE: gaussian 25 | HEATMAP_SIZE: 26 | - 96 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 101 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: adam 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | FLIP_TEST: false 63 | MODEL_FILE: '' 64 | DEBUG: 65 | DEBUG: false 66 | SAVE_BATCH_IMAGES_GT: true 67 | SAVE_BATCH_IMAGES_PRED: true 68 | SAVE_HEATMAPS_GT: true 69 | SAVE_HEATMAPS_PRED: true 70 | -------------------------------------------------------------------------------- /experiments/mpii/resnet152/256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 2 29 | HEATMAP_SIZE: 30 | - 64 31 | - 64 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 152 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet152/384x384_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | 
MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 22 | IMAGE_SIZE: 23 | - 384 24 | - 384 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 3 29 | HEATMAP_SIZE: 30 | - 96 31 | - 96 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 152 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 24 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet50/256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 2 29 | HEATMAP_SIZE: 30 | - 64 31 | - 64 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 50 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet50/384x384_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 22 | IMAGE_SIZE: 23 | - 384 24 | - 384 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 3 29 | HEATMAP_SIZE: 30 | - 96 31 | - 96 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 
| - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 50 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import yaml 13 | 14 | import numpy as np 15 | from easydict import EasyDict as edict 16 | 17 | 18 | config = edict() 19 | 20 | config.OUTPUT_DIR = '' 21 | config.LOG_DIR = '' 22 | config.DATA_DIR = '' 23 | config.GPUS = '0' 24 | config.WORKERS = 4 25 | config.PRINT_FREQ = 20 26 | 27 | # Cudnn related params 28 | config.CUDNN = edict() 29 | config.CUDNN.BENCHMARK = True 30 | config.CUDNN.DETERMINISTIC = False 31 | config.CUDNN.ENABLED = True 32 | 33 | # pose_resnet related params 34 | POSE_RESNET = edict() 35 | POSE_RESNET.NUM_LAYERS = 50 36 | POSE_RESNET.DECONV_WITH_BIAS = False 37 | POSE_RESNET.NUM_DECONV_LAYERS = 3 38 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 39 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 40 | POSE_RESNET.FINAL_CONV_KERNEL = 1 41 | POSE_RESNET.TARGET_TYPE = 'gaussian' 42 | POSE_RESNET.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 43 | POSE_RESNET.SIGMA = 2 44 | 45 | MODEL_EXTRAS = { 46 | 'pose_resnet': POSE_RESNET, 47 | } 48 | 49 | # common params for NETWORK 50 | config.MODEL = edict() 51 | config.MODEL.NAME = 'pose_resnet' 52 | config.MODEL.INIT_WEIGHTS = True 53 | config.MODEL.PRETRAINED = '' 54 | config.MODEL.NUM_JOINTS = 16 55 | config.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 56 | config.MODEL.EXTRA = MODEL_EXTRAS[config.MODEL.NAME] 57 | 58 | config.MODEL.STYLE = 'pytorch' 59 | 60 | config.LOSS = edict() 61 | config.LOSS.USE_TARGET_WEIGHT = True 62 | 63 | # DATASET related params 64 | config.DATASET = edict() 65 | config.DATASET.ROOT = '' 66 | config.DATASET.DATASET = 'mpii' 67 | config.DATASET.TRAIN_SET = 'train' 68 | config.DATASET.TEST_SET = 'valid' 69 | config.DATASET.DATA_FORMAT = 'jpg' 70 | config.DATASET.HYBRID_JOINTS_TYPE = '' 71 | config.DATASET.SELECT_DATA = False 72 | 73 | # training data augmentation 74 | config.DATASET.FLIP = True 75 | config.DATASET.SCALE_FACTOR = 0.25 76 | config.DATASET.ROT_FACTOR = 30 77 | 78 | # train 79 | config.TRAIN = edict() 80 | 81 | 
config.TRAIN.LR_FACTOR = 0.1 82 | config.TRAIN.LR_STEP = [90, 110] 83 | config.TRAIN.LR = 0.001 84 | 85 | config.TRAIN.OPTIMIZER = 'adam' 86 | config.TRAIN.MOMENTUM = 0.9 87 | config.TRAIN.WD = 0.0001 88 | config.TRAIN.NESTEROV = False 89 | config.TRAIN.GAMMA1 = 0.99 90 | config.TRAIN.GAMMA2 = 0.0 91 | 92 | config.TRAIN.BEGIN_EPOCH = 0 93 | config.TRAIN.END_EPOCH = 140 94 | 95 | config.TRAIN.RESUME = False 96 | config.TRAIN.CHECKPOINT = '' 97 | 98 | config.TRAIN.BATCH_SIZE = 32 99 | config.TRAIN.SHUFFLE = True 100 | 101 | # testing 102 | config.TEST = edict() 103 | 104 | # size of images for each device 105 | config.TEST.BATCH_SIZE = 32 106 | # Test Model Epoch 107 | config.TEST.FLIP_TEST = False 108 | config.TEST.POST_PROCESS = True 109 | config.TEST.SHIFT_HEATMAP = True 110 | 111 | config.TEST.USE_GT_BBOX = False 112 | # nms 113 | config.TEST.OKS_THRE = 0.5 114 | config.TEST.IN_VIS_THRE = 0.0 115 | config.TEST.COCO_BBOX_FILE = '' 116 | config.TEST.BBOX_THRE = 1.0 117 | config.TEST.MODEL_FILE = '' 118 | config.TEST.IMAGE_THRE = 0.0 119 | config.TEST.NMS_THRE = 1.0 120 | 121 | # debug 122 | config.DEBUG = edict() 123 | config.DEBUG.DEBUG = False 124 | config.DEBUG.SAVE_BATCH_IMAGES_GT = False 125 | config.DEBUG.SAVE_BATCH_IMAGES_PRED = False 126 | config.DEBUG.SAVE_HEATMAPS_GT = False 127 | config.DEBUG.SAVE_HEATMAPS_PRED = False 128 | 129 | 130 | def _update_dict(k, v): 131 | if k == 'DATASET': 132 | if 'MEAN' in v and v['MEAN']: 133 | v['MEAN'] = np.array([eval(x) if isinstance(x, str) else x 134 | for x in v['MEAN']]) 135 | if 'STD' in v and v['STD']: 136 | v['STD'] = np.array([eval(x) if isinstance(x, str) else x 137 | for x in v['STD']]) 138 | if k == 'MODEL': 139 | if 'EXTRA' in v and 'HEATMAP_SIZE' in v['EXTRA']: 140 | if isinstance(v['EXTRA']['HEATMAP_SIZE'], int): 141 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 142 | [v['EXTRA']['HEATMAP_SIZE'], v['EXTRA']['HEATMAP_SIZE']]) 143 | else: 144 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 145 | v['EXTRA']['HEATMAP_SIZE']) 146 | if 'IMAGE_SIZE' in v: 147 | if isinstance(v['IMAGE_SIZE'], int): 148 | v['IMAGE_SIZE'] = np.array([v['IMAGE_SIZE'], v['IMAGE_SIZE']]) 149 | else: 150 | v['IMAGE_SIZE'] = np.array(v['IMAGE_SIZE']) 151 | for vk, vv in v.items(): 152 | if vk in config[k]: 153 | config[k][vk] = vv 154 | else: 155 | raise ValueError("{}.{} not exist in config.py".format(k, vk)) 156 | 157 | 158 | def update_config(config_file): 159 | exp_config = None 160 | with open(config_file) as f: 161 | exp_config = edict(yaml.load(f)) 162 | for k, v in exp_config.items(): 163 | if k in config: 164 | if isinstance(v, dict): 165 | _update_dict(k, v) 166 | else: 167 | if k == 'SCALES': 168 | config[k][0] = (tuple(v)) 169 | else: 170 | config[k] = v 171 | else: 172 | raise ValueError("{} not exist in config.py".format(k)) 173 | 174 | 175 | def gen_config(config_file): 176 | cfg = dict(config) 177 | for k, v in cfg.items(): 178 | if isinstance(v, edict): 179 | cfg[k] = dict(v) 180 | 181 | with open(config_file, 'w') as f: 182 | yaml.dump(dict(cfg), f, default_flow_style=False) 183 | 184 | 185 | def update_dir(model_dir, log_dir, data_dir): 186 | if model_dir: 187 | config.OUTPUT_DIR = model_dir 188 | 189 | if log_dir: 190 | config.LOG_DIR = log_dir 191 | 192 | if data_dir: 193 | config.DATA_DIR = data_dir 194 | 195 | config.DATASET.ROOT = os.path.join( 196 | config.DATA_DIR, config.DATASET.ROOT) 197 | 198 | config.TEST.COCO_BBOX_FILE = os.path.join( 199 | config.DATA_DIR, config.TEST.COCO_BBOX_FILE) 200 | 201 | config.MODEL.PRETRAINED = os.path.join( 202 | 
config.DATA_DIR, config.MODEL.PRETRAINED) 203 | 204 | 205 | def get_model_name(cfg): 206 | name = cfg.MODEL.NAME 207 | full_name = cfg.MODEL.NAME 208 | extra = cfg.MODEL.EXTRA 209 | if name in ['pose_resnet']: 210 | name = '{model}_{num_layers}'.format( 211 | model=name, 212 | num_layers=extra.NUM_LAYERS) 213 | deconv_suffix = ''.join( 214 | 'd{}'.format(num_filters) 215 | for num_filters in extra.NUM_DECONV_FILTERS) 216 | full_name = '{height}x{width}_{name}_{deconv_suffix}'.format( 217 | height=cfg.MODEL.IMAGE_SIZE[1], 218 | width=cfg.MODEL.IMAGE_SIZE[0], 219 | name=name, 220 | deconv_suffix=deconv_suffix) 221 | else: 222 | raise ValueError('Unkown model: {}'.format(cfg.MODEL)) 223 | 224 | return name, full_name 225 | 226 | 227 | if __name__ == '__main__': 228 | import sys 229 | gen_config(sys.argv[1]) 230 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from core.inference import get_max_preds 14 | 15 | 16 | def calc_dists(preds, target, normalize): 17 | preds = preds.astype(np.float32) 18 | target = target.astype(np.float32) 19 | dists = np.zeros((preds.shape[1], preds.shape[0])) 20 | for n in range(preds.shape[0]): 21 | for c in range(preds.shape[1]): 22 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 23 | normed_preds = preds[n, c, :] / normalize[n] 24 | normed_targets = target[n, c, :] / normalize[n] 25 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 26 | else: 27 | dists[c, n] = -1 28 | return dists 29 | 30 | 31 | def dist_acc(dists, thr=0.5): 32 | ''' Return percentage below threshold while ignoring values with a -1 ''' 33 | dist_cal = np.not_equal(dists, -1) 34 | num_dist_cal = dist_cal.sum() 35 | if num_dist_cal > 0: 36 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 37 | else: 38 | return -1 39 | 40 | 41 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 42 | ''' 43 | Calculate accuracy according to PCK, 44 | but uses ground truth heatmap rather than x,y locations 45 | First value to be returned is average accuracy across 'idxs', 46 | followed by individual accuracies 47 | ''' 48 | idx = list(range(output.shape[1])) 49 | norm = 1.0 50 | if hm_type == 'gaussian': 51 | pred, _ = get_max_preds(output) 52 | target, _ = get_max_preds(target) 53 | h = output.shape[2] 54 | w = output.shape[3] 55 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 56 | dists = calc_dists(pred, target, norm) 57 | 58 | acc = np.zeros((len(idx) + 1)) 59 | avg_acc = 0 60 | cnt = 0 61 | 62 | for i in range(len(idx)): 63 | acc[i + 1] = dist_acc(dists[idx[i]]) 64 | if acc[i + 1] >= 0: 65 | avg_acc = avg_acc + acc[i + 1] 66 | cnt += 1 67 | 68 | avg_acc = avg_acc / cnt if cnt != 0 else 0 69 | if cnt != 0: 70 | acc[0] = avg_acc 71 | return acc, avg_acc, cnt, pred 72 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import time 13 | import os 14 | 15 | import numpy as np 16 | import torch 17 | 18 | from core.config import get_model_name 19 | from core.evaluate import accuracy 20 | from core.inference import get_final_preds 21 | from utils.transforms import flip_back 22 | from utils.vis import save_debug_images 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | def train(config, train_loader, model, criterion, optimizer, epoch, 29 | output_dir, tb_log_dir, writer_dict): 30 | batch_time = AverageMeter() 31 | data_time = AverageMeter() 32 | losses = AverageMeter() 33 | acc = AverageMeter() 34 | 35 | # switch to train mode 36 | model.train() 37 | 38 | end = time.time() 39 | for i, (input, target, target_weight, meta) in enumerate(train_loader): 40 | # measure data loading time 41 | data_time.update(time.time() - end) 42 | 43 | # compute output 44 | output = model(input) 45 | target = target.cuda(non_blocking=True) 46 | target_weight = target_weight.cuda(non_blocking=True) 47 | 48 | loss = criterion(output, target, target_weight) 49 | 50 | # compute gradient and do update step 51 | optimizer.zero_grad() 52 | loss.backward() 53 | optimizer.step() 54 | 55 | # measure accuracy and record loss 56 | losses.update(loss.item(), input.size(0)) 57 | 58 | _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), 59 | target.detach().cpu().numpy()) 60 | acc.update(avg_acc, cnt) 61 | 62 | # measure elapsed time 63 | batch_time.update(time.time() - end) 64 | end = time.time() 65 | 66 | if i % config.PRINT_FREQ == 0: 67 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 68 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 69 | 'Speed {speed:.1f} samples/s\t' \ 70 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 71 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 72 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 73 | epoch, i, len(train_loader), batch_time=batch_time, 74 | speed=input.size(0)/batch_time.val, 75 | data_time=data_time, loss=losses, acc=acc) 76 | logger.info(msg) 77 | 78 | writer = writer_dict['writer'] 79 | global_steps = writer_dict['train_global_steps'] 80 | writer.add_scalar('train_loss', losses.val, global_steps) 81 | writer.add_scalar('train_acc', acc.val, global_steps) 82 | writer_dict['train_global_steps'] = global_steps + 1 83 | 84 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 85 | save_debug_images(config, input, meta, target, pred*4, output, 86 | prefix) 87 | 88 | 89 | def validate(config, val_loader, val_dataset, model, criterion, output_dir, 90 | tb_log_dir, writer_dict=None): 91 | batch_time = AverageMeter() 92 | losses = AverageMeter() 93 | acc = AverageMeter() 94 | 95 | # switch to evaluate mode 96 | model.eval() 97 | 98 | num_samples = len(val_dataset) 99 | all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), 100 | dtype=np.float32) 101 | all_boxes = np.zeros((num_samples, 6)) 102 | image_path = [] 103 | filenames = [] 104 | imgnums = [] 105 | idx = 0 106 | with torch.no_grad(): 107 | end = time.time() 108 | for i, (input, target, target_weight, meta) in enumerate(val_loader): 109 | # compute output 110 | 
output = model(input) 111 | if config.TEST.FLIP_TEST: 112 | # this part is ugly, because pytorch has not supported negative index 113 | # input_flipped = model(input[:, :, :, ::-1]) 114 | input_flipped = np.flip(input.cpu().numpy(), 3).copy() 115 | input_flipped = torch.from_numpy(input_flipped).cuda() 116 | output_flipped = model(input_flipped) 117 | output_flipped = flip_back(output_flipped.cpu().numpy(), 118 | val_dataset.flip_pairs) 119 | output_flipped = torch.from_numpy(output_flipped.copy()).cuda() 120 | 121 | # feature is not aligned, shift flipped heatmap for higher accuracy 122 | if config.TEST.SHIFT_HEATMAP: 123 | output_flipped[:, :, :, 1:] = \ 124 | output_flipped.clone()[:, :, :, 0:-1] 125 | # output_flipped[:, :, :, 0] = 0 126 | 127 | output = (output + output_flipped) * 0.5 128 | 129 | target = target.cuda(non_blocking=True) 130 | target_weight = target_weight.cuda(non_blocking=True) 131 | 132 | loss = criterion(output, target, target_weight) 133 | 134 | num_images = input.size(0) 135 | # measure accuracy and record loss 136 | losses.update(loss.item(), num_images) 137 | _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), 138 | target.cpu().numpy()) 139 | 140 | acc.update(avg_acc, cnt) 141 | 142 | # measure elapsed time 143 | batch_time.update(time.time() - end) 144 | end = time.time() 145 | 146 | c = meta['center'].numpy() 147 | s = meta['scale'].numpy() 148 | score = meta['score'].numpy() 149 | 150 | preds, maxvals = get_final_preds( 151 | config, output.clone().cpu().numpy(), c, s) 152 | 153 | all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] 154 | all_preds[idx:idx + num_images, :, 2:3] = maxvals 155 | # double check this all_boxes parts 156 | all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] 157 | all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] 158 | all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) 159 | all_boxes[idx:idx + num_images, 5] = score 160 | image_path.extend(meta['image']) 161 | if config.DATASET.DATASET == 'posetrack': 162 | filenames.extend(meta['filename']) 163 | imgnums.extend(meta['imgnum'].numpy()) 164 | 165 | idx += num_images 166 | 167 | if i % config.PRINT_FREQ == 0: 168 | msg = 'Test: [{0}/{1}]\t' \ 169 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 170 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 171 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 172 | i, len(val_loader), batch_time=batch_time, 173 | loss=losses, acc=acc) 174 | logger.info(msg) 175 | 176 | prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i) 177 | save_debug_images(config, input, meta, target, pred*4, output, 178 | prefix) 179 | 180 | name_values, perf_indicator = val_dataset.evaluate( 181 | config, all_preds, output_dir, all_boxes, image_path, 182 | filenames, imgnums) 183 | 184 | _, full_arch_name = get_model_name(config) 185 | if isinstance(name_values, list): 186 | for name_value in name_values: 187 | _print_name_value(name_value, full_arch_name) 188 | else: 189 | _print_name_value(name_values, full_arch_name) 190 | 191 | if writer_dict: 192 | writer = writer_dict['writer'] 193 | global_steps = writer_dict['valid_global_steps'] 194 | writer.add_scalar('valid_loss', losses.avg, global_steps) 195 | writer.add_scalar('valid_acc', acc.avg, global_steps) 196 | if isinstance(name_values, list): 197 | for name_value in name_values: 198 | writer.add_scalars('valid', dict(name_value), global_steps) 199 | else: 200 | writer.add_scalars('valid', dict(name_values), global_steps) 201 | writer_dict['valid_global_steps'] = global_steps + 1 202 | 203 | 
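    # perf_indicator is the dataset's headline metric from evaluate()
    # (AP for COCO, PCKh@0.5 Mean for MPII); callers can use it to select
    # the best-performing checkpoint.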
return perf_indicator 204 | 205 | 206 | # markdown format output 207 | def _print_name_value(name_value, full_arch_name): 208 | names = name_value.keys() 209 | values = name_value.values() 210 | num_values = len(name_value) 211 | logger.info( 212 | '| Arch ' + 213 | ' '.join(['| {}'.format(name) for name in names]) + 214 | ' |' 215 | ) 216 | logger.info('|---' * (num_values+1) + '|') 217 | logger.info( 218 | '| ' + full_arch_name + ' ' + 219 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 220 | ' |' 221 | ) 222 | 223 | 224 | class AverageMeter(object): 225 | """Computes and stores the average and current value""" 226 | def __init__(self): 227 | self.reset() 228 | 229 | def reset(self): 230 | self.val = 0 231 | self.avg = 0 232 | self.sum = 0 233 | self.count = 0 234 | 235 | def update(self, val, n=1): 236 | self.val = val 237 | self.sum += val * n 238 | self.count += n 239 | self.avg = self.sum / self.count if self.count != 0 else 0 240 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | 15 | from utils.transforms import transform_preds 16 | 17 | 18 | def get_max_preds(batch_heatmaps): 19 | ''' 20 | get predictions from score maps 21 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 22 | ''' 23 | assert isinstance(batch_heatmaps, np.ndarray), \ 24 | 'batch_heatmaps should be numpy.ndarray' 25 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 26 | 27 | batch_size = batch_heatmaps.shape[0] 28 | num_joints = batch_heatmaps.shape[1] 29 | width = batch_heatmaps.shape[3] 30 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 31 | idx = np.argmax(heatmaps_reshaped, 2) 32 | maxvals = np.amax(heatmaps_reshaped, 2) 33 | 34 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 35 | idx = idx.reshape((batch_size, num_joints, 1)) 36 | 37 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 38 | 39 | preds[:, :, 0] = (preds[:, :, 0]) % width 40 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 41 | 42 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 43 | pred_mask = pred_mask.astype(np.float32) 44 | 45 | preds *= pred_mask 46 | return preds, maxvals 47 | 48 | 49 | def get_final_preds(config, batch_heatmaps, center, scale): 50 | coords, maxvals = get_max_preds(batch_heatmaps) 51 | 52 | heatmap_height = batch_heatmaps.shape[2] 53 | heatmap_width = batch_heatmaps.shape[3] 54 | 55 | # post-processing 56 | if config.TEST.POST_PROCESS: 57 | for n in range(coords.shape[0]): 58 | for p in range(coords.shape[1]): 59 | hm = batch_heatmaps[n][p] 60 | px = int(math.floor(coords[n][p][0] + 0.5)) 61 | py = int(math.floor(coords[n][p][1] + 0.5)) 62 | if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1: 63 | diff = np.array([hm[py][px+1] - hm[py][px-1], 64 | hm[py+1][px]-hm[py-1][px]]) 65 | coords[n][p] += np.sign(diff) * .25 66 | 67 | preds = coords.copy() 68 | 69 | # Transform back 70 | for i in range(coords.shape[0]): 71 | 
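        # transform_preds maps heatmap-space coordinates back to the original
        # image space using the person box center and scale.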
preds[i] = transform_preds(coords[i], center[i], scale[i], 72 | [heatmap_width, heatmap_height]) 73 | 74 | return preds, maxvals 75 | -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch.nn as nn 12 | 13 | 14 | class JointsMSELoss(nn.Module): 15 | def __init__(self, use_target_weight): 16 | super(JointsMSELoss, self).__init__() 17 | self.criterion = nn.MSELoss(size_average=True) 18 | self.use_target_weight = use_target_weight 19 | 20 | def forward(self, output, target, target_weight): 21 | batch_size = output.size(0) 22 | num_joints = output.size(1) 23 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 24 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 25 | loss = 0 26 | 27 | for idx in range(num_joints): 28 | heatmap_pred = heatmaps_pred[idx].squeeze() 29 | heatmap_gt = heatmaps_gt[idx].squeeze() 30 | if self.use_target_weight: 31 | loss += 0.5 * self.criterion( 32 | heatmap_pred.mul(target_weight[:, idx]), 33 | heatmap_gt.mul(target_weight[:, idx]) 34 | ) 35 | else: 36 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 37 | 38 | return loss / num_joints 39 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import copy 12 | import logging 13 | import random 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from torch.utils.data import Dataset 19 | 20 | from utils.transforms import get_affine_transform 21 | from utils.transforms import affine_transform 22 | from utils.transforms import fliplr_joints 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class JointsDataset(Dataset): 29 | def __init__(self, cfg, root, image_set, is_train, transform=None): 30 | self.num_joints = 0 31 | self.pixel_std = 200 32 | self.flip_pairs = [] 33 | self.parent_ids = [] 34 | 35 | self.is_train = is_train 36 | self.root = root 37 | self.image_set = image_set 38 | 39 | self.output_path = cfg.OUTPUT_DIR 40 | self.data_format = cfg.DATASET.DATA_FORMAT 41 | 42 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 43 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 44 | self.flip = cfg.DATASET.FLIP 45 | 46 | self.image_size = cfg.MODEL.IMAGE_SIZE 47 | self.target_type = cfg.MODEL.EXTRA.TARGET_TYPE 48 | self.heatmap_size = cfg.MODEL.EXTRA.HEATMAP_SIZE 49 | self.sigma = cfg.MODEL.EXTRA.SIGMA 50 | 51 | self.transform = transform 52 | self.db = [] 53 | 54 | def _get_db(self): 55 | raise NotImplementedError 56 | 57 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 58 | raise NotImplementedError 59 | 60 | def __len__(self,): 61 | return len(self.db) 62 | 63 | def __getitem__(self, idx): 64 | db_rec = copy.deepcopy(self.db[idx]) 65 | 66 | image_file = db_rec['image'] 67 | filename = db_rec['filename'] if 'filename' in db_rec else '' 68 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 69 | 70 | if self.data_format == 'zip': 71 | from utils import zipreader 72 | data_numpy = zipreader.imread( 73 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 74 | else: 75 | data_numpy = cv2.imread( 76 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 77 | 78 | if data_numpy is None: 79 | logger.error('=> fail to read {}'.format(image_file)) 80 | raise ValueError('Fail to read {}'.format(image_file)) 81 | 82 | joints = db_rec['joints_3d'] 83 | joints_vis = db_rec['joints_3d_vis'] 84 | 85 | c = db_rec['center'] 86 | s = db_rec['scale'] 87 | score = db_rec['score'] if 'score' in db_rec else 1 88 | r = 0 89 | 90 | if self.is_train: 91 | sf = self.scale_factor 92 | rf = self.rotation_factor 93 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 94 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 95 | if random.random() <= 0.6 else 0 96 | 97 | if self.flip and random.random() <= 0.5: 98 | data_numpy = data_numpy[:, ::-1, :] 99 | joints, joints_vis = fliplr_joints( 100 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 101 | c[0] = data_numpy.shape[1] - c[0] - 1 102 | 103 | trans = get_affine_transform(c, s, r, self.image_size) 104 | input = cv2.warpAffine( 105 | data_numpy, 106 | trans, 107 | (int(self.image_size[0]), int(self.image_size[1])), 108 | flags=cv2.INTER_LINEAR) 109 | 110 | if self.transform: 111 | input = self.transform(input) 112 | 113 | for i in range(self.num_joints): 114 | if joints_vis[i, 0] > 0.0: 115 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 116 | 117 | target, target_weight = self.generate_target(joints, joints_vis) 118 | 119 | target = 
torch.from_numpy(target) 120 | target_weight = torch.from_numpy(target_weight) 121 | 122 | meta = { 123 | 'image': image_file, 124 | 'filename': filename, 125 | 'imgnum': imgnum, 126 | 'joints': joints, 127 | 'joints_vis': joints_vis, 128 | 'center': c, 129 | 'scale': s, 130 | 'rotation': r, 131 | 'score': score 132 | } 133 | 134 | return input, target, target_weight, meta 135 | 136 | def select_data(self, db): 137 | db_selected = [] 138 | for rec in db: 139 | num_vis = 0 140 | joints_x = 0.0 141 | joints_y = 0.0 142 | for joint, joint_vis in zip( 143 | rec['joints_3d'], rec['joints_3d_vis']): 144 | if joint_vis[0] <= 0: 145 | continue 146 | num_vis += 1 147 | 148 | joints_x += joint[0] 149 | joints_y += joint[1] 150 | if num_vis == 0: 151 | continue 152 | 153 | joints_x, joints_y = joints_x / num_vis, joints_y / num_vis 154 | 155 | area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) 156 | joints_center = np.array([joints_x, joints_y]) 157 | bbox_center = np.array(rec['center']) 158 | diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) 159 | ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) 160 | 161 | metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 162 | if ks > metric: 163 | db_selected.append(rec) 164 | 165 | logger.info('=> num db: {}'.format(len(db))) 166 | logger.info('=> num selected db: {}'.format(len(db_selected))) 167 | return db_selected 168 | 169 | def generate_target(self, joints, joints_vis): 170 | ''' 171 | :param joints: [num_joints, 3] 172 | :param joints_vis: [num_joints, 3] 173 | :return: target, target_weight(1: visible, 0: invisible) 174 | ''' 175 | target_weight = np.ones((self.num_joints, 1), dtype=np.float32) 176 | target_weight[:, 0] = joints_vis[:, 0] 177 | 178 | assert self.target_type == 'gaussian', \ 179 | 'Only support gaussian map now!' 
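        # Sketch of the construction below: each visible joint is rendered as an
        # unnormalized 2D Gaussian on the heatmap grid,
        #     mu = joint_xy / feat_stride,   feat_stride = image_size / heatmap_size
        #     target[j](x, y) = exp(-((x - mu_x)**2 + (y - mu_y)**2) / (2 * sigma**2))
        # so the peak value is exactly 1 at the joint. Joints whose 3*sigma window
        # falls entirely outside the heatmap get target_weight set to 0 instead.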
180 | 181 | if self.target_type == 'gaussian': 182 | target = np.zeros((self.num_joints, 183 | self.heatmap_size[1], 184 | self.heatmap_size[0]), 185 | dtype=np.float32) 186 | 187 | tmp_size = self.sigma * 3 188 | 189 | for joint_id in range(self.num_joints): 190 | feat_stride = self.image_size / self.heatmap_size 191 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) 192 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) 193 | # Check that any part of the gaussian is in-bounds 194 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 195 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 196 | if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ 197 | or br[0] < 0 or br[1] < 0: 198 | # If not, just return the image as is 199 | target_weight[joint_id] = 0 200 | continue 201 | 202 | # # Generate gaussian 203 | size = 2 * tmp_size + 1 204 | x = np.arange(0, size, 1, np.float32) 205 | y = x[:, np.newaxis] 206 | x0 = y0 = size // 2 207 | # The gaussian is not normalized, we want the center value to equal 1 208 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) 209 | 210 | # Usable gaussian range 211 | g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 212 | g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 213 | # Image range 214 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 215 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 216 | 217 | v = target_weight[joint_id] 218 | if v > 0.5: 219 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ 220 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 221 | 222 | return target, target_weight 223 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .mpii import MPIIDataset as mpii 12 | from .coco import COCODataset as coco 13 | -------------------------------------------------------------------------------- /lib/dataset/coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import os 13 | import pickle 14 | from collections import defaultdict 15 | from collections import OrderedDict 16 | 17 | import json_tricks as json 18 | import numpy as np 19 | from pycocotools.coco import COCO 20 | from pycocotools.cocoeval import COCOeval 21 | 22 | from dataset.JointsDataset import JointsDataset 23 | from nms.nms import oks_nms 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class COCODataset(JointsDataset): 30 | ''' 31 | "keypoints": { 32 | 0: "nose", 33 | 1: "left_eye", 34 | 2: "right_eye", 35 | 3: "left_ear", 36 | 4: "right_ear", 37 | 5: "left_shoulder", 38 | 6: "right_shoulder", 39 | 7: "left_elbow", 40 | 8: "right_elbow", 41 | 9: "left_wrist", 42 | 10: "right_wrist", 43 | 11: "left_hip", 44 | 12: "right_hip", 45 | 13: "left_knee", 46 | 14: "right_knee", 47 | 15: "left_ankle", 48 | 16: "right_ankle" 49 | }, 50 | "skeleton": [ 51 | [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13], [6,7],[6,8], 52 | [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]] 53 | ''' 54 | def __init__(self, cfg, root, image_set, is_train, transform=None): 55 | super().__init__(cfg, root, image_set, is_train, transform) 56 | self.nms_thre = cfg.TEST.NMS_THRE 57 | self.image_thre = cfg.TEST.IMAGE_THRE 58 | self.oks_thre = cfg.TEST.OKS_THRE 59 | self.in_vis_thre = cfg.TEST.IN_VIS_THRE 60 | self.bbox_file = cfg.TEST.COCO_BBOX_FILE 61 | self.use_gt_bbox = cfg.TEST.USE_GT_BBOX 62 | self.image_width = cfg.MODEL.IMAGE_SIZE[0] 63 | self.image_height = cfg.MODEL.IMAGE_SIZE[1] 64 | self.aspect_ratio = self.image_width * 1.0 / self.image_height 65 | self.pixel_std = 200 66 | self.coco = COCO(self._get_ann_file_keypoint()) 67 | 68 | # deal with class names 69 | cats = [cat['name'] 70 | for cat in self.coco.loadCats(self.coco.getCatIds())] 71 | self.classes = ['__background__'] + cats 72 | logger.info('=> classes: {}'.format(self.classes)) 73 | self.num_classes = len(self.classes) 74 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 75 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) 76 | self._coco_ind_to_class_ind = dict([(self._class_to_coco_ind[cls], 77 | self._class_to_ind[cls]) 78 | for cls in self.classes[1:]]) 79 | 80 | # load image file names 81 | self.image_set_index = self._load_image_set_index() 82 | self.num_images = len(self.image_set_index) 83 | logger.info('=> num_images: {}'.format(self.num_images)) 84 | 85 | self.num_joints = 17 86 | self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], 87 | [9, 10], [11, 12], [13, 14], [15, 16]] 88 | self.parent_ids = None 89 | 90 | self.db = self._get_db() 91 | 92 | if is_train and cfg.DATASET.SELECT_DATA: 93 | self.db = self.select_data(self.db) 94 | 95 | logger.info('=> load {} samples'.format(len(self.db))) 96 | 97 | def _get_ann_file_keypoint(self): 98 | """ self.root / annotations / person_keypoints_train2017.json """ 99 | prefix = 'person_keypoints' \ 100 | if 'test' not in self.image_set else 'image_info' 101 | return os.path.join(self.root, 'annotations', 102 | prefix + '_' + self.image_set + '.json') 103 | 104 | def _load_image_set_index(self): 105 | """ image id: int """ 106 | image_ids = self.coco.getImgIds() 107 | return image_ids 108 | 109 | def _get_db(self): 110 | if self.is_train or 
self.use_gt_bbox: 111 | # use ground truth bbox 112 | gt_db = self._load_coco_keypoint_annotations() 113 | else: 114 | # use bbox from detection 115 | gt_db = self._load_coco_person_detection_results() 116 | return gt_db 117 | 118 | def _load_coco_keypoint_annotations(self): 119 | """ ground truth bbox and keypoints """ 120 | gt_db = [] 121 | for index in self.image_set_index: 122 | gt_db.extend(self._load_coco_keypoint_annotation_kernal(index)) 123 | return gt_db 124 | 125 | def _load_coco_keypoint_annotation_kernal(self, index): 126 | """ 127 | coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] 128 | iscrowd: 129 | crowd instances are handled by marking their overlaps with all categories to -1 130 | and later excluded in training 131 | bbox: 132 | [x1, y1, w, h] 133 | :param index: coco image id 134 | :return: db entry 135 | """ 136 | im_ann = self.coco.loadImgs(index)[0] 137 | width = im_ann['width'] 138 | height = im_ann['height'] 139 | 140 | annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False) 141 | objs = self.coco.loadAnns(annIds) 142 | 143 | # sanitize bboxes 144 | valid_objs = [] 145 | for obj in objs: 146 | x, y, w, h = obj['bbox'] 147 | x1 = np.max((0, x)) 148 | y1 = np.max((0, y)) 149 | x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) 150 | y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) 151 | if obj['area'] > 0 and x2 >= x1 and y2 >= y1: 152 | # obj['clean_bbox'] = [x1, y1, x2, y2] 153 | obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1] 154 | valid_objs.append(obj) 155 | objs = valid_objs 156 | 157 | rec = [] 158 | for obj in objs: 159 | cls = self._coco_ind_to_class_ind[obj['category_id']] 160 | if cls != 1: 161 | continue 162 | 163 | # ignore objs without keypoints annotation 164 | if max(obj['keypoints']) == 0: 165 | continue 166 | 167 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 168 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 169 | for ipt in range(self.num_joints): 170 | joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0] 171 | joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1] 172 | joints_3d[ipt, 2] = 0 173 | t_vis = obj['keypoints'][ipt * 3 + 2] 174 | if t_vis > 1: 175 | t_vis = 1 176 | joints_3d_vis[ipt, 0] = t_vis 177 | joints_3d_vis[ipt, 1] = t_vis 178 | joints_3d_vis[ipt, 2] = 0 179 | 180 | center, scale = self._box2cs(obj['clean_bbox'][:4]) 181 | rec.append({ 182 | 'image': self.image_path_from_index(index), 183 | 'center': center, 184 | 'scale': scale, 185 | 'joints_3d': joints_3d, 186 | 'joints_3d_vis': joints_3d_vis, 187 | 'filename': '', 188 | 'imgnum': 0, 189 | }) 190 | 191 | return rec 192 | 193 | def _box2cs(self, box): 194 | x, y, w, h = box[:4] 195 | return self._xywh2cs(x, y, w, h) 196 | 197 | def _xywh2cs(self, x, y, w, h): 198 | center = np.zeros((2), dtype=np.float32) 199 | center[0] = x + w * 0.5 200 | center[1] = y + h * 0.5 201 | 202 | if w > self.aspect_ratio * h: 203 | h = w * 1.0 / self.aspect_ratio 204 | elif w < self.aspect_ratio * h: 205 | w = h * self.aspect_ratio 206 | scale = np.array( 207 | [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], 208 | dtype=np.float32) 209 | if center[0] != -1: 210 | scale = scale * 1.25 211 | 212 | return center, scale 213 | 214 | def image_path_from_index(self, index): 215 | """ example: images / train2017 / 000000119993.jpg """ 216 | file_name = '%012d.jpg' % index 217 | if '2014' in self.image_set: 218 | file_name = 'COCO_%s_' % self.image_set + file_name 219 | 220 | prefix = 'test2017' if 'test' in self.image_set else 
self.image_set 221 | 222 | data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix 223 | 224 | image_path = os.path.join( 225 | self.root, 'images', data_name, file_name) 226 | 227 | return image_path 228 | 229 | def _load_coco_person_detection_results(self): 230 | all_boxes = None 231 | with open(self.bbox_file, 'r') as f: 232 | all_boxes = json.load(f) 233 | 234 | if not all_boxes: 235 | logger.error('=> Load %s fail!' % self.bbox_file) 236 | return None 237 | 238 | logger.info('=> Total boxes: {}'.format(len(all_boxes))) 239 | 240 | kpt_db = [] 241 | num_boxes = 0 242 | for n_img in range(0, len(all_boxes)): 243 | det_res = all_boxes[n_img] 244 | if det_res['category_id'] != 1: 245 | continue 246 | img_name = self.image_path_from_index(det_res['image_id']) 247 | box = det_res['bbox'] 248 | score = det_res['score'] 249 | 250 | if score < self.image_thre: 251 | continue 252 | 253 | num_boxes = num_boxes + 1 254 | 255 | center, scale = self._box2cs(box) 256 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 257 | joints_3d_vis = np.ones( 258 | (self.num_joints, 3), dtype=np.float) 259 | kpt_db.append({ 260 | 'image': img_name, 261 | 'center': center, 262 | 'scale': scale, 263 | 'score': score, 264 | 'joints_3d': joints_3d, 265 | 'joints_3d_vis': joints_3d_vis, 266 | }) 267 | 268 | logger.info('=> Total boxes after fliter low score@{}: {}'.format( 269 | self.image_thre, num_boxes)) 270 | return kpt_db 271 | 272 | # need double check this API and classes field 273 | def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, 274 | *args, **kwargs): 275 | res_folder = os.path.join(output_dir, 'results') 276 | if not os.path.exists(res_folder): 277 | os.makedirs(res_folder) 278 | res_file = os.path.join( 279 | res_folder, 'keypoints_%s_results.json' % self.image_set) 280 | 281 | # person x (keypoints) 282 | _kpts = [] 283 | for idx, kpt in enumerate(preds): 284 | _kpts.append({ 285 | 'keypoints': kpt, 286 | 'center': all_boxes[idx][0:2], 287 | 'scale': all_boxes[idx][2:4], 288 | 'area': all_boxes[idx][4], 289 | 'score': all_boxes[idx][5], 290 | 'image': int(img_path[idx][-16:-4]) 291 | }) 292 | # image x person x (keypoints) 293 | kpts = defaultdict(list) 294 | for kpt in _kpts: 295 | kpts[kpt['image']].append(kpt) 296 | 297 | # rescoring and oks nms 298 | num_joints = self.num_joints 299 | in_vis_thre = self.in_vis_thre 300 | oks_thre = self.oks_thre 301 | oks_nmsed_kpts = [] 302 | for img in kpts.keys(): 303 | img_kpts = kpts[img] 304 | for n_p in img_kpts: 305 | box_score = n_p['score'] 306 | kpt_score = 0 307 | valid_num = 0 308 | for n_jt in range(0, num_joints): 309 | t_s = n_p['keypoints'][n_jt][2] 310 | if t_s > in_vis_thre: 311 | kpt_score = kpt_score + t_s 312 | valid_num = valid_num + 1 313 | if valid_num != 0: 314 | kpt_score = kpt_score / valid_num 315 | # rescoring 316 | n_p['score'] = kpt_score * box_score 317 | keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))], 318 | oks_thre) 319 | if len(keep) == 0: 320 | oks_nmsed_kpts.append(img_kpts) 321 | else: 322 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) 323 | 324 | self._write_coco_keypoint_results( 325 | oks_nmsed_kpts, res_file) 326 | if 'test' not in self.image_set: 327 | info_str = self._do_python_keypoint_eval( 328 | res_file, res_folder) 329 | name_value = OrderedDict(info_str) 330 | return name_value, name_value['AP'] 331 | else: 332 | return {'Null': 0}, 0 333 | 334 | def _write_coco_keypoint_results(self, keypoints, res_file): 335 | data_pack = [{'cat_id': 
self._class_to_coco_ind[cls], 336 | 'cls_ind': cls_ind, 337 | 'cls': cls, 338 | 'ann_type': 'keypoints', 339 | 'keypoints': keypoints 340 | } 341 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'] 342 | 343 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) 344 | logger.info('=> Writing results json to %s' % res_file) 345 | with open(res_file, 'w') as f: 346 | json.dump(results, f, sort_keys=True, indent=4) 347 | try: 348 | json.load(open(res_file)) 349 | except Exception: 350 | content = [] 351 | with open(res_file, 'r') as f: 352 | for line in f: 353 | content.append(line) 354 | content[-1] = ']' 355 | with open(res_file, 'w') as f: 356 | for c in content: 357 | f.write(c) 358 | 359 | def _coco_keypoint_results_one_category_kernel(self, data_pack): 360 | cat_id = data_pack['cat_id'] 361 | keypoints = data_pack['keypoints'] 362 | cat_results = [] 363 | 364 | for img_kpts in keypoints: 365 | if len(img_kpts) == 0: 366 | continue 367 | 368 | _key_points = np.array([img_kpts[k]['keypoints'] 369 | for k in range(len(img_kpts))]) 370 | key_points = np.zeros( 371 | (_key_points.shape[0], self.num_joints * 3), dtype=np.float) 372 | 373 | for ipt in range(self.num_joints): 374 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] 375 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] 376 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score. 377 | 378 | result = [{'image_id': img_kpts[k]['image'], 379 | 'category_id': cat_id, 380 | 'keypoints': list(key_points[k]), 381 | 'score': img_kpts[k]['score'], 382 | 'center': list(img_kpts[k]['center']), 383 | 'scale': list(img_kpts[k]['scale']) 384 | } for k in range(len(img_kpts))] 385 | cat_results.extend(result) 386 | 387 | return cat_results 388 | 389 | def _do_python_keypoint_eval(self, res_file, res_folder): 390 | coco_dt = self.coco.loadRes(res_file) 391 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') 392 | coco_eval.params.useSegm = None 393 | coco_eval.evaluate() 394 | coco_eval.accumulate() 395 | coco_eval.summarize() 396 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] 397 | 398 | info_str = [] 399 | for ind, name in enumerate(stats_names): 400 | info_str.append((name, coco_eval.stats[ind])) 401 | 402 | eval_file = os.path.join( 403 | res_folder, 'keypoints_%s_results.pkl' % self.image_set) 404 | 405 | with open(eval_file, 'wb') as f: 406 | pickle.dump(coco_eval, f, pickle.HIGHEST_PROTOCOL) 407 | logger.info('=> coco eval results saved to %s' % eval_file) 408 | 409 | return info_str 410 | -------------------------------------------------------------------------------- /lib/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from collections import OrderedDict 12 | import logging 13 | import os 14 | import json_tricks as json 15 | 16 | import numpy as np 17 | from scipy.io import loadmat, savemat 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MPIIDataset(JointsDataset): 26 | def __init__(self, cfg, root, image_set, is_train, transform=None): 27 | super().__init__(cfg, root, image_set, is_train, transform) 28 | 29 | self.num_joints = 16 30 | self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] 31 | self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] 32 | 33 | self.db = self._get_db() 34 | 35 | if is_train and cfg.DATASET.SELECT_DATA: 36 | self.db = self.select_data(self.db) 37 | 38 | logger.info('=> load {} samples'.format(len(self.db))) 39 | 40 | def _get_db(self): 41 | # create train/val split 42 | file_name = os.path.join(self.root, 43 | 'annot', 44 | self.image_set+'.json') 45 | with open(file_name) as anno_file: 46 | anno = json.load(anno_file) 47 | 48 | gt_db = [] 49 | for a in anno: 50 | image_name = a['image'] 51 | 52 | c = np.array(a['center'], dtype=np.float) 53 | s = np.array([a['scale'], a['scale']], dtype=np.float) 54 | 55 | # Adjust center/scale slightly to avoid cropping limbs 56 | if c[0] != -1: 57 | c[1] = c[1] + 15 * s[1] 58 | s = s * 1.25 59 | 60 | # MPII uses matlab format, index is based 1, 61 | # we should first convert to 0-based index 62 | c = c - 1 63 | 64 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 65 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 66 | if self.image_set != 'test': 67 | joints = np.array(a['joints']) 68 | joints[:, 0:2] = joints[:, 0:2] - 1 69 | joints_vis = np.array(a['joints_vis']) 70 | assert len(joints) == self.num_joints, \ 71 | 'joint num diff: {} vs {}'.format(len(joints), 72 | self.num_joints) 73 | 74 | joints_3d[:, 0:2] = joints[:, 0:2] 75 | joints_3d_vis[:, 0] = joints_vis[:] 76 | joints_3d_vis[:, 1] = joints_vis[:] 77 | 78 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 79 | gt_db.append({ 80 | 'image': os.path.join(self.root, image_dir, image_name), 81 | 'center': c, 82 | 'scale': s, 83 | 'joints_3d': joints_3d, 84 | 'joints_3d_vis': joints_3d_vis, 85 | 'filename': '', 86 | 'imgnum': 0, 87 | }) 88 | 89 | return gt_db 90 | 91 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 92 | # convert 0-based index to 1-based index 93 | preds = preds[:, :, 0:2] + 1.0 94 | 95 | if output_dir: 96 | pred_file = os.path.join(output_dir, 'pred.mat') 97 | savemat(pred_file, mdict={'preds': preds}) 98 | 99 | if 'test' in cfg.DATASET.TEST_SET: 100 | return {'Null': 0.0}, 0.0 101 | 102 | SC_BIAS = 0.6 103 | threshold = 0.5 104 | 105 | gt_file = os.path.join(cfg.DATASET.ROOT, 106 | 'annot', 107 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 108 | gt_dict = loadmat(gt_file) 109 | dataset_joints = gt_dict['dataset_joints'] 110 | jnt_missing = gt_dict['jnt_missing'] 111 | pos_gt_src = gt_dict['pos_gt_src'] 112 | headboxes_src = gt_dict['headboxes_src'] 113 | 114 | pos_pred_src = np.transpose(preds, [1, 2, 0]) 115 | 116 | head = np.where(dataset_joints == 'head')[1][0] 117 | lsho = np.where(dataset_joints == 'lsho')[1][0] 118 | 
lelb = np.where(dataset_joints == 'lelb')[1][0] 119 | lwri = np.where(dataset_joints == 'lwri')[1][0] 120 | lhip = np.where(dataset_joints == 'lhip')[1][0] 121 | lkne = np.where(dataset_joints == 'lkne')[1][0] 122 | lank = np.where(dataset_joints == 'lank')[1][0] 123 | 124 | rsho = np.where(dataset_joints == 'rsho')[1][0] 125 | relb = np.where(dataset_joints == 'relb')[1][0] 126 | rwri = np.where(dataset_joints == 'rwri')[1][0] 127 | rkne = np.where(dataset_joints == 'rkne')[1][0] 128 | rank = np.where(dataset_joints == 'rank')[1][0] 129 | rhip = np.where(dataset_joints == 'rhip')[1][0] 130 | 131 | jnt_visible = 1 - jnt_missing 132 | uv_error = pos_pred_src - pos_gt_src 133 | uv_err = np.linalg.norm(uv_error, axis=1) 134 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 135 | headsizes = np.linalg.norm(headsizes, axis=0) 136 | headsizes *= SC_BIAS 137 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 138 | scaled_uv_err = np.divide(uv_err, scale) 139 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 140 | jnt_count = np.sum(jnt_visible, axis=1) 141 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 142 | jnt_visible) 143 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 144 | 145 | # save 146 | rng = np.arange(0, 0.5+0.01, 0.01) 147 | pckAll = np.zeros((len(rng), 16)) 148 | 149 | for r in range(len(rng)): 150 | threshold = rng[r] 151 | less_than_threshold = np.multiply(scaled_uv_err <= threshold, 152 | jnt_visible) 153 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 154 | jnt_count) 155 | 156 | PCKh = np.ma.array(PCKh, mask=False) 157 | PCKh.mask[6:8] = True 158 | 159 | jnt_count = np.ma.array(jnt_count, mask=False) 160 | jnt_count.mask[6:8] = True 161 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 162 | 163 | name_value = [ 164 | ('Head', PCKh[head]), 165 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 166 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 167 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 168 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 169 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 170 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 171 | ('Mean', np.sum(PCKh * jnt_ratio)), 172 | ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 173 | ] 174 | name_value = OrderedDict(name_value) 175 | 176 | return name_value, name_value['Mean'] 177 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.pose_resnet 12 | -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | from collections import OrderedDict 17 | 18 | 19 | BN_MOMENTUM = 0.1 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def conv3x3(in_planes, out_planes, stride=1): 24 | """3x3 convolution with padding""" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | residual = self.downsample(x) 54 | 55 | out += residual 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 68 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 69 | padding=1, bias=False) 70 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 71 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 72 | bias=False) 73 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 74 | momentum=BN_MOMENTUM) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | 102 | class Bottleneck_CAFFE(nn.Module): 103 | expansion = 4 104 | 105 | def __init__(self, inplanes, planes, stride=1, downsample=None): 106 | super(Bottleneck_CAFFE, self).__init__() 107 | # add stride to conv1x1 108 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 109 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 110 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, 111 | padding=1, bias=False) 112 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 113 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 114 | bias=False) 115 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 116 | momentum=BN_MOMENTUM) 117 | self.relu = nn.ReLU(inplace=True) 118 | self.downsample = downsample 119 | self.stride = stride 120 | 121 | def forward(self, x): 122 | residual = x 123 | 
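        # Same residual computation as Bottleneck.forward above; the caffe-style
        # variant differs only in __init__, where the stride is applied to the
        # 1x1 conv1 instead of the 3x3 conv2.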
124 | out = self.conv1(x) 125 | out = self.bn1(out) 126 | out = self.relu(out) 127 | 128 | out = self.conv2(out) 129 | out = self.bn2(out) 130 | out = self.relu(out) 131 | 132 | out = self.conv3(out) 133 | out = self.bn3(out) 134 | 135 | if self.downsample is not None: 136 | residual = self.downsample(x) 137 | 138 | out += residual 139 | out = self.relu(out) 140 | 141 | return out 142 | 143 | 144 | class PoseResNet(nn.Module): 145 | 146 | def __init__(self, block, layers, cfg, **kwargs): 147 | self.inplanes = 64 148 | extra = cfg.MODEL.EXTRA 149 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 150 | 151 | super(PoseResNet, self).__init__() 152 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 153 | bias=False) 154 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 155 | self.relu = nn.ReLU(inplace=True) 156 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 157 | self.layer1 = self._make_layer(block, 64, layers[0]) 158 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 159 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 160 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 161 | 162 | # used for deconv layers 163 | self.deconv_layers = self._make_deconv_layer( 164 | extra.NUM_DECONV_LAYERS, 165 | extra.NUM_DECONV_FILTERS, 166 | extra.NUM_DECONV_KERNELS, 167 | ) 168 | 169 | self.final_layer = nn.Conv2d( 170 | in_channels=extra.NUM_DECONV_FILTERS[-1], 171 | out_channels=cfg.MODEL.NUM_JOINTS, 172 | kernel_size=extra.FINAL_CONV_KERNEL, 173 | stride=1, 174 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 175 | ) 176 | 177 | def _make_layer(self, block, planes, blocks, stride=1): 178 | downsample = None 179 | if stride != 1 or self.inplanes != planes * block.expansion: 180 | downsample = nn.Sequential( 181 | nn.Conv2d(self.inplanes, planes * block.expansion, 182 | kernel_size=1, stride=stride, bias=False), 183 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 184 | ) 185 | 186 | layers = [] 187 | layers.append(block(self.inplanes, planes, stride, downsample)) 188 | self.inplanes = planes * block.expansion 189 | for i in range(1, blocks): 190 | layers.append(block(self.inplanes, planes)) 191 | 192 | return nn.Sequential(*layers) 193 | 194 | def _get_deconv_cfg(self, deconv_kernel, index): 195 | if deconv_kernel == 4: 196 | padding = 1 197 | output_padding = 0 198 | elif deconv_kernel == 3: 199 | padding = 1 200 | output_padding = 1 201 | elif deconv_kernel == 2: 202 | padding = 0 203 | output_padding = 0 204 | 205 | return deconv_kernel, padding, output_padding 206 | 207 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 208 | assert num_layers == len(num_filters), \ 209 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 210 | assert num_layers == len(num_kernels), \ 211 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 212 | 213 | layers = [] 214 | for i in range(num_layers): 215 | kernel, padding, output_padding = \ 216 | self._get_deconv_cfg(num_kernels[i], i) 217 | 218 | planes = num_filters[i] 219 | layers.append( 220 | nn.ConvTranspose2d( 221 | in_channels=self.inplanes, 222 | out_channels=planes, 223 | kernel_size=kernel, 224 | stride=2, 225 | padding=padding, 226 | output_padding=output_padding, 227 | bias=self.deconv_with_bias)) 228 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 229 | layers.append(nn.ReLU(inplace=True)) 230 | self.inplanes = planes 231 | 232 | return nn.Sequential(*layers) 233 | 234 | def forward(self, 
x): 235 | x = self.conv1(x) 236 | x = self.bn1(x) 237 | x = self.relu(x) 238 | x = self.maxpool(x) 239 | 240 | x = self.layer1(x) 241 | x = self.layer2(x) 242 | x = self.layer3(x) 243 | x = self.layer4(x) 244 | 245 | x = self.deconv_layers(x) 246 | x = self.final_layer(x) 247 | 248 | return x 249 | 250 | def init_weights(self, pretrained=''): 251 | if os.path.isfile(pretrained): 252 | logger.info('=> init deconv weights from normal distribution') 253 | for name, m in self.deconv_layers.named_modules(): 254 | if isinstance(m, nn.ConvTranspose2d): 255 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 256 | logger.info('=> init {}.bias as 0'.format(name)) 257 | nn.init.normal_(m.weight, std=0.001) 258 | if self.deconv_with_bias: 259 | nn.init.constant_(m.bias, 0) 260 | elif isinstance(m, nn.BatchNorm2d): 261 | logger.info('=> init {}.weight as 1'.format(name)) 262 | logger.info('=> init {}.bias as 0'.format(name)) 263 | nn.init.constant_(m.weight, 1) 264 | nn.init.constant_(m.bias, 0) 265 | logger.info('=> init final conv weights from normal distribution') 266 | for m in self.final_layer.modules(): 267 | if isinstance(m, nn.Conv2d): 268 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 269 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 270 | logger.info('=> init {}.bias as 0'.format(name)) 271 | nn.init.normal_(m.weight, std=0.001) 272 | nn.init.constant_(m.bias, 0) 273 | 274 | # pretrained_state_dict = torch.load(pretrained) 275 | logger.info('=> loading pretrained model {}'.format(pretrained)) 276 | # self.load_state_dict(pretrained_state_dict, strict=False) 277 | checkpoint = torch.load(pretrained) 278 | if isinstance(checkpoint, OrderedDict): 279 | state_dict = checkpoint 280 | elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 281 | state_dict_old = checkpoint['state_dict'] 282 | state_dict = OrderedDict() 283 | # delete 'module.' 
because it is saved from DataParallel module 284 | for key in state_dict_old.keys(): 285 | if key.startswith('module.'): 286 | # state_dict[key[7:]] = state_dict[key] 287 | # state_dict.pop(key) 288 | state_dict[key[7:]] = state_dict_old[key] 289 | else: 290 | state_dict[key] = state_dict_old[key] 291 | else: 292 | raise RuntimeError( 293 | 'No state_dict found in checkpoint file {}'.format(pretrained)) 294 | self.load_state_dict(state_dict, strict=False) 295 | else: 296 | logger.error('=> imagenet pretrained model dose not exist') 297 | logger.error('=> please download it first') 298 | raise ValueError('imagenet pretrained model does not exist') 299 | 300 | 301 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 302 | 34: (BasicBlock, [3, 4, 6, 3]), 303 | 50: (Bottleneck, [3, 4, 6, 3]), 304 | 101: (Bottleneck, [3, 4, 23, 3]), 305 | 152: (Bottleneck, [3, 8, 36, 3])} 306 | 307 | 308 | def get_pose_net(cfg, is_train, **kwargs): 309 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 310 | style = cfg.MODEL.STYLE 311 | 312 | block_class, layers = resnet_spec[num_layers] 313 | 314 | if style == 'caffe': 315 | block_class = Bottleneck_CAFFE 316 | 317 | model = PoseResNet(block_class, layers, cfg, **kwargs) 318 | 319 | if is_train and cfg.MODEL.INIT_WEIGHTS: 320 | model.init_weights(cfg.MODEL.PRETRAINED) 321 | 322 | return model 323 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/human-pose-estimation.pytorch/49f3f4458c9d5917c75c37a6db48c6a0d7cd89a1/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | 10 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 11 | return a if a >= b else b 12 | 13 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 14 | return a if a <= b else b 15 | 16 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 17 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 18 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 19 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 20 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 21 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 22 | 23 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 24 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 25 | 26 | cdef int ndets = dets.shape[0] 27 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 28 | np.zeros((ndets), dtype=np.int) 29 | 30 | # nominal indices 31 | cdef int _i, _j 32 | # sorted indices 33 | cdef int i, j 34 | # temp variables for box i's (the box currently under consideration) 35 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 36 | # variables for computing overlap with box j (lower scoring box) 37 | cdef np.float32_t xx1, yy1, xx2, yy2 38 | cdef np.float32_t w, h 39 | cdef np.float32_t inter, ovr 40 | 41 | keep = [] 42 | for _i in range(ndets): 43 | i = order[_i] 44 | if suppressed[i] == 1: 45 | continue 46 | keep.append(i) 47 | ix1 = x1[i] 48 | iy1 = y1[i] 49 | ix2 = x2[i] 50 | iy2 = y2[i] 51 | iarea = areas[i] 52 | for _j in range(_i + 1, ndets): 53 | j = order[_j] 54 | if suppressed[j] == 1: 55 | continue 56 | xx1 = max(ix1, x1[j]) 57 | yy1 = max(iy1, y1[j]) 58 | xx2 = min(ix2, x2[j]) 59 | yy2 = min(iy2, y2[j]) 60 | w = max(0.0, xx2 - xx1 + 1) 61 | h = max(0.0, yy2 - yy1 + 1) 62 | inter = w * h 63 | ovr = inter / (iarea + areas[j] - inter) 64 | if ovr >= thresh: 65 | suppressed[j] = 1 66 | 67 | return keep 68 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | 10 | assert sizeof(int) == sizeof(np.int32_t) 11 | 12 | cdef extern from "gpu_nms.hpp": 13 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 14 | 15 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 16 | np.int32_t device_id=0): 17 | cdef int boxes_num = dets.shape[0] 18 | cdef int boxes_dim = dets.shape[1] 19 | cdef int num_out 20 | cdef np.ndarray[np.int32_t, ndim=1] \ 21 | keep = np.zeros(boxes_num, dtype=np.int32) 22 | cdef np.ndarray[np.float32_t, ndim=1] \ 23 | scores = dets[:, 4] 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | order = scores.argsort()[::-1].astype(np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=2] \ 27 | sorted_dets = dets[order, :] 28 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 29 | keep = keep[:num_out] 30 | return list(order[keep]) 31 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 75 | if not isinstance(sigmas, np.ndarray): 76 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 77 | vars = (sigmas * 2) ** 2 78 | xg = g[0::3] 79 | yg = g[1::3] 80 | vg = g[2::3] 81 | ious = 
np.zeros((d.shape[0])) 82 | for n_d in range(0, d.shape[0]): 83 | xd = d[n_d, 0::3] 84 | yd = d[n_d, 1::3] 85 | vd = d[n_d, 2::3] 86 | dx = xd - xg 87 | dy = yd - yg 88 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 89 | if in_vis_thre is not None: 90 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 91 | e = e[ind] 92 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 93 | return ious 94 | 95 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 96 | """ 97 | greedily select boxes with high confidence and overlap with current maximum <= thresh 98 | rule out overlap >= thresh, overlap = oks 99 | :param kpts_db 100 | :param thresh: retain overlap < thresh 101 | :return: indexes to keep 102 | """ 103 | if len(kpts_db) == 0: 104 | return [] 105 | 106 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 107 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 108 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 109 | 110 | order = scores.argsort()[::-1] 111 | 112 | keep = [] 113 | while order.size > 0: 114 | i = order[0] 115 | keep.append(i) 116 | 117 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 118 | 119 | inds = np.where(oks_ovr <= thresh)[0] 120 | order = order[inds + 1] 121 | 122 | return keep 123 | 124 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 
+ 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(&current_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long> remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) Microsoft 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | #
-------------------------------------------------------- 6 | 7 | import os 8 | from os.path import join as pjoin 9 | from setuptools import setup 10 | from distutils.extension import Extension 11 | from Cython.Distutils import build_ext 12 | import numpy as np 13 | 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | # Adapted fom 18 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 29 | and values giving the absolute path to each directory. 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | 58 | # Obtain the numpy include directory. This logic works across numpy versions. 59 | try: 60 | numpy_include = np.get_include() 61 | except AttributeError: 62 | numpy_include = np.get_numpy_include() 63 | 64 | 65 | def customize_compiler_for_nvcc(self): 66 | """inject deep into distutils to customize how the dispatch 67 | to gcc/nvcc works. 68 | If you subclass UnixCCompiler, it's not trivial to get your subclass 69 | injected in, and still have the right customizations (i.e. 70 | distutils.sysconfig.customize_compiler) run on it. So instead of going 71 | the OO route, I have this. Note, it's kindof like a wierd functional 72 | subclassing going on.""" 73 | 74 | # tell the compiler it can processes .cu 75 | self.src_extensions.append('.cu') 76 | 77 | # save references to the default compiler_so and _comple methods 78 | default_compiler_so = self.compiler_so 79 | super = self._compile 80 | 81 | # now redefine the _compile method. This gets executed for each 82 | # object but distutils doesn't have the ability to change compilers 83 | # based on source extension: we add it. 
84 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | 109 | ext_modules = [ 110 | Extension( 111 | "cpu_nms", 112 | ["cpu_nms.pyx"], 113 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 114 | include_dirs = [numpy_include] 115 | ), 116 | Extension('gpu_nms', 117 | ['nms_kernel.cu', 'gpu_nms.pyx'], 118 | library_dirs=[CUDA['lib64']], 119 | libraries=['cudart'], 120 | language='c++', 121 | runtime_library_dirs=[CUDA['lib64']], 122 | # this syntax is specific to this build system 123 | # we're only going to use certain compiler args with nvcc and not with 124 | # gcc the implementation of this trick is in customize_compiler() below 125 | extra_compile_args={'gcc': ["-Wno-unused-function"], 126 | 'nvcc': ['-arch=sm_35', 127 | '--ptxas-options=-v', 128 | '-c', 129 | '--compiler-options', 130 | "'-fPIC'"]}, 131 | include_dirs = [numpy_include, CUDA['include']] 132 | ), 133 | ] 134 | 135 | setup( 136 | name='nms', 137 | ext_modules=ext_modules, 138 | # inject our custom trigger 139 | cmdclass={'build_ext': custom_build_ext}, 140 | ) 141 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/human-pose-estimation.pytorch/49f3f4458c9d5917c75c37a6db48c6a0d7cd89a1/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def flip_back(output_flipped, matched_parts): 16 | ''' 17 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 18 | ''' 19 | assert output_flipped.ndim == 4,\ 20 | 'output_flipped should be [batch_size, num_joints, height, width]' 21 | 22 | output_flipped = output_flipped[:, :, :, ::-1] 23 | 24 | for pair in matched_parts: 25 | tmp = output_flipped[:, pair[0], :, :].copy() 26 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 27 | output_flipped[:, pair[1], :, :] = tmp 28 | 29 | return output_flipped 30 | 31 | 32 | def fliplr_joints(joints, joints_vis, width, matched_parts): 33 | """ 34 | flip coords 35 | """ 36 | # Flip horizontal 37 | joints[:, 0] = width - joints[:, 0] - 1 38 | 39 | # Change left-right parts 40 | for pair in matched_parts: 41 | joints[pair[0], :], joints[pair[1], :] = \ 42 | joints[pair[1], :], joints[pair[0], :].copy() 43 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 44 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 45 | 46 | return joints*joints_vis, joints_vis 47 | 48 | 49 | def transform_preds(coords, center, scale, output_size): 50 | target_coords = np.zeros(coords.shape) 51 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 52 | for p in range(coords.shape[0]): 53 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 54 | return target_coords 55 | 56 | 57 | def get_affine_transform(center, 58 | scale, 59 | rot, 60 | output_size, 61 | shift=np.array([0, 0], dtype=np.float32), 62 | inv=0): 63 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 64 | print(scale) 65 | scale = np.array([scale, scale]) 66 | 67 | scale_tmp = scale * 200.0 68 | src_w = scale_tmp[0] 69 | dst_w = output_size[0] 70 | dst_h = output_size[1] 71 | 72 | rot_rad = np.pi * rot / 180 73 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 74 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 75 | 76 | src = np.zeros((3, 2), dtype=np.float32) 77 | dst = np.zeros((3, 2), dtype=np.float32) 78 | src[0, :] = center + scale_tmp * shift 79 | src[1, :] = center + src_dir + scale_tmp * shift 80 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 81 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 82 | 83 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 84 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 85 | 86 | if inv: 87 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 88 | else: 89 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 90 | 91 | return trans 92 | 93 | 94 | def affine_transform(pt, t): 95 | new_pt = np.array([pt[0], pt[1], 1.]).T 96 | new_pt = np.dot(t, new_pt) 97 | return new_pt[:2] 98 | 99 | 100 | def get_3rd_point(a, b): 101 | direct = a - b 102 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 103 | 104 | 105 | def get_dir(src_point, rot_rad): 106 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 107 | 108 | src_result = [0, 0] 109 | src_result[0] = src_point[0] * cs - src_point[1] * sn 110 | src_result[1] = src_point[0] * sn + src_point[1] * cs 111 | 112 | return src_result 113 | 114 | 115 | def crop(img, center, scale, output_size, rot=0): 116 | trans = get_affine_transform(center, scale, rot, output_size) 117 | 118 | dst_img = 
cv2.warpAffine(img, 119 | trans, 120 | (int(output_size[0]), int(output_size[1])), 121 | flags=cv2.INTER_LINEAR) 122 | 123 | return dst_img 124 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from pathlib import Path 15 | 16 | import torch 17 | import torch.optim as optim 18 | 19 | from core.config import get_model_name 20 | 21 | 22 | def create_logger(cfg, cfg_name, phase='train'): 23 | root_output_dir = Path(cfg.OUTPUT_DIR) 24 | # set up logger 25 | if not root_output_dir.exists(): 26 | print('=> creating {}'.format(root_output_dir)) 27 | root_output_dir.mkdir() 28 | 29 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 30 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 31 | dataset = dataset.replace(':', '_') 32 | model, _ = get_model_name(cfg) 33 | cfg_name = os.path.basename(cfg_name).split('.')[0] 34 | 35 | final_output_dir = root_output_dir / dataset / model / cfg_name 36 | 37 | print('=> creating {}'.format(final_output_dir)) 38 | final_output_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | time_str = time.strftime('%Y-%m-%d-%H-%M') 41 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 42 | final_log_file = final_output_dir / log_file 43 | head = '%(asctime)-15s %(message)s' 44 | logging.basicConfig(filename=str(final_log_file), 45 | format=head) 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.INFO) 48 | console = logging.StreamHandler() 49 | logging.getLogger('').addHandler(console) 50 | 51 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 52 | (cfg_name + '_' + time_str) 53 | print('=> creating {}'.format(tensorboard_log_dir)) 54 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 55 | 56 | return logger, str(final_output_dir), str(tensorboard_log_dir) 57 | 58 | 59 | def get_optimizer(cfg, model): 60 | optimizer = None 61 | if cfg.TRAIN.OPTIMIZER == 'sgd': 62 | optimizer = optim.SGD( 63 | model.parameters(), 64 | lr=cfg.TRAIN.LR, 65 | momentum=cfg.TRAIN.MOMENTUM, 66 | weight_decay=cfg.TRAIN.WD, 67 | nesterov=cfg.TRAIN.NESTEROV 68 | ) 69 | elif cfg.TRAIN.OPTIMIZER == 'adam': 70 | optimizer = optim.Adam( 71 | model.parameters(), 72 | lr=cfg.TRAIN.LR 73 | ) 74 | 75 | return optimizer 76 | 77 | 78 | def save_checkpoint(states, is_best, output_dir, 79 | filename='checkpoint.pth.tar'): 80 | torch.save(states, os.path.join(output_dir, filename)) 81 | if is_best and 'state_dict' in states: 82 | torch.save(states['state_dict'], 83 | os.path.join(output_dir, 'model_best.pth.tar')) 84 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | 17 | from core.inference import get_max_preds 18 | 19 | 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 21 | file_name, nrow=8, padding=2): 22 | ''' 23 | batch_image: [batch_size, channel, height, width] 24 | batch_joints: [batch_size, num_joints, 3], 25 | batch_joints_vis: [batch_size, num_joints, 1], 26 | } 27 | ''' 28 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 29 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 30 | ndarr = ndarr.copy() 31 | 32 | nmaps = batch_image.size(0) 33 | xmaps = min(nrow, nmaps) 34 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 35 | height = int(batch_image.size(2) + padding) 36 | width = int(batch_image.size(3) + padding) 37 | k = 0 38 | for y in range(ymaps): 39 | for x in range(xmaps): 40 | if k >= nmaps: 41 | break 42 | joints = batch_joints[k] 43 | joints_vis = batch_joints_vis[k] 44 | 45 | for joint, joint_vis in zip(joints, joints_vis): 46 | joint[0] = x * width + padding + joint[0] 47 | joint[1] = y * height + padding + joint[1] 48 | if joint_vis[0]: 49 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) 50 | k = k + 1 51 | cv2.imwrite(file_name, ndarr) 52 | 53 | 54 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, 55 | normalize=True): 56 | ''' 57 | batch_image: [batch_size, channel, height, width] 58 | batch_heatmaps: ['batch_size, num_joints, height, width] 59 | file_name: saved file name 60 | ''' 61 | if normalize: 62 | batch_image = batch_image.clone() 63 | min = float(batch_image.min()) 64 | max = float(batch_image.max()) 65 | 66 | batch_image.add_(-min).div_(max - min + 1e-5) 67 | 68 | batch_size = batch_heatmaps.size(0) 69 | num_joints = batch_heatmaps.size(1) 70 | heatmap_height = batch_heatmaps.size(2) 71 | heatmap_width = batch_heatmaps.size(3) 72 | 73 | grid_image = np.zeros((batch_size*heatmap_height, 74 | (num_joints+1)*heatmap_width, 75 | 3), 76 | dtype=np.uint8) 77 | 78 | preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) 79 | 80 | for i in range(batch_size): 81 | image = batch_image[i].mul(255)\ 82 | .clamp(0, 255)\ 83 | .byte()\ 84 | .permute(1, 2, 0)\ 85 | .cpu().numpy() 86 | heatmaps = batch_heatmaps[i].mul(255)\ 87 | .clamp(0, 255)\ 88 | .byte()\ 89 | .cpu().numpy() 90 | 91 | resized_image = cv2.resize(image, 92 | (int(heatmap_width), int(heatmap_height))) 93 | 94 | height_begin = heatmap_height * i 95 | height_end = heatmap_height * (i + 1) 96 | for j in range(num_joints): 97 | cv2.circle(resized_image, 98 | (int(preds[i][j][0]), int(preds[i][j][1])), 99 | 1, [0, 0, 255], 1) 100 | heatmap = heatmaps[j, :, :] 101 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 102 | masked_image = colored_heatmap*0.7 + resized_image*0.3 103 | cv2.circle(masked_image, 104 | (int(preds[i][j][0]), int(preds[i][j][1])), 105 | 1, [0, 0, 255], 1) 106 | 107 | width_begin = heatmap_width * (j+1) 108 | width_end = heatmap_width * (j+2) 109 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 110 | masked_image 111 | # grid_image[height_begin:height_end, width_begin:width_end, :] = \ 112 | # colored_heatmap*0.7 + resized_image*0.3 113 | 
114 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 115 | 116 | cv2.imwrite(file_name, grid_image) 117 | 118 | 119 | def save_debug_images(config, input, meta, target, joints_pred, output, 120 | prefix): 121 | if not config.DEBUG.DEBUG: 122 | return 123 | 124 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 125 | save_batch_image_with_joints( 126 | input, meta['joints'], meta['joints_vis'], 127 | '{}_gt.jpg'.format(prefix) 128 | ) 129 | if config.DEBUG.SAVE_BATCH_IMAGES_PRED: 130 | save_batch_image_with_joints( 131 | input, joints_pred, meta['joints_vis'], 132 | '{}_pred.jpg'.format(prefix) 133 | ) 134 | if config.DEBUG.SAVE_HEATMAPS_GT: 135 | save_batch_heatmaps( 136 | input, target, '{}_hm_gt.jpg'.format(prefix) 137 | ) 138 | if config.DEBUG.SAVE_HEATMAPS_PRED: 139 | save_batch_heatmaps( 140 | input, output, '{}_hm_pred.jpg'.format(prefix) 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.index('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.index('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /pose_estimation/_init_paths.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | 20 | this_dir = osp.dirname(__file__) 21 | 22 | lib_path = osp.join(this_dir, '..', 'lib') 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /pose_estimation/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import argparse 12 | import os 13 | import pprint 14 | import shutil 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | from tensorboardX import SummaryWriter 24 | 25 | import _init_paths 26 | from core.config import config 27 | from core.config import update_config 28 | from core.config import update_dir 29 | from core.config import get_model_name 30 | from core.loss import JointsMSELoss 31 | from core.function import train 32 | from core.function import validate 33 | from utils.utils import get_optimizer 34 | from utils.utils import save_checkpoint 35 | from utils.utils import create_logger 36 | 37 | import dataset 38 | import models 39 | 40 | 41 | def parse_args(): 42 | parser = argparse.ArgumentParser(description='Train keypoints network') 43 | # general 44 | parser.add_argument('--cfg', 45 | help='experiment configure file name', 46 | required=True, 47 | type=str) 48 | 49 | args, rest = parser.parse_known_args() 50 | # update config 51 | update_config(args.cfg) 52 | 53 | # training 54 | parser.add_argument('--frequent', 55 | help='frequency of logging', 56 | default=config.PRINT_FREQ, 57 | type=int) 58 | parser.add_argument('--gpus', 59 | help='gpus', 60 | type=str) 61 | parser.add_argument('--workers', 62 | help='num of dataloader workers', 63 | type=int) 64 | 65 | args = parser.parse_args() 66 | 67 | return args 68 | 69 | 70 | def reset_config(config, args): 71 | if args.gpus: 72 | config.GPUS = args.gpus 73 | if args.workers: 74 | config.WORKERS = args.workers 75 | 76 | 77 | def main(): 78 | args = parse_args() 79 | reset_config(config, args) 80 | 81 | logger, final_output_dir, tb_log_dir = create_logger( 82 | config, args.cfg, 'train') 83 | 84 | logger.info(pprint.pformat(args)) 85 | logger.info(pprint.pformat(config)) 86 | 87 | # cudnn related setting 88 | cudnn.benchmark = config.CUDNN.BENCHMARK 89 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 90 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 91 | 92 | model = 
eval('models.'+config.MODEL.NAME+'.get_pose_net')( 93 | config, is_train=True 94 | ) 95 | 96 | # copy model file 97 | this_dir = os.path.dirname(__file__) 98 | shutil.copy2( 99 | os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'), 100 | final_output_dir) 101 | 102 | writer_dict = { 103 | 'writer': SummaryWriter(log_dir=tb_log_dir), 104 | 'train_global_steps': 0, 105 | 'valid_global_steps': 0, 106 | } 107 | 108 | dump_input = torch.rand((config.TRAIN.BATCH_SIZE, 109 | 3, 110 | config.MODEL.IMAGE_SIZE[1], 111 | config.MODEL.IMAGE_SIZE[0])) 112 | writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False) 113 | 114 | gpus = [int(i) for i in config.GPUS.split(',')] 115 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 116 | 117 | # define loss function (criterion) and optimizer 118 | criterion = JointsMSELoss( 119 | use_target_weight=config.LOSS.USE_TARGET_WEIGHT 120 | ).cuda() 121 | 122 | optimizer = get_optimizer(config, model) 123 | 124 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 125 | optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR 126 | ) 127 | 128 | # Data loading code 129 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 130 | std=[0.229, 0.224, 0.225]) 131 | train_dataset = eval('dataset.'+config.DATASET.DATASET)( 132 | config, 133 | config.DATASET.ROOT, 134 | config.DATASET.TRAIN_SET, 135 | True, 136 | transforms.Compose([ 137 | transforms.ToTensor(), 138 | normalize, 139 | ]) 140 | ) 141 | valid_dataset = eval('dataset.'+config.DATASET.DATASET)( 142 | config, 143 | config.DATASET.ROOT, 144 | config.DATASET.TEST_SET, 145 | False, 146 | transforms.Compose([ 147 | transforms.ToTensor(), 148 | normalize, 149 | ]) 150 | ) 151 | 152 | train_loader = torch.utils.data.DataLoader( 153 | train_dataset, 154 | batch_size=config.TRAIN.BATCH_SIZE*len(gpus), 155 | shuffle=config.TRAIN.SHUFFLE, 156 | num_workers=config.WORKERS, 157 | pin_memory=True 158 | ) 159 | valid_loader = torch.utils.data.DataLoader( 160 | valid_dataset, 161 | batch_size=config.TEST.BATCH_SIZE*len(gpus), 162 | shuffle=False, 163 | num_workers=config.WORKERS, 164 | pin_memory=True 165 | ) 166 | 167 | best_perf = 0.0 168 | best_model = False 169 | for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH): 170 | lr_scheduler.step() 171 | 172 | # train for one epoch 173 | train(config, train_loader, model, criterion, optimizer, epoch, 174 | final_output_dir, tb_log_dir, writer_dict) 175 | 176 | 177 | # evaluate on validation set 178 | perf_indicator = validate(config, valid_loader, valid_dataset, model, 179 | criterion, final_output_dir, tb_log_dir, 180 | writer_dict) 181 | 182 | if perf_indicator > best_perf: 183 | best_perf = perf_indicator 184 | best_model = True 185 | else: 186 | best_model = False 187 | 188 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 189 | save_checkpoint({ 190 | 'epoch': epoch + 1, 191 | 'model': get_model_name(config), 192 | 'state_dict': model.state_dict(), 193 | 'perf': perf_indicator, 194 | 'optimizer': optimizer.state_dict(), 195 | }, best_model, final_output_dir) 196 | 197 | final_model_state_file = os.path.join(final_output_dir, 198 | 'final_state.pth.tar') 199 | logger.info('saving final model state to {}'.format( 200 | final_model_state_file)) 201 | torch.save(model.module.state_dict(), final_model_state_file) 202 | writer_dict['writer'].close() 203 | 204 | 205 | if __name__ == '__main__': 206 | main() 207 | -------------------------------------------------------------------------------- 
/pose_estimation/valid.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | 24 | import _init_paths 25 | from core.config import config 26 | from core.config import update_config 27 | from core.config import update_dir 28 | from core.loss import JointsMSELoss 29 | from core.function import validate 30 | from utils.utils import create_logger 31 | 32 | import dataset 33 | import models 34 | 35 | 36 | def parse_args(): 37 | parser = argparse.ArgumentParser(description='Train keypoints network') 38 | # general 39 | parser.add_argument('--cfg', 40 | help='experiment configure file name', 41 | required=True, 42 | type=str) 43 | 44 | args, rest = parser.parse_known_args() 45 | # update config 46 | update_config(args.cfg) 47 | 48 | # training 49 | parser.add_argument('--frequent', 50 | help='frequency of logging', 51 | default=config.PRINT_FREQ, 52 | type=int) 53 | parser.add_argument('--gpus', 54 | help='gpus', 55 | type=str) 56 | parser.add_argument('--workers', 57 | help='num of dataloader workers', 58 | type=int) 59 | parser.add_argument('--model-file', 60 | help='model state file', 61 | type=str) 62 | parser.add_argument('--use-detect-bbox', 63 | help='use detect bbox', 64 | action='store_true') 65 | parser.add_argument('--flip-test', 66 | help='use flip test', 67 | action='store_true') 68 | parser.add_argument('--post-process', 69 | help='use post process', 70 | action='store_true') 71 | parser.add_argument('--shift-heatmap', 72 | help='shift heatmap', 73 | action='store_true') 74 | parser.add_argument('--coco-bbox-file', 75 | help='coco detection bbox file', 76 | type=str) 77 | 78 | args = parser.parse_args() 79 | 80 | return args 81 | 82 | 83 | def reset_config(config, args): 84 | if args.gpus: 85 | config.GPUS = args.gpus 86 | if args.workers: 87 | config.WORKERS = args.workers 88 | if args.use_detect_bbox: 89 | config.TEST.USE_GT_BBOX = not args.use_detect_bbox 90 | if args.flip_test: 91 | config.TEST.FLIP_TEST = args.flip_test 92 | if args.post_process: 93 | config.TEST.POST_PROCESS = args.post_process 94 | if args.shift_heatmap: 95 | config.TEST.SHIFT_HEATMAP = args.shift_heatmap 96 | if args.model_file: 97 | config.TEST.MODEL_FILE = args.model_file 98 | if args.coco_bbox_file: 99 | config.TEST.COCO_BBOX_FILE = args.coco_bbox_file 100 | 101 | 102 | def main(): 103 | args = parse_args() 104 | reset_config(config, args) 105 | 106 | logger, final_output_dir, tb_log_dir = create_logger( 107 | config, args.cfg, 'valid') 108 | 109 | logger.info(pprint.pformat(args)) 110 | logger.info(pprint.pformat(config)) 111 | 112 | # cudnn related setting 113 | cudnn.benchmark = config.CUDNN.BENCHMARK 114 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 115 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 116 
| 117 | model = eval('models.'+config.MODEL.NAME+'.get_pose_net')( 118 | config, is_train=False 119 | ) 120 | 121 | if config.TEST.MODEL_FILE: 122 | logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE)) 123 | model.load_state_dict(torch.load(config.TEST.MODEL_FILE)) 124 | else: 125 | model_state_file = os.path.join(final_output_dir, 126 | 'final_state.pth.tar') 127 | logger.info('=> loading model from {}'.format(model_state_file)) 128 | model.load_state_dict(torch.load(model_state_file)) 129 | 130 | gpus = [int(i) for i in config.GPUS.split(',')] 131 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 132 | 133 | # define loss function (criterion) and optimizer 134 | criterion = JointsMSELoss( 135 | use_target_weight=config.LOSS.USE_TARGET_WEIGHT 136 | ).cuda() 137 | 138 | # Data loading code 139 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 140 | std=[0.229, 0.224, 0.225]) 141 | valid_dataset = eval('dataset.'+config.DATASET.DATASET)( 142 | config, 143 | config.DATASET.ROOT, 144 | config.DATASET.TEST_SET, 145 | False, 146 | transforms.Compose([ 147 | transforms.ToTensor(), 148 | normalize, 149 | ]) 150 | ) 151 | valid_loader = torch.utils.data.DataLoader( 152 | valid_dataset, 153 | batch_size=config.TEST.BATCH_SIZE*len(gpus), 154 | shuffle=False, 155 | num_workers=config.WORKERS, 156 | pin_memory=True 157 | ) 158 | 159 | # evaluate on validation set 160 | validate(config, valid_loader, valid_dataset, model, criterion, 161 | final_output_dir, tb_log_dir) 162 | 163 | 164 | if __name__ == '__main__': 165 | main() 166 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | opencv-python==3.4.1.15 3 | Cython 4 | scipy 5 | pandas 6 | pyyaml 7 | json_tricks 8 | scikit-image 9 | tensorboardX>=1.2 10 | torchvision 11 | --------------------------------------------------------------------------------
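A minimal usage sketch for the OKS-based suppression in lib/nms/nms.py listed above, under a few assumptions: lib/ is on sys.path (as pose_estimation/_init_paths.py arranges), the extensions under lib/nms have been built with its setup.py so the nms module imports cleanly, and a 17-joint COCO keypoint layout is used so the per-joint sigmas inside oks_iou line up with the keypoint count. Every value below (coordinates, scores, areas) is made up purely for illustration; each detection dict carries exactly the fields oks_nms reads: 'keypoints', 'score', and 'area'.

import numpy as np

from nms.nms import oks_nms  # assumption: lib/ is on sys.path and the lib/nms extensions are built

NUM_JOINTS = 17  # COCO keypoint layout


def fake_detection(shift, score):
    # (num_joints, 3) array of x, y, visibility; oks_nms flattens it internally
    kpts = np.zeros((NUM_JOINTS, 3), dtype=np.float32)
    kpts[:, 0] = np.linspace(10.0, 100.0, NUM_JOINTS) + shift  # x
    kpts[:, 1] = np.linspace(20.0, 200.0, NUM_JOINTS) + shift  # y
    kpts[:, 2] = 1.0                                           # visibility
    return {'keypoints': kpts, 'score': score, 'area': 180.0 * 90.0}


# two near-duplicate detections plus one far away
kpts_db = [fake_detection(0.0, 0.95), fake_detection(2.0, 0.90), fake_detection(500.0, 0.80)]

keep = oks_nms(kpts_db, thresh=0.9)
print(keep)  # indices into kpts_db of the detections retained after suppression

The lower-scoring near-duplicate should be suppressed because its OKS against the top-scoring detection exceeds the threshold, while the distant detection survives.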