├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── experiments ├── coco │ ├── resnet101 │ │ ├── 256x192_d256x3_adam_lr1e-3.yaml │ │ ├── 256x192_d256x3_adam_lr1e-3_caffe.yaml │ │ └── 384x288_d256x3_adam_lr1e-3.yaml │ ├── resnet152 │ │ ├── 256x192_d256x3_adam_lr1e-3.yaml │ │ ├── 256x192_d256x3_adam_lr1e-3_caffe.yaml │ │ └── 384x288_d256x3_adam_lr1e-3.yaml │ └── resnet50 │ │ ├── 256x192_d256x3_adam_lr1e-3.yaml │ │ ├── 256x192_d256x3_adam_lr1e-3_caffe.yaml │ │ └── 384x288_d256x3_adam_lr1e-3.yaml └── mpii │ ├── resnet101 │ ├── 256x256_d256x3_adam_lr1e-3.yaml │ └── 384x384_d256x3_adam_lr1e-3.yaml │ ├── resnet152 │ ├── 256x256_d256x3_adam_lr1e-3.yaml │ └── 384x384_d256x3_adam_lr1e-3.yaml │ └── resnet50 │ ├── 256x256_d256x3_adam_lr1e-3.yaml │ └── 384x384_d256x3_adam_lr1e-3.yaml ├── lib ├── Makefile ├── core │ ├── config.py │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── coco.py │ └── mpii.py ├── models │ ├── __init__.py │ └── pose_resnet.py ├── nms │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── pose_estimation ├── _init_paths.py ├── train.py └── valid.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | .hypothesis/ 46 | .pytest_cache/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | db.sqlite3 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # Environments 83 | .env 84 | .venv 85 | env/ 86 | venv/ 87 | ENV/ 88 | env.bak/ 89 | venv.bak/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | /data 105 | /output 106 | /models 107 | /log 108 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing 3 | 4 | This project welcomes contributions and suggestions. 
Most contributions require you to agree to a 5 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 6 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 9 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 10 | provided by the bot. You will only need to do this once across all repos using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 14 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple Baselines for Human Pose Estimation and Tracking 2 | 3 | ## News 4 | - Our new work [High-Resolution Representations for Labeling Pixels and Regions](https://arxiv.org/abs/1904.04514) is available at [HRNet](https://github.com/HRNet). Our HRNet has been applied to a wide range of vision tasks, such as [image classification](https://github.com/HRNet/HRNet-Image-Classification), [object detection](https://github.com/HRNet/HRNet-Object-Detection), [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation) and [facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection). 5 | - Our new work [Deep High-Resolution Representation Learning for Human Pose Estimation](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch) has already been released. The best single HRNet can obtain an **AP of 77.0** on the COCO test-dev2017 dataset and **92.3% PCKh@0.5** on the MPII test set. The new repository also supports the SimpleBaseline method, and you are welcome to try it.
6 | - Our entry using this repo won the [PoseTrack2018 Multi-person Pose Tracking Challenge](https://posetrack.net/workshops/eccv2018/posetrack_eccv_2018_results.html)!
7 | - Our entry using this repo ranked 2nd in the [keypoint detection task of COCO 2018](http://cocodataset.org/#keypoints-leaderboard)! 8 | 9 | ## Introduction 10 | This is an official pytorch implementation of [*Simple Baselines for Human Pose Estimation and Tracking*](https://arxiv.org/abs/1804.06208). This work provides baseline methods that are surprisingly simple and effective, and thus helpful for inspiring and evaluating new ideas in the field. State-of-the-art results are achieved on challenging benchmarks. On the COCO keypoint validation set, our best **single model** achieves **74.3 mAP**. You can reproduce our results using this repo. All models are provided for research purposes.
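The pipeline is intentionally minimal: a ResNet backbone, a small stack of deconvolutional layers, and a 1x1 convolution that predicts one heatmap per joint. The snippet below is only an illustrative sketch, assuming a torchvision ResNet-50 backbone and a hypothetical class name; the actual model is `lib/models/pose_resnet.py`, and the numbers used here (3 deconv layers, 256 filters, 4x4 kernels, 1x1 final conv, 17 COCO joints) come from the `MODEL`/`EXTRA` fields of the yaml files in `experiments/`.

```
# Illustrative sketch only (hypothetical class); the repository model is lib/models/pose_resnet.py.
import torch
import torch.nn as nn
import torchvision

class SimpleBaselineSketch(nn.Module):
    def __init__(self, num_joints=17, deconv_filters=256, num_deconv_layers=3):
        super(SimpleBaselineSketch, self).__init__()
        resnet = torchvision.models.resnet50(pretrained=False)
        # keep the convolutional stages, drop avgpool/fc (output stride 32, 2048 channels)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        layers, in_channels = [], 2048
        for _ in range(num_deconv_layers):  # EXTRA.NUM_DECONV_LAYERS
            layers += [nn.ConvTranspose2d(in_channels, deconv_filters, kernel_size=4,
                                          stride=2, padding=1, bias=False),
                       nn.BatchNorm2d(deconv_filters),
                       nn.ReLU(inplace=True)]
            in_channels = deconv_filters
        self.deconv_layers = nn.Sequential(*layers)
        # EXTRA.FINAL_CONV_KERNEL: 1 -> one heatmap per joint
        self.final_layer = nn.Conv2d(deconv_filters, num_joints, kernel_size=1)

    def forward(self, x):
        return self.final_layer(self.deconv_layers(self.backbone(x)))

# A 256x192 crop gives 8x6 backbone features and 64x48 heatmaps, matching HEATMAP_SIZE in the configs.
print(SimpleBaselineSketch()(torch.randn(1, 3, 256, 192)).shape)  # torch.Size([1, 17, 64, 48])
```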
11 | 12 | ## Main Results 13 | ### Results on MPII val 14 | | Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1| 15 | |---|---|---|---|---|---|---|---|---|---| 16 | | 256x256_pose_resnet_50_d256d256d256 | 96.351 | 95.329 | 88.989 | 83.176 | 88.420 | 83.960 | 79.594 | 88.532 | 33.911 | 17 | | 384x384_pose_resnet_50_d256d256d256 | 96.658 | 95.754 | 89.790 | 84.614 | 88.523 | 84.666 | 79.287 | 89.066 | 38.046 | 18 | | 256x256_pose_resnet_101_d256d256d256 | 96.862 | 95.873 | 89.518 | 84.376 | 88.437 | 84.486 | 80.703 | 89.131 | 34.020 | 19 | | 384x384_pose_resnet_101_d256d256d256 | 96.965 | 95.907 | 90.268 | 85.780 | 89.597 | 85.935 | 82.098 | 90.003 | 38.860 | 20 | | 256x256_pose_resnet_152_d256d256d256 | 97.033 | 95.941 | 90.046 | 84.976 | 89.164 | 85.311 | 81.271 | 89.620 | 35.025 | 21 | | 384x384_pose_resnet_152_d256d256d256 | 96.794 | 95.618 | 90.080 | 86.225 | 89.700 | 86.862 | 82.853 | 90.200 | 39.433 | 22 | 23 | ### Note: 24 | - Flip test is used. 25 | 26 | ### Results on COCO val2017, using a detector with human AP of 56.4 on COCO val2017 27 | | Arch | AP | AP .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 28 | |---|---|---|---|---|---|---|---|---|---|---| 29 | | 256x192_pose_resnet_50_d256d256d256 | 0.704 | 0.886 | 0.783 | 0.671 | 0.772 | 0.763 | 0.929 | 0.834 | 0.721 | 0.824 | 30 | | 384x288_pose_resnet_50_d256d256d256 | 0.722 | 0.893 | 0.789 | 0.681 | 0.797 | 0.776 | 0.932 | 0.838 | 0.728 | 0.846 | 31 | | 256x192_pose_resnet_101_d256d256d256 | 0.714 | 0.893 | 0.793 | 0.681 | 0.781 | 0.771 | 0.934 | 0.840 | 0.730 | 0.832 | 32 | | 384x288_pose_resnet_101_d256d256d256 | 0.736 | 0.896 | 0.803 | 0.699 | 0.811 | 0.791 | 0.936 | 0.851 | 0.745 | 0.858 | 33 | | 256x192_pose_resnet_152_d256d256d256 | 0.720 | 0.893 | 0.798 | 0.687 | 0.789 | 0.778 | 0.934 | 0.846 | 0.736 | 0.839 | 34 | | 384x288_pose_resnet_152_d256d256d256 | 0.743 | 0.896 | 0.811 | 0.705 | 0.816 | 0.797 | 0.937 | 0.858 | 0.751 | 0.863 | 35 | 36 | 37 | ### Results on *Caffe-style* ResNet 38 | | Arch | AP | AP .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 39 | |---|---|---|---|---|---|---|---|---|---|---| 40 | | 256x192_pose_resnet_50_*caffe*_d256d256d256 | 0.704 | 0.914 | 0.782 | 0.677 | 0.744 | 0.735 | 0.921 | 0.805 | 0.704 | 0.783 | 41 | | 256x192_pose_resnet_101_*caffe*_d256d256d256 | 0.720 | 0.915 | 0.803 | 0.693 | 0.764 | 0.753 | 0.928 | 0.821 | 0.720 | 0.802 | 42 | | 256x192_pose_resnet_152_*caffe*_d256d256d256 | 0.728 | 0.925 | 0.804 | 0.702 | 0.766 | 0.760 | 0.931 | 0.828 | 0.729 | 0.806 | 43 | 44 | 45 | ### Note: 46 | - Flip test is used. 47 | - The person detector has a human AP of 56.4 on the COCO val2017 dataset. 48 | - The difference between *PyTorch-style* and *Caffe-style* ResNet is the position of the stride-2 convolution (see the sketch below). 49 |
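To make that last note concrete, here is a small hypothetical sketch; it is not the repository code (the real bottleneck blocks live in `lib/models/pose_resnet.py`, and the style is selected via `STYLE: 'caffe'` in the `*_caffe.yaml` configs). Both styles build the same 1x1 / 3x3 / 1x1 bottleneck; they differ only in which convolution carries the stride-2 downsampling.

```
# Hypothetical illustration only -- the real bottlenecks are in lib/models/pose_resnet.py.
import torch.nn as nn

def bottleneck_convs(inplanes, planes, stride=2, style='pytorch'):
    # PyTorch-style ResNet puts the stride on the 3x3 convolution;
    # Caffe-style ResNet puts it on the first 1x1 convolution.
    s1, s3 = (stride, 1) if style == 'caffe' else (1, stride)
    return nn.Sequential(
        nn.Conv2d(inplanes, planes, kernel_size=1, stride=s1, bias=False),
        nn.BatchNorm2d(planes), nn.ReLU(inplace=True),
        nn.Conv2d(planes, planes, kernel_size=3, stride=s3, padding=1, bias=False),
        nn.BatchNorm2d(planes), nn.ReLU(inplace=True),
        nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False),
        nn.BatchNorm2d(planes * 4),
    )
```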
50 | ## Environment 51 | The code is developed using python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 4 NVIDIA P100 GPU cards. Other platforms or GPU cards are not fully tested. 52 | 53 | ## Quick start 54 | ### Installation 55 | 1. Install pytorch >= v0.4.0 following the [official instructions](https://pytorch.org/). 56 | 2. Disable cudnn for batch_norm: 57 | ``` 58 | # PYTORCH=/path/to/pytorch 59 | # for pytorch v0.4.0 60 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 61 | # for pytorch v0.4.1 62 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 63 | ``` 64 | Note that instructions like # PYTORCH=/path/to/pytorch indicate that you should pick a path where you'd like to have pytorch installed and then set an environment variable (PYTORCH in this case) accordingly. 65 | 3. Clone this repo; we'll refer to the directory that you cloned as ${POSE_ROOT}. 66 | 4. Install dependencies: 67 | ``` 68 | pip install -r requirements.txt 69 | ``` 70 | 5. Make libs: 71 | ``` 72 | cd ${POSE_ROOT}/lib 73 | make 74 | ``` 75 | 6. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 76 | ``` 77 | # COCOAPI=/path/to/clone/cocoapi 78 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 79 | cd $COCOAPI/PythonAPI 80 | # Install into global site-packages 81 | make install 82 | # Alternatively, if you do not have permissions or prefer 83 | # not to install the COCO API into global site-packages 84 | python3 setup.py install --user 85 | ``` 86 | Note that instructions like # COCOAPI=/path/to/clone/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. 87 | 7. Download pytorch imagenet pretrained models from [pytorch model zoo](https://pytorch.org/docs/stable/model_zoo.html#module-torch.utils.model_zoo) and caffe-style pretrained models from [GoogleDrive](https://drive.google.com/drive/folders/1yJMSFOnmzwhA4YYQS71Uy7X1Kl_xq9fN?usp=sharing). 88 | 8. Download mpii and coco pretrained models from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW0D5ZE4ArK9wk_fvw) or [GoogleDrive](https://drive.google.com/drive/folders/13_wJ6nC7my1KKouMkQMqyr9r1ZnLnukP?usp=sharing). Please download them under ${POSE_ROOT}/models/pytorch, and make them look like this: 89 | 90 | ``` 91 | ${POSE_ROOT} 92 | `-- models 93 | `-- pytorch 94 | |-- imagenet 95 | | |-- resnet50-19c8e357.pth 96 | | |-- resnet50-caffe.pth.tar 97 | | |-- resnet101-5d3b4d8f.pth 98 | | |-- resnet101-caffe.pth.tar 99 | | |-- resnet152-b121ed2d.pth 100 | | `-- resnet152-caffe.pth.tar 101 | |-- pose_coco 102 | | |-- pose_resnet_101_256x192.pth.tar 103 | | |-- pose_resnet_101_384x288.pth.tar 104 | | |-- pose_resnet_152_256x192.pth.tar 105 | | |-- pose_resnet_152_384x288.pth.tar 106 | | |-- pose_resnet_50_256x192.pth.tar 107 | | `-- pose_resnet_50_384x288.pth.tar 108 | `-- pose_mpii 109 | |-- pose_resnet_101_256x256.pth.tar 110 | |-- pose_resnet_101_384x384.pth.tar 111 | |-- pose_resnet_152_256x256.pth.tar 112 | |-- pose_resnet_152_384x384.pth.tar 113 | |-- pose_resnet_50_256x256.pth.tar 114 | `-- pose_resnet_50_384x384.pth.tar 115 | 116 | ``` 117 | 118 | 9. Init the output (training model output) and log (tensorboard log) directories: 119 | 120 | ``` 121 | mkdir output 122 | mkdir log 123 | ``` 124 | 125 | Your directory tree should look like this: 126 | 127 | ``` 128 | ${POSE_ROOT} 129 | ├── data 130 | ├── experiments 131 | ├── lib 132 | ├── log 133 | ├── models 134 | ├── output 135 | ├── pose_estimation 136 | ├── README.md 137 | └── requirements.txt 138 | ``` 139 | 140 | ### Data preparation 141 | **For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). The original annotation files are in MATLAB format.
We have converted them into json format, you also need to download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW00SqrairNetmeVu4) or [GoogleDrive](https://drive.google.com/drive/folders/1En_VqmStnsXMdldXA6qpqEyDQulnmS3a?usp=sharing). 142 | Extract them under {POSE_ROOT}/data, and make them look like this: 143 | ``` 144 | ${POSE_ROOT} 145 | |-- data 146 | `-- |-- mpii 147 | `-- |-- annot 148 | | |-- gt_valid.mat 149 | | |-- test.json 150 | | |-- train.json 151 | | |-- trainval.json 152 | | `-- valid.json 153 | `-- images 154 | |-- 000001163.jpg 155 | |-- 000003072.jpg 156 | ``` 157 | 158 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. We also provide person detection result of COCO val2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing). 159 | Download and extract them under {POSE_ROOT}/data, and make them look like this: 160 | ``` 161 | ${POSE_ROOT} 162 | |-- data 163 | `-- |-- coco 164 | `-- |-- annotations 165 | | |-- person_keypoints_train2017.json 166 | | `-- person_keypoints_val2017.json 167 | |-- person_detection_results 168 | | |-- COCO_val2017_detections_AP_H_56_person.json 169 | `-- images 170 | |-- train2017 171 | | |-- 000000000009.jpg 172 | | |-- 000000000025.jpg 173 | | |-- 000000000030.jpg 174 | | |-- ... 175 | `-- val2017 176 | |-- 000000000139.jpg 177 | |-- 000000000285.jpg 178 | |-- 000000000632.jpg 179 | |-- ... 180 | ``` 181 | 182 | ### Valid on MPII using pretrained models 183 | 184 | ``` 185 | python pose_estimation/valid.py \ 186 | --cfg experiments/mpii/resnet50/256x256_d256x3_adam_lr1e-3.yaml \ 187 | --flip-test \ 188 | --model-file models/pytorch/pose_mpii/pose_resnet_50_256x256.pth.tar 189 | ``` 190 | 191 | ### Training on MPII 192 | 193 | ``` 194 | python pose_estimation/train.py \ 195 | --cfg experiments/mpii/resnet50/256x256_d256x3_adam_lr1e-3.yaml 196 | ``` 197 | 198 | ### Valid on COCO val2017 using pretrained models 199 | 200 | ``` 201 | python pose_estimation/valid.py \ 202 | --cfg experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml \ 203 | --flip-test \ 204 | --model-file models/pytorch/pose_coco/pose_resnet_50_256x192.pth.tar 205 | ``` 206 | 207 | ### Training on COCO train2017 208 | 209 | ``` 210 | python pose_estimation/train.py \ 211 | --cfg experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml 212 | ``` 213 | 214 | ### Other Implementations 215 | - TensorFlow [[Version1](https://github.com/mks0601/TF-SimpleHumanPose)] 216 | - PaddlePaddle [[Version1](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/human_pose_estimation)] 217 | - Gluon [[Version1](https://gluon-cv.mxnet.io/model_zoo/pose.html)] 218 | 219 | ### Citation 220 | If you use our code or models in your research, please cite with: 221 | ``` 222 | @inproceedings{xiao2018simple, 223 | author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, 224 | title={Simple Baselines for Human Pose Estimation and Tracking}, 225 | booktitle = {European Conference on Computer Vision (ECCV)}, 226 | year = {2018} 227 | } 228 | ``` 229 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our 
software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /experiments/coco/resnet101/256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 19 | IMAGE_SIZE: 20 | - 192 21 | - 256 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 48 27 | - 64 28 | SIGMA: 2 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 101 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet101/256x192_d256x3_adam_lr1e-3_caffe.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-caffe.pth.tar' 19 | STYLE: 'caffe' 20 | IMAGE_SIZE: 21 | - 192 22 | - 256 23 | NUM_JOINTS: 17 24 | EXTRA: 25 | TARGET_TYPE: 'gaussian' 26 | HEATMAP_SIZE: 27 | - 48 28 | - 64 29 | SIGMA: 2 30 | FINAL_CONV_KERNEL: 1 31 | DECONV_WITH_BIAS: false 32 | NUM_DECONV_LAYERS: 3 33 | NUM_DECONV_FILTERS: 34 | - 256 35 | - 256 36 | - 256 37 | NUM_DECONV_KERNELS: 38 | - 4 39 | - 4 40 | - 4 41 | NUM_LAYERS: 101 42 | LOSS: 43 | USE_TARGET_WEIGHT: true 44 | TRAIN: 45 | BATCH_SIZE: 32 46 | SHUFFLE: true 47 | BEGIN_EPOCH: 0 48 | END_EPOCH: 140 49 | RESUME: false 50 | OPTIMIZER: 'adam' 51 | LR: 0.001 52 | LR_FACTOR: 0.1 53 | LR_STEP: 54 | - 90 55 | - 120 56 | WD: 0.0001 57 | GAMMA1: 0.99 58 | GAMMA2: 0.0 59 | MOMENTUM: 0.9 60 | NESTEROV: false 61 | TEST: 62 | BATCH_SIZE: 32 63 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 64 | BBOX_THRE: 1.0 65 | FLIP_TEST: false 66 | IMAGE_THRE: 0.0 67 | IN_VIS_THRE: 0.2 68 | MODEL_FILE: '' 69 | NMS_THRE: 1.0 70 | OKS_THRE: 0.9 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | 
SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet101/384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 19 | IMAGE_SIZE: 20 | - 288 21 | - 384 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 72 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 101 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet152/256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 19 | IMAGE_SIZE: 20 | - 192 21 | - 256 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 48 27 | - 64 28 | SIGMA: 2 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 152 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 
77 | -------------------------------------------------------------------------------- /experiments/coco/resnet152/256x192_d256x3_adam_lr1e-3_caffe.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet152-caffe.pth.tar' 19 | STYLE: 'caffe' 20 | IMAGE_SIZE: 21 | - 192 22 | - 256 23 | NUM_JOINTS: 17 24 | EXTRA: 25 | TARGET_TYPE: 'gaussian' 26 | HEATMAP_SIZE: 27 | - 48 28 | - 64 29 | SIGMA: 2 30 | FINAL_CONV_KERNEL: 1 31 | DECONV_WITH_BIAS: false 32 | NUM_DECONV_LAYERS: 3 33 | NUM_DECONV_FILTERS: 34 | - 256 35 | - 256 36 | - 256 37 | NUM_DECONV_KERNELS: 38 | - 4 39 | - 4 40 | - 4 41 | NUM_LAYERS: 152 42 | LOSS: 43 | USE_TARGET_WEIGHT: true 44 | TRAIN: 45 | BATCH_SIZE: 32 46 | SHUFFLE: true 47 | BEGIN_EPOCH: 0 48 | END_EPOCH: 140 49 | RESUME: false 50 | OPTIMIZER: 'adam' 51 | LR: 0.001 52 | LR_FACTOR: 0.1 53 | LR_STEP: 54 | - 90 55 | - 120 56 | WD: 0.0001 57 | GAMMA1: 0.99 58 | GAMMA2: 0.0 59 | MOMENTUM: 0.9 60 | NESTEROV: false 61 | TEST: 62 | BATCH_SIZE: 32 63 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 64 | BBOX_THRE: 1.0 65 | FLIP_TEST: false 66 | IMAGE_THRE: 0.0 67 | IN_VIS_THRE: 0.2 68 | MODEL_FILE: '' 69 | NMS_THRE: 1.0 70 | OKS_THRE: 0.9 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet152/384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 19 | IMAGE_SIZE: 20 | - 288 21 | - 384 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 72 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 152 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: 
true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 19 | IMAGE_SIZE: 20 | - 192 21 | - 256 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 48 27 | - 64 28 | SIGMA: 2 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 50 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3_caffe.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet50-caffe.pth.tar' 19 | STYLE: 'caffe' 20 | IMAGE_SIZE: 21 | - 192 22 | - 256 23 | NUM_JOINTS: 17 24 | EXTRA: 25 | TARGET_TYPE: 'gaussian' 26 | HEATMAP_SIZE: 27 | - 48 28 | - 64 29 | SIGMA: 2 30 | FINAL_CONV_KERNEL: 1 31 | DECONV_WITH_BIAS: false 32 | NUM_DECONV_LAYERS: 3 33 | NUM_DECONV_FILTERS: 34 | - 256 35 | - 256 36 | - 256 37 | NUM_DECONV_KERNELS: 38 | - 4 39 | - 4 40 | - 4 41 | NUM_LAYERS: 50 42 | LOSS: 43 | USE_TARGET_WEIGHT: true 44 | TRAIN: 45 | BATCH_SIZE: 32 46 | SHUFFLE: true 47 | BEGIN_EPOCH: 0 48 | END_EPOCH: 140 49 | RESUME: false 50 | OPTIMIZER: 'adam' 51 | LR: 0.001 52 | LR_FACTOR: 0.1 53 | LR_STEP: 54 | - 90 55 | - 120 56 | WD: 0.0001 57 | GAMMA1: 0.99 58 | GAMMA2: 0.0 59 | MOMENTUM: 0.9 60 | NESTEROV: false 61 | TEST: 62 | BATCH_SIZE: 32 63 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 64 | BBOX_THRE: 1.0 65 | FLIP_TEST: false 66 | IMAGE_THRE: 0.0 67 | IN_VIS_THRE: 0.2 68 | MODEL_FILE: '' 69 | NMS_THRE: 1.0 70 | OKS_THRE: 0.9 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: 
true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet50/384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: 'coco' 10 | ROOT: 'data/coco/' 11 | TEST_SET: 'val2017' 12 | TRAIN_SET: 'train2017' 13 | FLIP: true 14 | ROT_FACTOR: 40 15 | SCALE_FACTOR: 0.3 16 | MODEL: 17 | NAME: 'pose_resnet' 18 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 19 | IMAGE_SIZE: 20 | - 288 21 | - 384 22 | NUM_JOINTS: 17 23 | EXTRA: 24 | TARGET_TYPE: 'gaussian' 25 | HEATMAP_SIZE: 26 | - 72 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 50 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: 'adam' 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 63 | BBOX_THRE: 1.0 64 | FLIP_TEST: false 65 | IMAGE_THRE: 0.0 66 | IN_VIS_THRE: 0.2 67 | MODEL_FILE: '' 68 | NMS_THRE: 1.0 69 | OKS_THRE: 0.9 70 | USE_GT_BBOX: true 71 | DEBUG: 72 | DEBUG: true 73 | SAVE_BATCH_IMAGES_GT: true 74 | SAVE_BATCH_IMAGES_PRED: true 75 | SAVE_HEATMAPS_GT: true 76 | SAVE_HEATMAPS_PRED: true 77 | -------------------------------------------------------------------------------- /experiments/mpii/resnet101/256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 2 29 | HEATMAP_SIZE: 30 | - 64 31 | - 64 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 101 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet101/384x384_d256x3_adam_lr1e-3.yaml: 
-------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | 8 | DATASET: 9 | DATASET: mpii 10 | ROOT: 'data/mpii/' 11 | TEST_SET: valid 12 | TRAIN_SET: train 13 | FLIP: true 14 | ROT_FACTOR: 30 15 | SCALE_FACTOR: 0.25 16 | MODEL: 17 | NAME: pose_resnet 18 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 19 | IMAGE_SIZE: 20 | - 384 21 | - 384 22 | NUM_JOINTS: 16 23 | EXTRA: 24 | TARGET_TYPE: gaussian 25 | HEATMAP_SIZE: 26 | - 96 27 | - 96 28 | SIGMA: 3 29 | FINAL_CONV_KERNEL: 1 30 | DECONV_WITH_BIAS: false 31 | NUM_DECONV_LAYERS: 3 32 | NUM_DECONV_FILTERS: 33 | - 256 34 | - 256 35 | - 256 36 | NUM_DECONV_KERNELS: 37 | - 4 38 | - 4 39 | - 4 40 | NUM_LAYERS: 101 41 | LOSS: 42 | USE_TARGET_WEIGHT: true 43 | TRAIN: 44 | BATCH_SIZE: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | RESUME: false 49 | OPTIMIZER: adam 50 | LR: 0.001 51 | LR_FACTOR: 0.1 52 | LR_STEP: 53 | - 90 54 | - 120 55 | WD: 0.0001 56 | GAMMA1: 0.99 57 | GAMMA2: 0.0 58 | MOMENTUM: 0.9 59 | NESTEROV: false 60 | TEST: 61 | BATCH_SIZE: 32 62 | FLIP_TEST: false 63 | MODEL_FILE: '' 64 | DEBUG: 65 | DEBUG: false 66 | SAVE_BATCH_IMAGES_GT: true 67 | SAVE_BATCH_IMAGES_PRED: true 68 | SAVE_HEATMAPS_GT: true 69 | SAVE_HEATMAPS_PRED: true 70 | -------------------------------------------------------------------------------- /experiments/mpii/resnet152/256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 2 29 | HEATMAP_SIZE: 30 | - 64 31 | - 64 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 152 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet152/384x384_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | 
MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 22 | IMAGE_SIZE: 23 | - 384 24 | - 384 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 3 29 | HEATMAP_SIZE: 30 | - 96 31 | - 96 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 152 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 24 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet50/256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 2 29 | HEATMAP_SIZE: 30 | - 64 31 | - 64 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 50 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /experiments/mpii/resnet50/384x384_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | GPUS: '0' 2 | DATA_DIR: '' 3 | OUTPUT_DIR: 'output' 4 | LOG_DIR: 'log' 5 | WORKERS: 4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | DATASET: mpii 13 | ROOT: 'data/mpii/' 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | FLIP: true 17 | ROT_FACTOR: 30 18 | SCALE_FACTOR: 0.25 19 | MODEL: 20 | NAME: pose_resnet 21 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 22 | IMAGE_SIZE: 23 | - 384 24 | - 384 25 | NUM_JOINTS: 16 26 | EXTRA: 27 | TARGET_TYPE: gaussian 28 | SIGMA: 3 29 | HEATMAP_SIZE: 30 | - 96 31 | - 96 32 | FINAL_CONV_KERNEL: 1 33 | DECONV_WITH_BIAS: false 34 | NUM_DECONV_LAYERS: 3 35 | NUM_DECONV_FILTERS: 36 | - 256 37 
| - 256 38 | - 256 39 | NUM_DECONV_KERNELS: 40 | - 4 41 | - 4 42 | - 4 43 | NUM_LAYERS: 50 44 | LOSS: 45 | USE_TARGET_WEIGHT: true 46 | TRAIN: 47 | BATCH_SIZE: 32 48 | SHUFFLE: true 49 | BEGIN_EPOCH: 0 50 | END_EPOCH: 140 51 | RESUME: false 52 | OPTIMIZER: adam 53 | LR: 0.001 54 | LR_FACTOR: 0.1 55 | LR_STEP: 56 | - 90 57 | - 120 58 | WD: 0.0001 59 | GAMMA1: 0.99 60 | GAMMA2: 0.0 61 | MOMENTUM: 0.9 62 | NESTEROV: false 63 | TEST: 64 | BATCH_SIZE: 32 65 | FLIP_TEST: false 66 | MODEL_FILE: '' 67 | DEBUG: 68 | DEBUG: false 69 | SAVE_BATCH_IMAGES_GT: true 70 | SAVE_BATCH_IMAGES_PRED: true 71 | SAVE_HEATMAPS_GT: true 72 | SAVE_HEATMAPS_PRED: true 73 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import yaml 13 | 14 | import numpy as np 15 | from easydict import EasyDict as edict 16 | 17 | 18 | config = edict() 19 | 20 | config.OUTPUT_DIR = '' 21 | config.LOG_DIR = '' 22 | config.DATA_DIR = '' 23 | config.GPUS = '0' 24 | config.WORKERS = 4 25 | config.PRINT_FREQ = 20 26 | 27 | # Cudnn related params 28 | config.CUDNN = edict() 29 | config.CUDNN.BENCHMARK = True 30 | config.CUDNN.DETERMINISTIC = False 31 | config.CUDNN.ENABLED = True 32 | 33 | # pose_resnet related params 34 | POSE_RESNET = edict() 35 | POSE_RESNET.NUM_LAYERS = 50 36 | POSE_RESNET.DECONV_WITH_BIAS = False 37 | POSE_RESNET.NUM_DECONV_LAYERS = 3 38 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 39 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 40 | POSE_RESNET.FINAL_CONV_KERNEL = 1 41 | POSE_RESNET.TARGET_TYPE = 'gaussian' 42 | POSE_RESNET.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 43 | POSE_RESNET.SIGMA = 2 44 | 45 | MODEL_EXTRAS = { 46 | 'pose_resnet': POSE_RESNET, 47 | } 48 | 49 | # common params for NETWORK 50 | config.MODEL = edict() 51 | config.MODEL.NAME = 'pose_resnet' 52 | config.MODEL.INIT_WEIGHTS = True 53 | config.MODEL.PRETRAINED = '' 54 | config.MODEL.NUM_JOINTS = 16 55 | config.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 56 | config.MODEL.EXTRA = MODEL_EXTRAS[config.MODEL.NAME] 57 | 58 | config.MODEL.STYLE = 'pytorch' 59 | 60 | config.LOSS = edict() 61 | config.LOSS.USE_TARGET_WEIGHT = True 62 | 63 | # DATASET related params 64 | config.DATASET = edict() 65 | config.DATASET.ROOT = '' 66 | config.DATASET.DATASET = 'mpii' 67 | config.DATASET.TRAIN_SET = 'train' 68 | config.DATASET.TEST_SET = 'valid' 69 | config.DATASET.DATA_FORMAT = 'jpg' 70 | config.DATASET.HYBRID_JOINTS_TYPE = '' 71 | config.DATASET.SELECT_DATA = False 72 | 73 | # training data augmentation 74 | config.DATASET.FLIP = True 75 | config.DATASET.SCALE_FACTOR = 0.25 76 | config.DATASET.ROT_FACTOR = 30 77 | 78 | # train 79 | config.TRAIN = edict() 80 | 81 | 
config.TRAIN.LR_FACTOR = 0.1 82 | config.TRAIN.LR_STEP = [90, 110] 83 | config.TRAIN.LR = 0.001 84 | 85 | config.TRAIN.OPTIMIZER = 'adam' 86 | config.TRAIN.MOMENTUM = 0.9 87 | config.TRAIN.WD = 0.0001 88 | config.TRAIN.NESTEROV = False 89 | config.TRAIN.GAMMA1 = 0.99 90 | config.TRAIN.GAMMA2 = 0.0 91 | 92 | config.TRAIN.BEGIN_EPOCH = 0 93 | config.TRAIN.END_EPOCH = 140 94 | 95 | config.TRAIN.RESUME = False 96 | config.TRAIN.CHECKPOINT = '' 97 | 98 | config.TRAIN.BATCH_SIZE = 32 99 | config.TRAIN.SHUFFLE = True 100 | 101 | # testing 102 | config.TEST = edict() 103 | 104 | # size of images for each device 105 | config.TEST.BATCH_SIZE = 32 106 | # Test Model Epoch 107 | config.TEST.FLIP_TEST = False 108 | config.TEST.POST_PROCESS = True 109 | config.TEST.SHIFT_HEATMAP = True 110 | 111 | config.TEST.USE_GT_BBOX = False 112 | # nms 113 | config.TEST.OKS_THRE = 0.5 114 | config.TEST.IN_VIS_THRE = 0.0 115 | config.TEST.COCO_BBOX_FILE = '' 116 | config.TEST.BBOX_THRE = 1.0 117 | config.TEST.MODEL_FILE = '' 118 | config.TEST.IMAGE_THRE = 0.0 119 | config.TEST.NMS_THRE = 1.0 120 | 121 | # debug 122 | config.DEBUG = edict() 123 | config.DEBUG.DEBUG = False 124 | config.DEBUG.SAVE_BATCH_IMAGES_GT = False 125 | config.DEBUG.SAVE_BATCH_IMAGES_PRED = False 126 | config.DEBUG.SAVE_HEATMAPS_GT = False 127 | config.DEBUG.SAVE_HEATMAPS_PRED = False 128 | 129 | 130 | def _update_dict(k, v): 131 | if k == 'DATASET': 132 | if 'MEAN' in v and v['MEAN']: 133 | v['MEAN'] = np.array([eval(x) if isinstance(x, str) else x 134 | for x in v['MEAN']]) 135 | if 'STD' in v and v['STD']: 136 | v['STD'] = np.array([eval(x) if isinstance(x, str) else x 137 | for x in v['STD']]) 138 | if k == 'MODEL': 139 | if 'EXTRA' in v and 'HEATMAP_SIZE' in v['EXTRA']: 140 | if isinstance(v['EXTRA']['HEATMAP_SIZE'], int): 141 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 142 | [v['EXTRA']['HEATMAP_SIZE'], v['EXTRA']['HEATMAP_SIZE']]) 143 | else: 144 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 145 | v['EXTRA']['HEATMAP_SIZE']) 146 | if 'IMAGE_SIZE' in v: 147 | if isinstance(v['IMAGE_SIZE'], int): 148 | v['IMAGE_SIZE'] = np.array([v['IMAGE_SIZE'], v['IMAGE_SIZE']]) 149 | else: 150 | v['IMAGE_SIZE'] = np.array(v['IMAGE_SIZE']) 151 | for vk, vv in v.items(): 152 | if vk in config[k]: 153 | config[k][vk] = vv 154 | else: 155 | raise ValueError("{}.{} not exist in config.py".format(k, vk)) 156 | 157 | 158 | def update_config(config_file): 159 | exp_config = None 160 | with open(config_file) as f: 161 | exp_config = edict(yaml.load(f)) 162 | for k, v in exp_config.items(): 163 | if k in config: 164 | if isinstance(v, dict): 165 | _update_dict(k, v) 166 | else: 167 | if k == 'SCALES': 168 | config[k][0] = (tuple(v)) 169 | else: 170 | config[k] = v 171 | else: 172 | raise ValueError("{} not exist in config.py".format(k)) 173 | 174 | 175 | def gen_config(config_file): 176 | cfg = dict(config) 177 | for k, v in cfg.items(): 178 | if isinstance(v, edict): 179 | cfg[k] = dict(v) 180 | 181 | with open(config_file, 'w') as f: 182 | yaml.dump(dict(cfg), f, default_flow_style=False) 183 | 184 | 185 | def update_dir(model_dir, log_dir, data_dir): 186 | if model_dir: 187 | config.OUTPUT_DIR = model_dir 188 | 189 | if log_dir: 190 | config.LOG_DIR = log_dir 191 | 192 | if data_dir: 193 | config.DATA_DIR = data_dir 194 | 195 | config.DATASET.ROOT = os.path.join( 196 | config.DATA_DIR, config.DATASET.ROOT) 197 | 198 | config.TEST.COCO_BBOX_FILE = os.path.join( 199 | config.DATA_DIR, config.TEST.COCO_BBOX_FILE) 200 | 201 | config.MODEL.PRETRAINED = os.path.join( 202 | 
config.DATA_DIR, config.MODEL.PRETRAINED) 203 | 204 | 205 | def get_model_name(cfg): 206 | name = cfg.MODEL.NAME 207 | full_name = cfg.MODEL.NAME 208 | extra = cfg.MODEL.EXTRA 209 | if name in ['pose_resnet']: 210 | name = '{model}_{num_layers}'.format( 211 | model=name, 212 | num_layers=extra.NUM_LAYERS) 213 | deconv_suffix = ''.join( 214 | 'd{}'.format(num_filters) 215 | for num_filters in extra.NUM_DECONV_FILTERS) 216 | full_name = '{height}x{width}_{name}_{deconv_suffix}'.format( 217 | height=cfg.MODEL.IMAGE_SIZE[1], 218 | width=cfg.MODEL.IMAGE_SIZE[0], 219 | name=name, 220 | deconv_suffix=deconv_suffix) 221 | else: 222 | raise ValueError('Unkown model: {}'.format(cfg.MODEL)) 223 | 224 | return name, full_name 225 | 226 | 227 | if __name__ == '__main__': 228 | import sys 229 | gen_config(sys.argv[1]) 230 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from core.inference import get_max_preds 14 | 15 | 16 | def calc_dists(preds, target, normalize): 17 | preds = preds.astype(np.float32) 18 | target = target.astype(np.float32) 19 | dists = np.zeros((preds.shape[1], preds.shape[0])) 20 | for n in range(preds.shape[0]): 21 | for c in range(preds.shape[1]): 22 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 23 | normed_preds = preds[n, c, :] / normalize[n] 24 | normed_targets = target[n, c, :] / normalize[n] 25 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 26 | else: 27 | dists[c, n] = -1 28 | return dists 29 | 30 | 31 | def dist_acc(dists, thr=0.5): 32 | ''' Return percentage below threshold while ignoring values with a -1 ''' 33 | dist_cal = np.not_equal(dists, -1) 34 | num_dist_cal = dist_cal.sum() 35 | if num_dist_cal > 0: 36 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 37 | else: 38 | return -1 39 | 40 | 41 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 42 | ''' 43 | Calculate accuracy according to PCK, 44 | but uses ground truth heatmap rather than x,y locations 45 | First value to be returned is average accuracy across 'idxs', 46 | followed by individual accuracies 47 | ''' 48 | idx = list(range(output.shape[1])) 49 | norm = 1.0 50 | if hm_type == 'gaussian': 51 | pred, _ = get_max_preds(output) 52 | target, _ = get_max_preds(target) 53 | h = output.shape[2] 54 | w = output.shape[3] 55 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 56 | dists = calc_dists(pred, target, norm) 57 | 58 | acc = np.zeros((len(idx) + 1)) 59 | avg_acc = 0 60 | cnt = 0 61 | 62 | for i in range(len(idx)): 63 | acc[i + 1] = dist_acc(dists[idx[i]]) 64 | if acc[i + 1] >= 0: 65 | avg_acc = avg_acc + acc[i + 1] 66 | cnt += 1 67 | 68 | avg_acc = avg_acc / cnt if cnt != 0 else 0 69 | if cnt != 0: 70 | acc[0] = avg_acc 71 | return acc, avg_acc, cnt, pred 72 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import time 13 | import os 14 | 15 | import numpy as np 16 | import torch 17 | 18 | from core.config import get_model_name 19 | from core.evaluate import accuracy 20 | from core.inference import get_final_preds 21 | from utils.transforms import flip_back 22 | from utils.vis import save_debug_images 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | def train(config, train_loader, model, criterion, optimizer, epoch, 29 | output_dir, tb_log_dir, writer_dict): 30 | batch_time = AverageMeter() 31 | data_time = AverageMeter() 32 | losses = AverageMeter() 33 | acc = AverageMeter() 34 | 35 | # switch to train mode 36 | model.train() 37 | 38 | end = time.time() 39 | for i, (input, target, target_weight, meta) in enumerate(train_loader): 40 | # measure data loading time 41 | data_time.update(time.time() - end) 42 | 43 | # compute output 44 | output = model(input) 45 | target = target.cuda(non_blocking=True) 46 | target_weight = target_weight.cuda(non_blocking=True) 47 | 48 | loss = criterion(output, target, target_weight) 49 | 50 | # compute gradient and do update step 51 | optimizer.zero_grad() 52 | loss.backward() 53 | optimizer.step() 54 | 55 | # measure accuracy and record loss 56 | losses.update(loss.item(), input.size(0)) 57 | 58 | _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), 59 | target.detach().cpu().numpy()) 60 | acc.update(avg_acc, cnt) 61 | 62 | # measure elapsed time 63 | batch_time.update(time.time() - end) 64 | end = time.time() 65 | 66 | if i % config.PRINT_FREQ == 0: 67 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 68 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 69 | 'Speed {speed:.1f} samples/s\t' \ 70 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 71 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 72 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 73 | epoch, i, len(train_loader), batch_time=batch_time, 74 | speed=input.size(0)/batch_time.val, 75 | data_time=data_time, loss=losses, acc=acc) 76 | logger.info(msg) 77 | 78 | writer = writer_dict['writer'] 79 | global_steps = writer_dict['train_global_steps'] 80 | writer.add_scalar('train_loss', losses.val, global_steps) 81 | writer.add_scalar('train_acc', acc.val, global_steps) 82 | writer_dict['train_global_steps'] = global_steps + 1 83 | 84 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 85 | save_debug_images(config, input, meta, target, pred*4, output, 86 | prefix) 87 | 88 | 89 | def validate(config, val_loader, val_dataset, model, criterion, output_dir, 90 | tb_log_dir, writer_dict=None): 91 | batch_time = AverageMeter() 92 | losses = AverageMeter() 93 | acc = AverageMeter() 94 | 95 | # switch to evaluate mode 96 | model.eval() 97 | 98 | num_samples = len(val_dataset) 99 | all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), 100 | dtype=np.float32) 101 | all_boxes = np.zeros((num_samples, 6)) 102 | image_path = [] 103 | filenames = [] 104 | imgnums = [] 105 | idx = 0 106 | with torch.no_grad(): 107 | end = time.time() 108 | for i, (input, target, target_weight, meta) in enumerate(val_loader): 109 | # compute output 110 | 
output = model(input) 111 | if config.TEST.FLIP_TEST: 112 | # this part is ugly, because pytorch has not supported negative index 113 | # input_flipped = model(input[:, :, :, ::-1]) 114 | input_flipped = np.flip(input.cpu().numpy(), 3).copy() 115 | input_flipped = torch.from_numpy(input_flipped).cuda() 116 | output_flipped = model(input_flipped) 117 | output_flipped = flip_back(output_flipped.cpu().numpy(), 118 | val_dataset.flip_pairs) 119 | output_flipped = torch.from_numpy(output_flipped.copy()).cuda() 120 | 121 | # feature is not aligned, shift flipped heatmap for higher accuracy 122 | if config.TEST.SHIFT_HEATMAP: 123 | output_flipped[:, :, :, 1:] = \ 124 | output_flipped.clone()[:, :, :, 0:-1] 125 | # output_flipped[:, :, :, 0] = 0 126 | 127 | output = (output + output_flipped) * 0.5 128 | 129 | target = target.cuda(non_blocking=True) 130 | target_weight = target_weight.cuda(non_blocking=True) 131 | 132 | loss = criterion(output, target, target_weight) 133 | 134 | num_images = input.size(0) 135 | # measure accuracy and record loss 136 | losses.update(loss.item(), num_images) 137 | _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), 138 | target.cpu().numpy()) 139 | 140 | acc.update(avg_acc, cnt) 141 | 142 | # measure elapsed time 143 | batch_time.update(time.time() - end) 144 | end = time.time() 145 | 146 | c = meta['center'].numpy() 147 | s = meta['scale'].numpy() 148 | score = meta['score'].numpy() 149 | 150 | preds, maxvals = get_final_preds( 151 | config, output.clone().cpu().numpy(), c, s) 152 | 153 | all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] 154 | all_preds[idx:idx + num_images, :, 2:3] = maxvals 155 | # double check this all_boxes parts 156 | all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] 157 | all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] 158 | all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) 159 | all_boxes[idx:idx + num_images, 5] = score 160 | image_path.extend(meta['image']) 161 | if config.DATASET.DATASET == 'posetrack': 162 | filenames.extend(meta['filename']) 163 | imgnums.extend(meta['imgnum'].numpy()) 164 | 165 | idx += num_images 166 | 167 | if i % config.PRINT_FREQ == 0: 168 | msg = 'Test: [{0}/{1}]\t' \ 169 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 170 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 171 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 172 | i, len(val_loader), batch_time=batch_time, 173 | loss=losses, acc=acc) 174 | logger.info(msg) 175 | 176 | prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i) 177 | save_debug_images(config, input, meta, target, pred*4, output, 178 | prefix) 179 | 180 | name_values, perf_indicator = val_dataset.evaluate( 181 | config, all_preds, output_dir, all_boxes, image_path, 182 | filenames, imgnums) 183 | 184 | _, full_arch_name = get_model_name(config) 185 | if isinstance(name_values, list): 186 | for name_value in name_values: 187 | _print_name_value(name_value, full_arch_name) 188 | else: 189 | _print_name_value(name_values, full_arch_name) 190 | 191 | if writer_dict: 192 | writer = writer_dict['writer'] 193 | global_steps = writer_dict['valid_global_steps'] 194 | writer.add_scalar('valid_loss', losses.avg, global_steps) 195 | writer.add_scalar('valid_acc', acc.avg, global_steps) 196 | if isinstance(name_values, list): 197 | for name_value in name_values: 198 | writer.add_scalars('valid', dict(name_value), global_steps) 199 | else: 200 | writer.add_scalars('valid', dict(name_values), global_steps) 201 | writer_dict['valid_global_steps'] = global_steps + 1 202 | 203 | 
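    # perf_indicator is the dataset's headline metric from evaluate()
    # (AP for COCO, PCKh@0.5 Mean for MPII); callers can use it to select
    # the best-performing checkpoint.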
return perf_indicator 204 | 205 | 206 | # markdown format output 207 | def _print_name_value(name_value, full_arch_name): 208 | names = name_value.keys() 209 | values = name_value.values() 210 | num_values = len(name_value) 211 | logger.info( 212 | '| Arch ' + 213 | ' '.join(['| {}'.format(name) for name in names]) + 214 | ' |' 215 | ) 216 | logger.info('|---' * (num_values+1) + '|') 217 | logger.info( 218 | '| ' + full_arch_name + ' ' + 219 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 220 | ' |' 221 | ) 222 | 223 | 224 | class AverageMeter(object): 225 | """Computes and stores the average and current value""" 226 | def __init__(self): 227 | self.reset() 228 | 229 | def reset(self): 230 | self.val = 0 231 | self.avg = 0 232 | self.sum = 0 233 | self.count = 0 234 | 235 | def update(self, val, n=1): 236 | self.val = val 237 | self.sum += val * n 238 | self.count += n 239 | self.avg = self.sum / self.count if self.count != 0 else 0 240 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | 15 | from utils.transforms import transform_preds 16 | 17 | 18 | def get_max_preds(batch_heatmaps): 19 | ''' 20 | get predictions from score maps 21 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 22 | ''' 23 | assert isinstance(batch_heatmaps, np.ndarray), \ 24 | 'batch_heatmaps should be numpy.ndarray' 25 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 26 | 27 | batch_size = batch_heatmaps.shape[0] 28 | num_joints = batch_heatmaps.shape[1] 29 | width = batch_heatmaps.shape[3] 30 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 31 | idx = np.argmax(heatmaps_reshaped, 2) 32 | maxvals = np.amax(heatmaps_reshaped, 2) 33 | 34 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 35 | idx = idx.reshape((batch_size, num_joints, 1)) 36 | 37 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 38 | 39 | preds[:, :, 0] = (preds[:, :, 0]) % width 40 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 41 | 42 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 43 | pred_mask = pred_mask.astype(np.float32) 44 | 45 | preds *= pred_mask 46 | return preds, maxvals 47 | 48 | 49 | def get_final_preds(config, batch_heatmaps, center, scale): 50 | coords, maxvals = get_max_preds(batch_heatmaps) 51 | 52 | heatmap_height = batch_heatmaps.shape[2] 53 | heatmap_width = batch_heatmaps.shape[3] 54 | 55 | # post-processing 56 | if config.TEST.POST_PROCESS: 57 | for n in range(coords.shape[0]): 58 | for p in range(coords.shape[1]): 59 | hm = batch_heatmaps[n][p] 60 | px = int(math.floor(coords[n][p][0] + 0.5)) 61 | py = int(math.floor(coords[n][p][1] + 0.5)) 62 | if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1: 63 | diff = np.array([hm[py][px+1] - hm[py][px-1], 64 | hm[py+1][px]-hm[py-1][px]]) 65 | coords[n][p] += np.sign(diff) * .25 66 | 67 | preds = coords.copy() 68 | 69 | # Transform back 70 | for i in range(coords.shape[0]): 71 | 
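        # transform_preds maps heatmap-space coordinates back to the original
        # image space using the person box center and scale.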
preds[i] = transform_preds(coords[i], center[i], scale[i], 72 | [heatmap_width, heatmap_height]) 73 | 74 | return preds, maxvals 75 | -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch.nn as nn 12 | 13 | 14 | class JointsMSELoss(nn.Module): 15 | def __init__(self, use_target_weight): 16 | super(JointsMSELoss, self).__init__() 17 | self.criterion = nn.MSELoss(size_average=True) 18 | self.use_target_weight = use_target_weight 19 | 20 | def forward(self, output, target, target_weight): 21 | batch_size = output.size(0) 22 | num_joints = output.size(1) 23 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 24 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 25 | loss = 0 26 | 27 | for idx in range(num_joints): 28 | heatmap_pred = heatmaps_pred[idx].squeeze() 29 | heatmap_gt = heatmaps_gt[idx].squeeze() 30 | if self.use_target_weight: 31 | loss += 0.5 * self.criterion( 32 | heatmap_pred.mul(target_weight[:, idx]), 33 | heatmap_gt.mul(target_weight[:, idx]) 34 | ) 35 | else: 36 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 37 | 38 | return loss / num_joints 39 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import copy 12 | import logging 13 | import random 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from torch.utils.data import Dataset 19 | 20 | from utils.transforms import get_affine_transform 21 | from utils.transforms import affine_transform 22 | from utils.transforms import fliplr_joints 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class JointsDataset(Dataset): 29 | def __init__(self, cfg, root, image_set, is_train, transform=None): 30 | self.num_joints = 0 31 | self.pixel_std = 200 32 | self.flip_pairs = [] 33 | self.parent_ids = [] 34 | 35 | self.is_train = is_train 36 | self.root = root 37 | self.image_set = image_set 38 | 39 | self.output_path = cfg.OUTPUT_DIR 40 | self.data_format = cfg.DATASET.DATA_FORMAT 41 | 42 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 43 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 44 | self.flip = cfg.DATASET.FLIP 45 | 46 | self.image_size = cfg.MODEL.IMAGE_SIZE 47 | self.target_type = cfg.MODEL.EXTRA.TARGET_TYPE 48 | self.heatmap_size = cfg.MODEL.EXTRA.HEATMAP_SIZE 49 | self.sigma = cfg.MODEL.EXTRA.SIGMA 50 | 51 | self.transform = transform 52 | self.db = [] 53 | 54 | def _get_db(self): 55 | raise NotImplementedError 56 | 57 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 58 | raise NotImplementedError 59 | 60 | def __len__(self,): 61 | return len(self.db) 62 | 63 | def __getitem__(self, idx): 64 | db_rec = copy.deepcopy(self.db[idx]) 65 | 66 | image_file = db_rec['image'] 67 | filename = db_rec['filename'] if 'filename' in db_rec else '' 68 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 69 | 70 | if self.data_format == 'zip': 71 | from utils import zipreader 72 | data_numpy = zipreader.imread( 73 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 74 | else: 75 | data_numpy = cv2.imread( 76 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 77 | 78 | if data_numpy is None: 79 | logger.error('=> fail to read {}'.format(image_file)) 80 | raise ValueError('Fail to read {}'.format(image_file)) 81 | 82 | joints = db_rec['joints_3d'] 83 | joints_vis = db_rec['joints_3d_vis'] 84 | 85 | c = db_rec['center'] 86 | s = db_rec['scale'] 87 | score = db_rec['score'] if 'score' in db_rec else 1 88 | r = 0 89 | 90 | if self.is_train: 91 | sf = self.scale_factor 92 | rf = self.rotation_factor 93 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 94 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 95 | if random.random() <= 0.6 else 0 96 | 97 | if self.flip and random.random() <= 0.5: 98 | data_numpy = data_numpy[:, ::-1, :] 99 | joints, joints_vis = fliplr_joints( 100 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 101 | c[0] = data_numpy.shape[1] - c[0] - 1 102 | 103 | trans = get_affine_transform(c, s, r, self.image_size) 104 | input = cv2.warpAffine( 105 | data_numpy, 106 | trans, 107 | (int(self.image_size[0]), int(self.image_size[1])), 108 | flags=cv2.INTER_LINEAR) 109 | 110 | if self.transform: 111 | input = self.transform(input) 112 | 113 | for i in range(self.num_joints): 114 | if joints_vis[i, 0] > 0.0: 115 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 116 | 117 | target, target_weight = self.generate_target(joints, joints_vis) 118 | 119 | target = 
torch.from_numpy(target) 120 | target_weight = torch.from_numpy(target_weight) 121 | 122 | meta = { 123 | 'image': image_file, 124 | 'filename': filename, 125 | 'imgnum': imgnum, 126 | 'joints': joints, 127 | 'joints_vis': joints_vis, 128 | 'center': c, 129 | 'scale': s, 130 | 'rotation': r, 131 | 'score': score 132 | } 133 | 134 | return input, target, target_weight, meta 135 | 136 | def select_data(self, db): 137 | db_selected = [] 138 | for rec in db: 139 | num_vis = 0 140 | joints_x = 0.0 141 | joints_y = 0.0 142 | for joint, joint_vis in zip( 143 | rec['joints_3d'], rec['joints_3d_vis']): 144 | if joint_vis[0] <= 0: 145 | continue 146 | num_vis += 1 147 | 148 | joints_x += joint[0] 149 | joints_y += joint[1] 150 | if num_vis == 0: 151 | continue 152 | 153 | joints_x, joints_y = joints_x / num_vis, joints_y / num_vis 154 | 155 | area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) 156 | joints_center = np.array([joints_x, joints_y]) 157 | bbox_center = np.array(rec['center']) 158 | diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) 159 | ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) 160 | 161 | metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 162 | if ks > metric: 163 | db_selected.append(rec) 164 | 165 | logger.info('=> num db: {}'.format(len(db))) 166 | logger.info('=> num selected db: {}'.format(len(db_selected))) 167 | return db_selected 168 | 169 | def generate_target(self, joints, joints_vis): 170 | ''' 171 | :param joints: [num_joints, 3] 172 | :param joints_vis: [num_joints, 3] 173 | :return: target, target_weight(1: visible, 0: invisible) 174 | ''' 175 | target_weight = np.ones((self.num_joints, 1), dtype=np.float32) 176 | target_weight[:, 0] = joints_vis[:, 0] 177 | 178 | assert self.target_type == 'gaussian', \ 179 | 'Only support gaussian map now!' 
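        # Sketch of the construction below: each visible joint is rendered as an
        # unnormalized 2D Gaussian on the heatmap grid,
        #     mu = joint_xy / feat_stride,   feat_stride = image_size / heatmap_size
        #     target[j](x, y) = exp(-((x - mu_x)**2 + (y - mu_y)**2) / (2 * sigma**2))
        # so the peak value is exactly 1 at the joint. Joints whose 3*sigma window
        # falls entirely outside the heatmap get target_weight set to 0 instead.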
180 | 181 | if self.target_type == 'gaussian': 182 | target = np.zeros((self.num_joints, 183 | self.heatmap_size[1], 184 | self.heatmap_size[0]), 185 | dtype=np.float32) 186 | 187 | tmp_size = self.sigma * 3 188 | 189 | for joint_id in range(self.num_joints): 190 | feat_stride = self.image_size / self.heatmap_size 191 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) 192 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) 193 | # Check that any part of the gaussian is in-bounds 194 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 195 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 196 | if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ 197 | or br[0] < 0 or br[1] < 0: 198 | # If not, just return the image as is 199 | target_weight[joint_id] = 0 200 | continue 201 | 202 | # # Generate gaussian 203 | size = 2 * tmp_size + 1 204 | x = np.arange(0, size, 1, np.float32) 205 | y = x[:, np.newaxis] 206 | x0 = y0 = size // 2 207 | # The gaussian is not normalized, we want the center value to equal 1 208 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) 209 | 210 | # Usable gaussian range 211 | g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 212 | g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 213 | # Image range 214 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 215 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 216 | 217 | v = target_weight[joint_id] 218 | if v > 0.5: 219 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ 220 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 221 | 222 | return target, target_weight 223 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .mpii import MPIIDataset as mpii 12 | from .coco import COCODataset as coco 13 | -------------------------------------------------------------------------------- /lib/dataset/coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import os 13 | import pickle 14 | from collections import defaultdict 15 | from collections import OrderedDict 16 | 17 | import json_tricks as json 18 | import numpy as np 19 | from pycocotools.coco import COCO 20 | from pycocotools.cocoeval import COCOeval 21 | 22 | from dataset.JointsDataset import JointsDataset 23 | from nms.nms import oks_nms 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class COCODataset(JointsDataset): 30 | ''' 31 | "keypoints": { 32 | 0: "nose", 33 | 1: "left_eye", 34 | 2: "right_eye", 35 | 3: "left_ear", 36 | 4: "right_ear", 37 | 5: "left_shoulder", 38 | 6: "right_shoulder", 39 | 7: "left_elbow", 40 | 8: "right_elbow", 41 | 9: "left_wrist", 42 | 10: "right_wrist", 43 | 11: "left_hip", 44 | 12: "right_hip", 45 | 13: "left_knee", 46 | 14: "right_knee", 47 | 15: "left_ankle", 48 | 16: "right_ankle" 49 | }, 50 | "skeleton": [ 51 | [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13], [6,7],[6,8], 52 | [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]] 53 | ''' 54 | def __init__(self, cfg, root, image_set, is_train, transform=None): 55 | super().__init__(cfg, root, image_set, is_train, transform) 56 | self.nms_thre = cfg.TEST.NMS_THRE 57 | self.image_thre = cfg.TEST.IMAGE_THRE 58 | self.oks_thre = cfg.TEST.OKS_THRE 59 | self.in_vis_thre = cfg.TEST.IN_VIS_THRE 60 | self.bbox_file = cfg.TEST.COCO_BBOX_FILE 61 | self.use_gt_bbox = cfg.TEST.USE_GT_BBOX 62 | self.image_width = cfg.MODEL.IMAGE_SIZE[0] 63 | self.image_height = cfg.MODEL.IMAGE_SIZE[1] 64 | self.aspect_ratio = self.image_width * 1.0 / self.image_height 65 | self.pixel_std = 200 66 | self.coco = COCO(self._get_ann_file_keypoint()) 67 | 68 | # deal with class names 69 | cats = [cat['name'] 70 | for cat in self.coco.loadCats(self.coco.getCatIds())] 71 | self.classes = ['__background__'] + cats 72 | logger.info('=> classes: {}'.format(self.classes)) 73 | self.num_classes = len(self.classes) 74 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 75 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) 76 | self._coco_ind_to_class_ind = dict([(self._class_to_coco_ind[cls], 77 | self._class_to_ind[cls]) 78 | for cls in self.classes[1:]]) 79 | 80 | # load image file names 81 | self.image_set_index = self._load_image_set_index() 82 | self.num_images = len(self.image_set_index) 83 | logger.info('=> num_images: {}'.format(self.num_images)) 84 | 85 | self.num_joints = 17 86 | self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], 87 | [9, 10], [11, 12], [13, 14], [15, 16]] 88 | self.parent_ids = None 89 | 90 | self.db = self._get_db() 91 | 92 | if is_train and cfg.DATASET.SELECT_DATA: 93 | self.db = self.select_data(self.db) 94 | 95 | logger.info('=> load {} samples'.format(len(self.db))) 96 | 97 | def _get_ann_file_keypoint(self): 98 | """ self.root / annotations / person_keypoints_train2017.json """ 99 | prefix = 'person_keypoints' \ 100 | if 'test' not in self.image_set else 'image_info' 101 | return os.path.join(self.root, 'annotations', 102 | prefix + '_' + self.image_set + '.json') 103 | 104 | def _load_image_set_index(self): 105 | """ image id: int """ 106 | image_ids = self.coco.getImgIds() 107 | return image_ids 108 | 109 | def _get_db(self): 110 | if self.is_train or 
self.use_gt_bbox: 111 | # use ground truth bbox 112 | gt_db = self._load_coco_keypoint_annotations() 113 | else: 114 | # use bbox from detection 115 | gt_db = self._load_coco_person_detection_results() 116 | return gt_db 117 | 118 | def _load_coco_keypoint_annotations(self): 119 | """ ground truth bbox and keypoints """ 120 | gt_db = [] 121 | for index in self.image_set_index: 122 | gt_db.extend(self._load_coco_keypoint_annotation_kernal(index)) 123 | return gt_db 124 | 125 | def _load_coco_keypoint_annotation_kernal(self, index): 126 | """ 127 | coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] 128 | iscrowd: 129 | crowd instances are handled by marking their overlaps with all categories to -1 130 | and later excluded in training 131 | bbox: 132 | [x1, y1, w, h] 133 | :param index: coco image id 134 | :return: db entry 135 | """ 136 | im_ann = self.coco.loadImgs(index)[0] 137 | width = im_ann['width'] 138 | height = im_ann['height'] 139 | 140 | annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False) 141 | objs = self.coco.loadAnns(annIds) 142 | 143 | # sanitize bboxes 144 | valid_objs = [] 145 | for obj in objs: 146 | x, y, w, h = obj['bbox'] 147 | x1 = np.max((0, x)) 148 | y1 = np.max((0, y)) 149 | x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) 150 | y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) 151 | if obj['area'] > 0 and x2 >= x1 and y2 >= y1: 152 | # obj['clean_bbox'] = [x1, y1, x2, y2] 153 | obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1] 154 | valid_objs.append(obj) 155 | objs = valid_objs 156 | 157 | rec = [] 158 | for obj in objs: 159 | cls = self._coco_ind_to_class_ind[obj['category_id']] 160 | if cls != 1: 161 | continue 162 | 163 | # ignore objs without keypoints annotation 164 | if max(obj['keypoints']) == 0: 165 | continue 166 | 167 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 168 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 169 | for ipt in range(self.num_joints): 170 | joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0] 171 | joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1] 172 | joints_3d[ipt, 2] = 0 173 | t_vis = obj['keypoints'][ipt * 3 + 2] 174 | if t_vis > 1: 175 | t_vis = 1 176 | joints_3d_vis[ipt, 0] = t_vis 177 | joints_3d_vis[ipt, 1] = t_vis 178 | joints_3d_vis[ipt, 2] = 0 179 | 180 | center, scale = self._box2cs(obj['clean_bbox'][:4]) 181 | rec.append({ 182 | 'image': self.image_path_from_index(index), 183 | 'center': center, 184 | 'scale': scale, 185 | 'joints_3d': joints_3d, 186 | 'joints_3d_vis': joints_3d_vis, 187 | 'filename': '', 188 | 'imgnum': 0, 189 | }) 190 | 191 | return rec 192 | 193 | def _box2cs(self, box): 194 | x, y, w, h = box[:4] 195 | return self._xywh2cs(x, y, w, h) 196 | 197 | def _xywh2cs(self, x, y, w, h): 198 | center = np.zeros((2), dtype=np.float32) 199 | center[0] = x + w * 0.5 200 | center[1] = y + h * 0.5 201 | 202 | if w > self.aspect_ratio * h: 203 | h = w * 1.0 / self.aspect_ratio 204 | elif w < self.aspect_ratio * h: 205 | w = h * self.aspect_ratio 206 | scale = np.array( 207 | [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], 208 | dtype=np.float32) 209 | if center[0] != -1: 210 | scale = scale * 1.25 211 | 212 | return center, scale 213 | 214 | def image_path_from_index(self, index): 215 | """ example: images / train2017 / 000000119993.jpg """ 216 | file_name = '%012d.jpg' % index 217 | if '2014' in self.image_set: 218 | file_name = 'COCO_%s_' % self.image_set + file_name 219 | 220 | prefix = 'test2017' if 'test' in self.image_set else 
self.image_set 221 | 222 | data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix 223 | 224 | image_path = os.path.join( 225 | self.root, 'images', data_name, file_name) 226 | 227 | return image_path 228 | 229 | def _load_coco_person_detection_results(self): 230 | all_boxes = None 231 | with open(self.bbox_file, 'r') as f: 232 | all_boxes = json.load(f) 233 | 234 | if not all_boxes: 235 | logger.error('=> Load %s fail!' % self.bbox_file) 236 | return None 237 | 238 | logger.info('=> Total boxes: {}'.format(len(all_boxes))) 239 | 240 | kpt_db = [] 241 | num_boxes = 0 242 | for n_img in range(0, len(all_boxes)): 243 | det_res = all_boxes[n_img] 244 | if det_res['category_id'] != 1: 245 | continue 246 | img_name = self.image_path_from_index(det_res['image_id']) 247 | box = det_res['bbox'] 248 | score = det_res['score'] 249 | 250 | if score < self.image_thre: 251 | continue 252 | 253 | num_boxes = num_boxes + 1 254 | 255 | center, scale = self._box2cs(box) 256 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 257 | joints_3d_vis = np.ones( 258 | (self.num_joints, 3), dtype=np.float) 259 | kpt_db.append({ 260 | 'image': img_name, 261 | 'center': center, 262 | 'scale': scale, 263 | 'score': score, 264 | 'joints_3d': joints_3d, 265 | 'joints_3d_vis': joints_3d_vis, 266 | }) 267 | 268 | logger.info('=> Total boxes after fliter low score@{}: {}'.format( 269 | self.image_thre, num_boxes)) 270 | return kpt_db 271 | 272 | # need double check this API and classes field 273 | def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, 274 | *args, **kwargs): 275 | res_folder = os.path.join(output_dir, 'results') 276 | if not os.path.exists(res_folder): 277 | os.makedirs(res_folder) 278 | res_file = os.path.join( 279 | res_folder, 'keypoints_%s_results.json' % self.image_set) 280 | 281 | # person x (keypoints) 282 | _kpts = [] 283 | for idx, kpt in enumerate(preds): 284 | _kpts.append({ 285 | 'keypoints': kpt, 286 | 'center': all_boxes[idx][0:2], 287 | 'scale': all_boxes[idx][2:4], 288 | 'area': all_boxes[idx][4], 289 | 'score': all_boxes[idx][5], 290 | 'image': int(img_path[idx][-16:-4]) 291 | }) 292 | # image x person x (keypoints) 293 | kpts = defaultdict(list) 294 | for kpt in _kpts: 295 | kpts[kpt['image']].append(kpt) 296 | 297 | # rescoring and oks nms 298 | num_joints = self.num_joints 299 | in_vis_thre = self.in_vis_thre 300 | oks_thre = self.oks_thre 301 | oks_nmsed_kpts = [] 302 | for img in kpts.keys(): 303 | img_kpts = kpts[img] 304 | for n_p in img_kpts: 305 | box_score = n_p['score'] 306 | kpt_score = 0 307 | valid_num = 0 308 | for n_jt in range(0, num_joints): 309 | t_s = n_p['keypoints'][n_jt][2] 310 | if t_s > in_vis_thre: 311 | kpt_score = kpt_score + t_s 312 | valid_num = valid_num + 1 313 | if valid_num != 0: 314 | kpt_score = kpt_score / valid_num 315 | # rescoring 316 | n_p['score'] = kpt_score * box_score 317 | keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))], 318 | oks_thre) 319 | if len(keep) == 0: 320 | oks_nmsed_kpts.append(img_kpts) 321 | else: 322 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) 323 | 324 | self._write_coco_keypoint_results( 325 | oks_nmsed_kpts, res_file) 326 | if 'test' not in self.image_set: 327 | info_str = self._do_python_keypoint_eval( 328 | res_file, res_folder) 329 | name_value = OrderedDict(info_str) 330 | return name_value, name_value['AP'] 331 | else: 332 | return {'Null': 0}, 0 333 | 334 | def _write_coco_keypoint_results(self, keypoints, res_file): 335 | data_pack = [{'cat_id': 
self._class_to_coco_ind[cls], 336 | 'cls_ind': cls_ind, 337 | 'cls': cls, 338 | 'ann_type': 'keypoints', 339 | 'keypoints': keypoints 340 | } 341 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'] 342 | 343 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) 344 | logger.info('=> Writing results json to %s' % res_file) 345 | with open(res_file, 'w') as f: 346 | json.dump(results, f, sort_keys=True, indent=4) 347 | try: 348 | json.load(open(res_file)) 349 | except Exception: 350 | content = [] 351 | with open(res_file, 'r') as f: 352 | for line in f: 353 | content.append(line) 354 | content[-1] = ']' 355 | with open(res_file, 'w') as f: 356 | for c in content: 357 | f.write(c) 358 | 359 | def _coco_keypoint_results_one_category_kernel(self, data_pack): 360 | cat_id = data_pack['cat_id'] 361 | keypoints = data_pack['keypoints'] 362 | cat_results = [] 363 | 364 | for img_kpts in keypoints: 365 | if len(img_kpts) == 0: 366 | continue 367 | 368 | _key_points = np.array([img_kpts[k]['keypoints'] 369 | for k in range(len(img_kpts))]) 370 | key_points = np.zeros( 371 | (_key_points.shape[0], self.num_joints * 3), dtype=np.float) 372 | 373 | for ipt in range(self.num_joints): 374 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] 375 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] 376 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score. 377 | 378 | result = [{'image_id': img_kpts[k]['image'], 379 | 'category_id': cat_id, 380 | 'keypoints': list(key_points[k]), 381 | 'score': img_kpts[k]['score'], 382 | 'center': list(img_kpts[k]['center']), 383 | 'scale': list(img_kpts[k]['scale']) 384 | } for k in range(len(img_kpts))] 385 | cat_results.extend(result) 386 | 387 | return cat_results 388 | 389 | def _do_python_keypoint_eval(self, res_file, res_folder): 390 | coco_dt = self.coco.loadRes(res_file) 391 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') 392 | coco_eval.params.useSegm = None 393 | coco_eval.evaluate() 394 | coco_eval.accumulate() 395 | coco_eval.summarize() 396 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] 397 | 398 | info_str = [] 399 | for ind, name in enumerate(stats_names): 400 | info_str.append((name, coco_eval.stats[ind])) 401 | 402 | eval_file = os.path.join( 403 | res_folder, 'keypoints_%s_results.pkl' % self.image_set) 404 | 405 | with open(eval_file, 'wb') as f: 406 | pickle.dump(coco_eval, f, pickle.HIGHEST_PROTOCOL) 407 | logger.info('=> coco eval results saved to %s' % eval_file) 408 | 409 | return info_str 410 | -------------------------------------------------------------------------------- /lib/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from collections import OrderedDict 12 | import logging 13 | import os 14 | import json_tricks as json 15 | 16 | import numpy as np 17 | from scipy.io import loadmat, savemat 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MPIIDataset(JointsDataset): 26 | def __init__(self, cfg, root, image_set, is_train, transform=None): 27 | super().__init__(cfg, root, image_set, is_train, transform) 28 | 29 | self.num_joints = 16 30 | self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] 31 | self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] 32 | 33 | self.db = self._get_db() 34 | 35 | if is_train and cfg.DATASET.SELECT_DATA: 36 | self.db = self.select_data(self.db) 37 | 38 | logger.info('=> load {} samples'.format(len(self.db))) 39 | 40 | def _get_db(self): 41 | # create train/val split 42 | file_name = os.path.join(self.root, 43 | 'annot', 44 | self.image_set+'.json') 45 | with open(file_name) as anno_file: 46 | anno = json.load(anno_file) 47 | 48 | gt_db = [] 49 | for a in anno: 50 | image_name = a['image'] 51 | 52 | c = np.array(a['center'], dtype=np.float) 53 | s = np.array([a['scale'], a['scale']], dtype=np.float) 54 | 55 | # Adjust center/scale slightly to avoid cropping limbs 56 | if c[0] != -1: 57 | c[1] = c[1] + 15 * s[1] 58 | s = s * 1.25 59 | 60 | # MPII uses matlab format, index is based 1, 61 | # we should first convert to 0-based index 62 | c = c - 1 63 | 64 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 65 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 66 | if self.image_set != 'test': 67 | joints = np.array(a['joints']) 68 | joints[:, 0:2] = joints[:, 0:2] - 1 69 | joints_vis = np.array(a['joints_vis']) 70 | assert len(joints) == self.num_joints, \ 71 | 'joint num diff: {} vs {}'.format(len(joints), 72 | self.num_joints) 73 | 74 | joints_3d[:, 0:2] = joints[:, 0:2] 75 | joints_3d_vis[:, 0] = joints_vis[:] 76 | joints_3d_vis[:, 1] = joints_vis[:] 77 | 78 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 79 | gt_db.append({ 80 | 'image': os.path.join(self.root, image_dir, image_name), 81 | 'center': c, 82 | 'scale': s, 83 | 'joints_3d': joints_3d, 84 | 'joints_3d_vis': joints_3d_vis, 85 | 'filename': '', 86 | 'imgnum': 0, 87 | }) 88 | 89 | return gt_db 90 | 91 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 92 | # convert 0-based index to 1-based index 93 | preds = preds[:, :, 0:2] + 1.0 94 | 95 | if output_dir: 96 | pred_file = os.path.join(output_dir, 'pred.mat') 97 | savemat(pred_file, mdict={'preds': preds}) 98 | 99 | if 'test' in cfg.DATASET.TEST_SET: 100 | return {'Null': 0.0}, 0.0 101 | 102 | SC_BIAS = 0.6 103 | threshold = 0.5 104 | 105 | gt_file = os.path.join(cfg.DATASET.ROOT, 106 | 'annot', 107 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 108 | gt_dict = loadmat(gt_file) 109 | dataset_joints = gt_dict['dataset_joints'] 110 | jnt_missing = gt_dict['jnt_missing'] 111 | pos_gt_src = gt_dict['pos_gt_src'] 112 | headboxes_src = gt_dict['headboxes_src'] 113 | 114 | pos_pred_src = np.transpose(preds, [1, 2, 0]) 115 | 116 | head = np.where(dataset_joints == 'head')[1][0] 117 | lsho = np.where(dataset_joints == 'lsho')[1][0] 118 | 
lelb = np.where(dataset_joints == 'lelb')[1][0] 119 | lwri = np.where(dataset_joints == 'lwri')[1][0] 120 | lhip = np.where(dataset_joints == 'lhip')[1][0] 121 | lkne = np.where(dataset_joints == 'lkne')[1][0] 122 | lank = np.where(dataset_joints == 'lank')[1][0] 123 | 124 | rsho = np.where(dataset_joints == 'rsho')[1][0] 125 | relb = np.where(dataset_joints == 'relb')[1][0] 126 | rwri = np.where(dataset_joints == 'rwri')[1][0] 127 | rkne = np.where(dataset_joints == 'rkne')[1][0] 128 | rank = np.where(dataset_joints == 'rank')[1][0] 129 | rhip = np.where(dataset_joints == 'rhip')[1][0] 130 | 131 | jnt_visible = 1 - jnt_missing 132 | uv_error = pos_pred_src - pos_gt_src 133 | uv_err = np.linalg.norm(uv_error, axis=1) 134 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 135 | headsizes = np.linalg.norm(headsizes, axis=0) 136 | headsizes *= SC_BIAS 137 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 138 | scaled_uv_err = np.divide(uv_err, scale) 139 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 140 | jnt_count = np.sum(jnt_visible, axis=1) 141 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 142 | jnt_visible) 143 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 144 | 145 | # save 146 | rng = np.arange(0, 0.5+0.01, 0.01) 147 | pckAll = np.zeros((len(rng), 16)) 148 | 149 | for r in range(len(rng)): 150 | threshold = rng[r] 151 | less_than_threshold = np.multiply(scaled_uv_err <= threshold, 152 | jnt_visible) 153 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 154 | jnt_count) 155 | 156 | PCKh = np.ma.array(PCKh, mask=False) 157 | PCKh.mask[6:8] = True 158 | 159 | jnt_count = np.ma.array(jnt_count, mask=False) 160 | jnt_count.mask[6:8] = True 161 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 162 | 163 | name_value = [ 164 | ('Head', PCKh[head]), 165 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 166 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 167 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 168 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 169 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 170 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 171 | ('Mean', np.sum(PCKh * jnt_ratio)), 172 | ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 173 | ] 174 | name_value = OrderedDict(name_value) 175 | 176 | return name_value, name_value['Mean'] 177 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.pose_resnet 12 | -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | from collections import OrderedDict 17 | 18 | 19 | BN_MOMENTUM = 0.1 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def conv3x3(in_planes, out_planes, stride=1): 24 | """3x3 convolution with padding""" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | residual = self.downsample(x) 54 | 55 | out += residual 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 68 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 69 | padding=1, bias=False) 70 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 71 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 72 | bias=False) 73 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 74 | momentum=BN_MOMENTUM) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | 102 | class Bottleneck_CAFFE(nn.Module): 103 | expansion = 4 104 | 105 | def __init__(self, inplanes, planes, stride=1, downsample=None): 106 | super(Bottleneck_CAFFE, self).__init__() 107 | # add stride to conv1x1 108 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 109 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 110 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, 111 | padding=1, bias=False) 112 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 113 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 114 | bias=False) 115 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 116 | momentum=BN_MOMENTUM) 117 | self.relu = nn.ReLU(inplace=True) 118 | self.downsample = downsample 119 | self.stride = stride 120 | 121 | def forward(self, x): 122 | residual = x 123 | 
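        # Same residual computation as Bottleneck.forward above; the caffe-style
        # variant differs only in __init__, where the stride is applied to the
        # 1x1 conv1 instead of the 3x3 conv2.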
124 | out = self.conv1(x) 125 | out = self.bn1(out) 126 | out = self.relu(out) 127 | 128 | out = self.conv2(out) 129 | out = self.bn2(out) 130 | out = self.relu(out) 131 | 132 | out = self.conv3(out) 133 | out = self.bn3(out) 134 | 135 | if self.downsample is not None: 136 | residual = self.downsample(x) 137 | 138 | out += residual 139 | out = self.relu(out) 140 | 141 | return out 142 | 143 | 144 | class PoseResNet(nn.Module): 145 | 146 | def __init__(self, block, layers, cfg, **kwargs): 147 | self.inplanes = 64 148 | extra = cfg.MODEL.EXTRA 149 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 150 | 151 | super(PoseResNet, self).__init__() 152 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 153 | bias=False) 154 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 155 | self.relu = nn.ReLU(inplace=True) 156 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 157 | self.layer1 = self._make_layer(block, 64, layers[0]) 158 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 159 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 160 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 161 | 162 | # used for deconv layers 163 | self.deconv_layers = self._make_deconv_layer( 164 | extra.NUM_DECONV_LAYERS, 165 | extra.NUM_DECONV_FILTERS, 166 | extra.NUM_DECONV_KERNELS, 167 | ) 168 | 169 | self.final_layer = nn.Conv2d( 170 | in_channels=extra.NUM_DECONV_FILTERS[-1], 171 | out_channels=cfg.MODEL.NUM_JOINTS, 172 | kernel_size=extra.FINAL_CONV_KERNEL, 173 | stride=1, 174 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 175 | ) 176 | 177 | def _make_layer(self, block, planes, blocks, stride=1): 178 | downsample = None 179 | if stride != 1 or self.inplanes != planes * block.expansion: 180 | downsample = nn.Sequential( 181 | nn.Conv2d(self.inplanes, planes * block.expansion, 182 | kernel_size=1, stride=stride, bias=False), 183 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 184 | ) 185 | 186 | layers = [] 187 | layers.append(block(self.inplanes, planes, stride, downsample)) 188 | self.inplanes = planes * block.expansion 189 | for i in range(1, blocks): 190 | layers.append(block(self.inplanes, planes)) 191 | 192 | return nn.Sequential(*layers) 193 | 194 | def _get_deconv_cfg(self, deconv_kernel, index): 195 | if deconv_kernel == 4: 196 | padding = 1 197 | output_padding = 0 198 | elif deconv_kernel == 3: 199 | padding = 1 200 | output_padding = 1 201 | elif deconv_kernel == 2: 202 | padding = 0 203 | output_padding = 0 204 | 205 | return deconv_kernel, padding, output_padding 206 | 207 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 208 | assert num_layers == len(num_filters), \ 209 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 210 | assert num_layers == len(num_kernels), \ 211 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 212 | 213 | layers = [] 214 | for i in range(num_layers): 215 | kernel, padding, output_padding = \ 216 | self._get_deconv_cfg(num_kernels[i], i) 217 | 218 | planes = num_filters[i] 219 | layers.append( 220 | nn.ConvTranspose2d( 221 | in_channels=self.inplanes, 222 | out_channels=planes, 223 | kernel_size=kernel, 224 | stride=2, 225 | padding=padding, 226 | output_padding=output_padding, 227 | bias=self.deconv_with_bias)) 228 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 229 | layers.append(nn.ReLU(inplace=True)) 230 | self.inplanes = planes 231 | 232 | return nn.Sequential(*layers) 233 | 234 | def forward(self, 
x): 235 | x = self.conv1(x) 236 | x = self.bn1(x) 237 | x = self.relu(x) 238 | x = self.maxpool(x) 239 | 240 | x = self.layer1(x) 241 | x = self.layer2(x) 242 | x = self.layer3(x) 243 | x = self.layer4(x) 244 | 245 | x = self.deconv_layers(x) 246 | x = self.final_layer(x) 247 | 248 | return x 249 | 250 | def init_weights(self, pretrained=''): 251 | if os.path.isfile(pretrained): 252 | logger.info('=> init deconv weights from normal distribution') 253 | for name, m in self.deconv_layers.named_modules(): 254 | if isinstance(m, nn.ConvTranspose2d): 255 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 256 | logger.info('=> init {}.bias as 0'.format(name)) 257 | nn.init.normal_(m.weight, std=0.001) 258 | if self.deconv_with_bias: 259 | nn.init.constant_(m.bias, 0) 260 | elif isinstance(m, nn.BatchNorm2d): 261 | logger.info('=> init {}.weight as 1'.format(name)) 262 | logger.info('=> init {}.bias as 0'.format(name)) 263 | nn.init.constant_(m.weight, 1) 264 | nn.init.constant_(m.bias, 0) 265 | logger.info('=> init final conv weights from normal distribution') 266 | for m in self.final_layer.modules(): 267 | if isinstance(m, nn.Conv2d): 268 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 269 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 270 | logger.info('=> init {}.bias as 0'.format(name)) 271 | nn.init.normal_(m.weight, std=0.001) 272 | nn.init.constant_(m.bias, 0) 273 | 274 | # pretrained_state_dict = torch.load(pretrained) 275 | logger.info('=> loading pretrained model {}'.format(pretrained)) 276 | # self.load_state_dict(pretrained_state_dict, strict=False) 277 | checkpoint = torch.load(pretrained) 278 | if isinstance(checkpoint, OrderedDict): 279 | state_dict = checkpoint 280 | elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 281 | state_dict_old = checkpoint['state_dict'] 282 | state_dict = OrderedDict() 283 | # delete 'module.' 
because it is saved from DataParallel module 284 | for key in state_dict_old.keys(): 285 | if key.startswith('module.'): 286 | # state_dict[key[7:]] = state_dict[key] 287 | # state_dict.pop(key) 288 | state_dict[key[7:]] = state_dict_old[key] 289 | else: 290 | state_dict[key] = state_dict_old[key] 291 | else: 292 | raise RuntimeError( 293 | 'No state_dict found in checkpoint file {}'.format(pretrained)) 294 | self.load_state_dict(state_dict, strict=False) 295 | else: 296 | logger.error('=> imagenet pretrained model dose not exist') 297 | logger.error('=> please download it first') 298 | raise ValueError('imagenet pretrained model does not exist') 299 | 300 | 301 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 302 | 34: (BasicBlock, [3, 4, 6, 3]), 303 | 50: (Bottleneck, [3, 4, 6, 3]), 304 | 101: (Bottleneck, [3, 4, 23, 3]), 305 | 152: (Bottleneck, [3, 8, 36, 3])} 306 | 307 | 308 | def get_pose_net(cfg, is_train, **kwargs): 309 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 310 | style = cfg.MODEL.STYLE 311 | 312 | block_class, layers = resnet_spec[num_layers] 313 | 314 | if style == 'caffe': 315 | block_class = Bottleneck_CAFFE 316 | 317 | model = PoseResNet(block_class, layers, cfg, **kwargs) 318 | 319 | if is_train and cfg.MODEL.INIT_WEIGHTS: 320 | model.init_weights(cfg.MODEL.PRETRAINED) 321 | 322 | return model 323 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/human-pose-estimation.pytorch/49f3f4458c9d5917c75c37a6db48c6a0d7cd89a1/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | 10 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 11 | return a if a >= b else b 12 | 13 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 14 | return a if a <= b else b 15 | 16 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 17 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 18 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 19 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 20 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 21 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 22 | 23 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 24 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 25 | 26 | cdef int ndets = dets.shape[0] 27 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 28 | np.zeros((ndets), dtype=np.int) 29 | 30 | # nominal indices 31 | cdef int _i, _j 32 | # sorted indices 33 | cdef int i, j 34 | # temp variables for box i's (the box currently under consideration) 35 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 36 | # variables for computing overlap with box j (lower scoring box) 37 | cdef np.float32_t xx1, yy1, xx2, yy2 38 | cdef np.float32_t w, h 39 | cdef np.float32_t inter, ovr 40 | 41 | keep = [] 42 | for _i in range(ndets): 43 | i = order[_i] 44 | if suppressed[i] == 1: 45 | continue 46 | keep.append(i) 47 | ix1 = x1[i] 48 | iy1 = y1[i] 49 | ix2 = x2[i] 50 | iy2 = y2[i] 51 | iarea = areas[i] 52 | for _j in range(_i + 1, ndets): 53 | j = order[_j] 54 | if suppressed[j] == 1: 55 | continue 56 | xx1 = max(ix1, x1[j]) 57 | yy1 = max(iy1, y1[j]) 58 | xx2 = min(ix2, x2[j]) 59 | yy2 = min(iy2, y2[j]) 60 | w = max(0.0, xx2 - xx1 + 1) 61 | h = max(0.0, yy2 - yy1 + 1) 62 | inter = w * h 63 | ovr = inter / (iarea + areas[j] - inter) 64 | if ovr >= thresh: 65 | suppressed[j] = 1 66 | 67 | return keep 68 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | 10 | assert sizeof(int) == sizeof(np.int32_t) 11 | 12 | cdef extern from "gpu_nms.hpp": 13 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 14 | 15 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 16 | np.int32_t device_id=0): 17 | cdef int boxes_num = dets.shape[0] 18 | cdef int boxes_dim = dets.shape[1] 19 | cdef int num_out 20 | cdef np.ndarray[np.int32_t, ndim=1] \ 21 | keep = np.zeros(boxes_num, dtype=np.int32) 22 | cdef np.ndarray[np.float32_t, ndim=1] \ 23 | scores = dets[:, 4] 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | order = scores.argsort()[::-1].astype(np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=2] \ 27 | sorted_dets = dets[order, :] 28 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 29 | keep = keep[:num_out] 30 | return list(order[keep]) 31 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 75 | if not isinstance(sigmas, np.ndarray): 76 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 77 | vars = (sigmas * 2) ** 2 78 | xg = g[0::3] 79 | yg = g[1::3] 80 | vg = g[2::3] 81 | ious = 
np.zeros((d.shape[0])) 82 | for n_d in range(0, d.shape[0]): 83 | xd = d[n_d, 0::3] 84 | yd = d[n_d, 1::3] 85 | vd = d[n_d, 2::3] 86 | dx = xd - xg 87 | dy = yd - yg 88 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 89 | if in_vis_thre is not None: 90 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 91 | e = e[ind] 92 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 93 | return ious 94 | 95 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 96 | """ 97 | greedily select boxes with high confidence and overlap with current maximum <= thresh 98 | rule out overlap >= thresh, overlap = oks 99 | :param kpts_db 100 | :param thresh: retain overlap < thresh 101 | :return: indexes to keep 102 | """ 103 | if len(kpts_db) == 0: 104 | return [] 105 | 106 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 107 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 108 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 109 | 110 | order = scores.argsort()[::-1] 111 | 112 | keep = [] 113 | while order.size > 0: 114 | i = order[0] 115 | keep.append(i) 116 | 117 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 118 | 119 | inds = np.where(oks_ovr <= thresh)[0] 120 | order = order[inds + 1] 121 | 122 | return keep 123 | 124 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 
+ 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(&current_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long> remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) Microsoft 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | #
-------------------------------------------------------- 6 | 7 | import os 8 | from os.path import join as pjoin 9 | from setuptools import setup 10 | from distutils.extension import Extension 11 | from Cython.Distutils import build_ext 12 | import numpy as np 13 | 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | # Adapted fom 18 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | def locate_cuda(): 27 | """Locate the CUDA environment on the system 28 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 29 | and values giving the absolute path to each directory. 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | 58 | # Obtain the numpy include directory. This logic works across numpy versions. 59 | try: 60 | numpy_include = np.get_include() 61 | except AttributeError: 62 | numpy_include = np.get_numpy_include() 63 | 64 | 65 | def customize_compiler_for_nvcc(self): 66 | """inject deep into distutils to customize how the dispatch 67 | to gcc/nvcc works. 68 | If you subclass UnixCCompiler, it's not trivial to get your subclass 69 | injected in, and still have the right customizations (i.e. 70 | distutils.sysconfig.customize_compiler) run on it. So instead of going 71 | the OO route, I have this. Note, it's kindof like a wierd functional 72 | subclassing going on.""" 73 | 74 | # tell the compiler it can processes .cu 75 | self.src_extensions.append('.cu') 76 | 77 | # save references to the default compiler_so and _comple methods 78 | default_compiler_so = self.compiler_so 79 | super = self._compile 80 | 81 | # now redefine the _compile method. This gets executed for each 82 | # object but distutils doesn't have the ability to change compilers 83 | # based on source extension: we add it. 
84 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | 109 | ext_modules = [ 110 | Extension( 111 | "cpu_nms", 112 | ["cpu_nms.pyx"], 113 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 114 | include_dirs = [numpy_include] 115 | ), 116 | Extension('gpu_nms', 117 | ['nms_kernel.cu', 'gpu_nms.pyx'], 118 | library_dirs=[CUDA['lib64']], 119 | libraries=['cudart'], 120 | language='c++', 121 | runtime_library_dirs=[CUDA['lib64']], 122 | # this syntax is specific to this build system 123 | # we're only going to use certain compiler args with nvcc and not with 124 | # gcc the implementation of this trick is in customize_compiler() below 125 | extra_compile_args={'gcc': ["-Wno-unused-function"], 126 | 'nvcc': ['-arch=sm_35', 127 | '--ptxas-options=-v', 128 | '-c', 129 | '--compiler-options', 130 | "'-fPIC'"]}, 131 | include_dirs = [numpy_include, CUDA['include']] 132 | ), 133 | ] 134 | 135 | setup( 136 | name='nms', 137 | ext_modules=ext_modules, 138 | # inject our custom trigger 139 | cmdclass={'build_ext': custom_build_ext}, 140 | ) 141 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/human-pose-estimation.pytorch/49f3f4458c9d5917c75c37a6db48c6a0d7cd89a1/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def flip_back(output_flipped, matched_parts): 16 | ''' 17 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 18 | ''' 19 | assert output_flipped.ndim == 4,\ 20 | 'output_flipped should be [batch_size, num_joints, height, width]' 21 | 22 | output_flipped = output_flipped[:, :, :, ::-1] 23 | 24 | for pair in matched_parts: 25 | tmp = output_flipped[:, pair[0], :, :].copy() 26 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 27 | output_flipped[:, pair[1], :, :] = tmp 28 | 29 | return output_flipped 30 | 31 | 32 | def fliplr_joints(joints, joints_vis, width, matched_parts): 33 | """ 34 | flip coords 35 | """ 36 | # Flip horizontal 37 | joints[:, 0] = width - joints[:, 0] - 1 38 | 39 | # Change left-right parts 40 | for pair in matched_parts: 41 | joints[pair[0], :], joints[pair[1], :] = \ 42 | joints[pair[1], :], joints[pair[0], :].copy() 43 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 44 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 45 | 46 | return joints*joints_vis, joints_vis 47 | 48 | 49 | def transform_preds(coords, center, scale, output_size): 50 | target_coords = np.zeros(coords.shape) 51 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 52 | for p in range(coords.shape[0]): 53 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 54 | return target_coords 55 | 56 | 57 | def get_affine_transform(center, 58 | scale, 59 | rot, 60 | output_size, 61 | shift=np.array([0, 0], dtype=np.float32), 62 | inv=0): 63 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 64 | print(scale) 65 | scale = np.array([scale, scale]) 66 | 67 | scale_tmp = scale * 200.0 68 | src_w = scale_tmp[0] 69 | dst_w = output_size[0] 70 | dst_h = output_size[1] 71 | 72 | rot_rad = np.pi * rot / 180 73 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 74 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 75 | 76 | src = np.zeros((3, 2), dtype=np.float32) 77 | dst = np.zeros((3, 2), dtype=np.float32) 78 | src[0, :] = center + scale_tmp * shift 79 | src[1, :] = center + src_dir + scale_tmp * shift 80 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 81 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 82 | 83 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 84 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 85 | 86 | if inv: 87 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 88 | else: 89 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 90 | 91 | return trans 92 | 93 | 94 | def affine_transform(pt, t): 95 | new_pt = np.array([pt[0], pt[1], 1.]).T 96 | new_pt = np.dot(t, new_pt) 97 | return new_pt[:2] 98 | 99 | 100 | def get_3rd_point(a, b): 101 | direct = a - b 102 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 103 | 104 | 105 | def get_dir(src_point, rot_rad): 106 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 107 | 108 | src_result = [0, 0] 109 | src_result[0] = src_point[0] * cs - src_point[1] * sn 110 | src_result[1] = src_point[0] * sn + src_point[1] * cs 111 | 112 | return src_result 113 | 114 | 115 | def crop(img, center, scale, output_size, rot=0): 116 | trans = get_affine_transform(center, scale, rot, output_size) 117 | 118 | dst_img = 
cv2.warpAffine(img, 119 | trans, 120 | (int(output_size[0]), int(output_size[1])), 121 | flags=cv2.INTER_LINEAR) 122 | 123 | return dst_img 124 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from pathlib import Path 15 | 16 | import torch 17 | import torch.optim as optim 18 | 19 | from core.config import get_model_name 20 | 21 | 22 | def create_logger(cfg, cfg_name, phase='train'): 23 | root_output_dir = Path(cfg.OUTPUT_DIR) 24 | # set up logger 25 | if not root_output_dir.exists(): 26 | print('=> creating {}'.format(root_output_dir)) 27 | root_output_dir.mkdir() 28 | 29 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 30 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 31 | dataset = dataset.replace(':', '_') 32 | model, _ = get_model_name(cfg) 33 | cfg_name = os.path.basename(cfg_name).split('.')[0] 34 | 35 | final_output_dir = root_output_dir / dataset / model / cfg_name 36 | 37 | print('=> creating {}'.format(final_output_dir)) 38 | final_output_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | time_str = time.strftime('%Y-%m-%d-%H-%M') 41 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 42 | final_log_file = final_output_dir / log_file 43 | head = '%(asctime)-15s %(message)s' 44 | logging.basicConfig(filename=str(final_log_file), 45 | format=head) 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.INFO) 48 | console = logging.StreamHandler() 49 | logging.getLogger('').addHandler(console) 50 | 51 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 52 | (cfg_name + '_' + time_str) 53 | print('=> creating {}'.format(tensorboard_log_dir)) 54 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 55 | 56 | return logger, str(final_output_dir), str(tensorboard_log_dir) 57 | 58 | 59 | def get_optimizer(cfg, model): 60 | optimizer = None 61 | if cfg.TRAIN.OPTIMIZER == 'sgd': 62 | optimizer = optim.SGD( 63 | model.parameters(), 64 | lr=cfg.TRAIN.LR, 65 | momentum=cfg.TRAIN.MOMENTUM, 66 | weight_decay=cfg.TRAIN.WD, 67 | nesterov=cfg.TRAIN.NESTEROV 68 | ) 69 | elif cfg.TRAIN.OPTIMIZER == 'adam': 70 | optimizer = optim.Adam( 71 | model.parameters(), 72 | lr=cfg.TRAIN.LR 73 | ) 74 | 75 | return optimizer 76 | 77 | 78 | def save_checkpoint(states, is_best, output_dir, 79 | filename='checkpoint.pth.tar'): 80 | torch.save(states, os.path.join(output_dir, filename)) 81 | if is_best and 'state_dict' in states: 82 | torch.save(states['state_dict'], 83 | os.path.join(output_dir, 'model_best.pth.tar')) 84 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | 17 | from core.inference import get_max_preds 18 | 19 | 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 21 | file_name, nrow=8, padding=2): 22 | ''' 23 | batch_image: [batch_size, channel, height, width] 24 | batch_joints: [batch_size, num_joints, 3], 25 | batch_joints_vis: [batch_size, num_joints, 1], 26 | } 27 | ''' 28 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 29 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 30 | ndarr = ndarr.copy() 31 | 32 | nmaps = batch_image.size(0) 33 | xmaps = min(nrow, nmaps) 34 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 35 | height = int(batch_image.size(2) + padding) 36 | width = int(batch_image.size(3) + padding) 37 | k = 0 38 | for y in range(ymaps): 39 | for x in range(xmaps): 40 | if k >= nmaps: 41 | break 42 | joints = batch_joints[k] 43 | joints_vis = batch_joints_vis[k] 44 | 45 | for joint, joint_vis in zip(joints, joints_vis): 46 | joint[0] = x * width + padding + joint[0] 47 | joint[1] = y * height + padding + joint[1] 48 | if joint_vis[0]: 49 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) 50 | k = k + 1 51 | cv2.imwrite(file_name, ndarr) 52 | 53 | 54 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, 55 | normalize=True): 56 | ''' 57 | batch_image: [batch_size, channel, height, width] 58 | batch_heatmaps: ['batch_size, num_joints, height, width] 59 | file_name: saved file name 60 | ''' 61 | if normalize: 62 | batch_image = batch_image.clone() 63 | min = float(batch_image.min()) 64 | max = float(batch_image.max()) 65 | 66 | batch_image.add_(-min).div_(max - min + 1e-5) 67 | 68 | batch_size = batch_heatmaps.size(0) 69 | num_joints = batch_heatmaps.size(1) 70 | heatmap_height = batch_heatmaps.size(2) 71 | heatmap_width = batch_heatmaps.size(3) 72 | 73 | grid_image = np.zeros((batch_size*heatmap_height, 74 | (num_joints+1)*heatmap_width, 75 | 3), 76 | dtype=np.uint8) 77 | 78 | preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) 79 | 80 | for i in range(batch_size): 81 | image = batch_image[i].mul(255)\ 82 | .clamp(0, 255)\ 83 | .byte()\ 84 | .permute(1, 2, 0)\ 85 | .cpu().numpy() 86 | heatmaps = batch_heatmaps[i].mul(255)\ 87 | .clamp(0, 255)\ 88 | .byte()\ 89 | .cpu().numpy() 90 | 91 | resized_image = cv2.resize(image, 92 | (int(heatmap_width), int(heatmap_height))) 93 | 94 | height_begin = heatmap_height * i 95 | height_end = heatmap_height * (i + 1) 96 | for j in range(num_joints): 97 | cv2.circle(resized_image, 98 | (int(preds[i][j][0]), int(preds[i][j][1])), 99 | 1, [0, 0, 255], 1) 100 | heatmap = heatmaps[j, :, :] 101 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 102 | masked_image = colored_heatmap*0.7 + resized_image*0.3 103 | cv2.circle(masked_image, 104 | (int(preds[i][j][0]), int(preds[i][j][1])), 105 | 1, [0, 0, 255], 1) 106 | 107 | width_begin = heatmap_width * (j+1) 108 | width_end = heatmap_width * (j+2) 109 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 110 | masked_image 111 | # grid_image[height_begin:height_end, width_begin:width_end, :] = \ 112 | # colored_heatmap*0.7 + resized_image*0.3 113 | 
114 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 115 | 116 | cv2.imwrite(file_name, grid_image) 117 | 118 | 119 | def save_debug_images(config, input, meta, target, joints_pred, output, 120 | prefix): 121 | if not config.DEBUG.DEBUG: 122 | return 123 | 124 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 125 | save_batch_image_with_joints( 126 | input, meta['joints'], meta['joints_vis'], 127 | '{}_gt.jpg'.format(prefix) 128 | ) 129 | if config.DEBUG.SAVE_BATCH_IMAGES_PRED: 130 | save_batch_image_with_joints( 131 | input, joints_pred, meta['joints_vis'], 132 | '{}_pred.jpg'.format(prefix) 133 | ) 134 | if config.DEBUG.SAVE_HEATMAPS_GT: 135 | save_batch_heatmaps( 136 | input, target, '{}_hm_gt.jpg'.format(prefix) 137 | ) 138 | if config.DEBUG.SAVE_HEATMAPS_PRED: 139 | save_batch_heatmaps( 140 | input, output, '{}_hm_pred.jpg'.format(prefix) 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.index('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.index('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /pose_estimation/_init_paths.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | 20 | this_dir = osp.dirname(__file__) 21 | 22 | lib_path = osp.join(this_dir, '..', 'lib') 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /pose_estimation/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import argparse 12 | import os 13 | import pprint 14 | import shutil 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | from tensorboardX import SummaryWriter 24 | 25 | import _init_paths 26 | from core.config import config 27 | from core.config import update_config 28 | from core.config import update_dir 29 | from core.config import get_model_name 30 | from core.loss import JointsMSELoss 31 | from core.function import train 32 | from core.function import validate 33 | from utils.utils import get_optimizer 34 | from utils.utils import save_checkpoint 35 | from utils.utils import create_logger 36 | 37 | import dataset 38 | import models 39 | 40 | 41 | def parse_args(): 42 | parser = argparse.ArgumentParser(description='Train keypoints network') 43 | # general 44 | parser.add_argument('--cfg', 45 | help='experiment configure file name', 46 | required=True, 47 | type=str) 48 | 49 | args, rest = parser.parse_known_args() 50 | # update config 51 | update_config(args.cfg) 52 | 53 | # training 54 | parser.add_argument('--frequent', 55 | help='frequency of logging', 56 | default=config.PRINT_FREQ, 57 | type=int) 58 | parser.add_argument('--gpus', 59 | help='gpus', 60 | type=str) 61 | parser.add_argument('--workers', 62 | help='num of dataloader workers', 63 | type=int) 64 | 65 | args = parser.parse_args() 66 | 67 | return args 68 | 69 | 70 | def reset_config(config, args): 71 | if args.gpus: 72 | config.GPUS = args.gpus 73 | if args.workers: 74 | config.WORKERS = args.workers 75 | 76 | 77 | def main(): 78 | args = parse_args() 79 | reset_config(config, args) 80 | 81 | logger, final_output_dir, tb_log_dir = create_logger( 82 | config, args.cfg, 'train') 83 | 84 | logger.info(pprint.pformat(args)) 85 | logger.info(pprint.pformat(config)) 86 | 87 | # cudnn related setting 88 | cudnn.benchmark = config.CUDNN.BENCHMARK 89 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 90 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 91 | 92 | model = 
eval('models.'+config.MODEL.NAME+'.get_pose_net')( 93 | config, is_train=True 94 | ) 95 | 96 | # copy model file 97 | this_dir = os.path.dirname(__file__) 98 | shutil.copy2( 99 | os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'), 100 | final_output_dir) 101 | 102 | writer_dict = { 103 | 'writer': SummaryWriter(log_dir=tb_log_dir), 104 | 'train_global_steps': 0, 105 | 'valid_global_steps': 0, 106 | } 107 | 108 | dump_input = torch.rand((config.TRAIN.BATCH_SIZE, 109 | 3, 110 | config.MODEL.IMAGE_SIZE[1], 111 | config.MODEL.IMAGE_SIZE[0])) 112 | writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False) 113 | 114 | gpus = [int(i) for i in config.GPUS.split(',')] 115 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 116 | 117 | # define loss function (criterion) and optimizer 118 | criterion = JointsMSELoss( 119 | use_target_weight=config.LOSS.USE_TARGET_WEIGHT 120 | ).cuda() 121 | 122 | optimizer = get_optimizer(config, model) 123 | 124 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 125 | optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR 126 | ) 127 | 128 | # Data loading code 129 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 130 | std=[0.229, 0.224, 0.225]) 131 | train_dataset = eval('dataset.'+config.DATASET.DATASET)( 132 | config, 133 | config.DATASET.ROOT, 134 | config.DATASET.TRAIN_SET, 135 | True, 136 | transforms.Compose([ 137 | transforms.ToTensor(), 138 | normalize, 139 | ]) 140 | ) 141 | valid_dataset = eval('dataset.'+config.DATASET.DATASET)( 142 | config, 143 | config.DATASET.ROOT, 144 | config.DATASET.TEST_SET, 145 | False, 146 | transforms.Compose([ 147 | transforms.ToTensor(), 148 | normalize, 149 | ]) 150 | ) 151 | 152 | train_loader = torch.utils.data.DataLoader( 153 | train_dataset, 154 | batch_size=config.TRAIN.BATCH_SIZE*len(gpus), 155 | shuffle=config.TRAIN.SHUFFLE, 156 | num_workers=config.WORKERS, 157 | pin_memory=True 158 | ) 159 | valid_loader = torch.utils.data.DataLoader( 160 | valid_dataset, 161 | batch_size=config.TEST.BATCH_SIZE*len(gpus), 162 | shuffle=False, 163 | num_workers=config.WORKERS, 164 | pin_memory=True 165 | ) 166 | 167 | best_perf = 0.0 168 | best_model = False 169 | for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH): 170 | lr_scheduler.step() 171 | 172 | # train for one epoch 173 | train(config, train_loader, model, criterion, optimizer, epoch, 174 | final_output_dir, tb_log_dir, writer_dict) 175 | 176 | 177 | # evaluate on validation set 178 | perf_indicator = validate(config, valid_loader, valid_dataset, model, 179 | criterion, final_output_dir, tb_log_dir, 180 | writer_dict) 181 | 182 | if perf_indicator > best_perf: 183 | best_perf = perf_indicator 184 | best_model = True 185 | else: 186 | best_model = False 187 | 188 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 189 | save_checkpoint({ 190 | 'epoch': epoch + 1, 191 | 'model': get_model_name(config), 192 | 'state_dict': model.state_dict(), 193 | 'perf': perf_indicator, 194 | 'optimizer': optimizer.state_dict(), 195 | }, best_model, final_output_dir) 196 | 197 | final_model_state_file = os.path.join(final_output_dir, 198 | 'final_state.pth.tar') 199 | logger.info('saving final model state to {}'.format( 200 | final_model_state_file)) 201 | torch.save(model.module.state_dict(), final_model_state_file) 202 | writer_dict['writer'].close() 203 | 204 | 205 | if __name__ == '__main__': 206 | main() 207 | -------------------------------------------------------------------------------- 
/pose_estimation/valid.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | 24 | import _init_paths 25 | from core.config import config 26 | from core.config import update_config 27 | from core.config import update_dir 28 | from core.loss import JointsMSELoss 29 | from core.function import validate 30 | from utils.utils import create_logger 31 | 32 | import dataset 33 | import models 34 | 35 | 36 | def parse_args(): 37 | parser = argparse.ArgumentParser(description='Train keypoints network') 38 | # general 39 | parser.add_argument('--cfg', 40 | help='experiment configure file name', 41 | required=True, 42 | type=str) 43 | 44 | args, rest = parser.parse_known_args() 45 | # update config 46 | update_config(args.cfg) 47 | 48 | # training 49 | parser.add_argument('--frequent', 50 | help='frequency of logging', 51 | default=config.PRINT_FREQ, 52 | type=int) 53 | parser.add_argument('--gpus', 54 | help='gpus', 55 | type=str) 56 | parser.add_argument('--workers', 57 | help='num of dataloader workers', 58 | type=int) 59 | parser.add_argument('--model-file', 60 | help='model state file', 61 | type=str) 62 | parser.add_argument('--use-detect-bbox', 63 | help='use detect bbox', 64 | action='store_true') 65 | parser.add_argument('--flip-test', 66 | help='use flip test', 67 | action='store_true') 68 | parser.add_argument('--post-process', 69 | help='use post process', 70 | action='store_true') 71 | parser.add_argument('--shift-heatmap', 72 | help='shift heatmap', 73 | action='store_true') 74 | parser.add_argument('--coco-bbox-file', 75 | help='coco detection bbox file', 76 | type=str) 77 | 78 | args = parser.parse_args() 79 | 80 | return args 81 | 82 | 83 | def reset_config(config, args): 84 | if args.gpus: 85 | config.GPUS = args.gpus 86 | if args.workers: 87 | config.WORKERS = args.workers 88 | if args.use_detect_bbox: 89 | config.TEST.USE_GT_BBOX = not args.use_detect_bbox 90 | if args.flip_test: 91 | config.TEST.FLIP_TEST = args.flip_test 92 | if args.post_process: 93 | config.TEST.POST_PROCESS = args.post_process 94 | if args.shift_heatmap: 95 | config.TEST.SHIFT_HEATMAP = args.shift_heatmap 96 | if args.model_file: 97 | config.TEST.MODEL_FILE = args.model_file 98 | if args.coco_bbox_file: 99 | config.TEST.COCO_BBOX_FILE = args.coco_bbox_file 100 | 101 | 102 | def main(): 103 | args = parse_args() 104 | reset_config(config, args) 105 | 106 | logger, final_output_dir, tb_log_dir = create_logger( 107 | config, args.cfg, 'valid') 108 | 109 | logger.info(pprint.pformat(args)) 110 | logger.info(pprint.pformat(config)) 111 | 112 | # cudnn related setting 113 | cudnn.benchmark = config.CUDNN.BENCHMARK 114 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 115 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 116 
| 117 | model = eval('models.'+config.MODEL.NAME+'.get_pose_net')( 118 | config, is_train=False 119 | ) 120 | 121 | if config.TEST.MODEL_FILE: 122 | logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE)) 123 | model.load_state_dict(torch.load(config.TEST.MODEL_FILE)) 124 | else: 125 | model_state_file = os.path.join(final_output_dir, 126 | 'final_state.pth.tar') 127 | logger.info('=> loading model from {}'.format(model_state_file)) 128 | model.load_state_dict(torch.load(model_state_file)) 129 | 130 | gpus = [int(i) for i in config.GPUS.split(',')] 131 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 132 | 133 | # define loss function (criterion) and optimizer 134 | criterion = JointsMSELoss( 135 | use_target_weight=config.LOSS.USE_TARGET_WEIGHT 136 | ).cuda() 137 | 138 | # Data loading code 139 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 140 | std=[0.229, 0.224, 0.225]) 141 | valid_dataset = eval('dataset.'+config.DATASET.DATASET)( 142 | config, 143 | config.DATASET.ROOT, 144 | config.DATASET.TEST_SET, 145 | False, 146 | transforms.Compose([ 147 | transforms.ToTensor(), 148 | normalize, 149 | ]) 150 | ) 151 | valid_loader = torch.utils.data.DataLoader( 152 | valid_dataset, 153 | batch_size=config.TEST.BATCH_SIZE*len(gpus), 154 | shuffle=False, 155 | num_workers=config.WORKERS, 156 | pin_memory=True 157 | ) 158 | 159 | # evaluate on validation set 160 | validate(config, valid_loader, valid_dataset, model, criterion, 161 | final_output_dir, tb_log_dir) 162 | 163 | 164 | if __name__ == '__main__': 165 | main() 166 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | opencv-python==3.4.1.15 3 | Cython 4 | scipy 5 | pandas 6 | pyyaml 7 | json_tricks 8 | scikit-image 9 | tensorboardX>=1.2 10 | torchvision 11 | --------------------------------------------------------------------------------
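A minimal usage sketch for the OKS-based suppression in lib/nms/nms.py listed above, under a few assumptions: lib/ is on sys.path (as pose_estimation/_init_paths.py arranges), the extensions under lib/nms have been built with its setup.py so the nms module imports cleanly, and a 17-joint COCO keypoint layout is used so the per-joint sigmas inside oks_iou line up with the keypoint count. Every value below (coordinates, scores, areas) is made up purely for illustration; each detection dict carries exactly the fields oks_nms reads: 'keypoints', 'score', and 'area'.

import numpy as np

from nms.nms import oks_nms  # assumption: lib/ is on sys.path and the lib/nms extensions are built

NUM_JOINTS = 17  # COCO keypoint layout


def fake_detection(shift, score):
    # (num_joints, 3) array of x, y, visibility; oks_nms flattens it internally
    kpts = np.zeros((NUM_JOINTS, 3), dtype=np.float32)
    kpts[:, 0] = np.linspace(10.0, 100.0, NUM_JOINTS) + shift  # x
    kpts[:, 1] = np.linspace(20.0, 200.0, NUM_JOINTS) + shift  # y
    kpts[:, 2] = 1.0                                           # visibility
    return {'keypoints': kpts, 'score': score, 'area': 180.0 * 90.0}


# two near-duplicate detections plus one far away
kpts_db = [fake_detection(0.0, 0.95), fake_detection(2.0, 0.90), fake_detection(500.0, 0.80)]

keep = oks_nms(kpts_db, thresh=0.9)
print(keep)  # indices into kpts_db of the detections retained after suppression

The lower-scoring near-duplicate should be suppressed because its OKS against the top-scoring detection exceeds the threshold, while the distant detection survives.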