├── .gitignore ├── LICENSE ├── README.md ├── _config.yml ├── demo ├── .gitignore ├── Dockerfile ├── README.md ├── _init_paths.py ├── build-docker.sh ├── demo.py ├── hrnet-demo.gif ├── inference-config.yaml ├── inference.py ├── inference_1.jpg ├── inference_3.jpg ├── inference_5.jpg ├── inference_6.jpg └── inference_7.jpg ├── experiments ├── coco │ ├── hrnet │ │ ├── w32_256x192_adam_lr1e-3.yaml │ │ ├── w32_384x288_adam_lr1e-3.yaml │ │ ├── w48_256x192_adam_lr1e-3.yaml │ │ └── w48_384x288_adam_lr1e-3.yaml │ └── resnet │ │ ├── res101_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res101_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res152_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res152_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res50_256x192_d256x3_adam_lr1e-3.yaml │ │ └── res50_384x288_d256x3_adam_lr1e-3.yaml └── mpii │ ├── hrnet │ ├── w32_256x256_adam_lr1e-3.yaml │ └── w48_256x256_adam_lr1e-3.yaml │ └── resnet │ ├── res101_256x256_d256x3_adam_lr1e-3.yaml │ ├── res152_256x256_d256x3_adam_lr1e-3.yaml │ └── res50_256x256_d256x3_adam_lr1e-3.yaml ├── figures ├── hrnet.png └── visualization │ └── coco │ ├── score_610_id_2685_000000002685.png │ ├── score_710_id_153229_000000153229.png │ ├── score_755_id_343561_000000343561.png │ ├── score_755_id_559842_000000559842.png │ ├── score_770_id_6954_000000006954.png │ └── score_919_id_53626_000000053626.png ├── lib ├── Makefile ├── config │ ├── __init__.py │ ├── default.py │ └── models.py ├── core │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── coco.py │ └── mpii.py ├── models │ ├── __init__.py │ ├── pose_hrnet.py │ └── pose_resnet.py ├── nms │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup_linux.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt ├── tools ├── _init_paths.py ├── test.py └── train.py └── visualization └── plot_coco.py /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ project files 2 | .idea 3 | *.iml 4 | out 5 | gen 6 | 7 | ### Vim template 8 | [._]*.s[a-w][a-z] 9 | [._]s[a-w][a-z] 10 | *.un~ 11 | Session.vim 12 | .netrwhist 13 | *~ 14 | 15 | ### IPythonNotebook template 16 | # Temporary data 17 | .ipynb_checkpoints/ 18 | 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | #lib/ 38 | #lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *,cover 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | *.ipynb 80 | *.params 81 | *.json 82 | .vscode/ 83 | 84 | lib/pycocotools/_mask.c 85 | lib/nms/cpu_nms.c 86 | 87 | output/* 88 | models/* 89 | log/* 90 | data/* 91 | external/ 92 | 93 | draws/ 94 | plot/ 95 | 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Leo Xiao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep High-Resolution Representation Learning for Human Pose Estimation (CVPR 2019) 2 | ## News 3 | - [2021/04/12] Welcome to check out our recent work on bottom-up pose estimation (CVPR 2021) [HRNet-DEKR](https://github.com/HRNet/DEKR)! 4 | - [2020/07/05] [A very nice blog](https://towardsdatascience.com/overview-of-human-pose-estimation-neural-networks-hrnet-higherhrnet-architectures-and-faq-1954b2f8b249) from Towards Data Science introducing HRNet and HigherHRNet for human pose estimation. 5 | - [2020/03/13] A longer version is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf). It includes more HRNet applications, and the codes are available: [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation), [objection detection](https://github.com/HRNet/HRNet-Object-Detection), [facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection), and [image classification](https://github.com/HRNet/HRNet-Image-Classification). 6 | - [2020/02/01] We have added demo code for HRNet. Thanks [Alex Simes](https://github.com/alex9311). 7 | - Visualization code for showing the pose estimation results. Thanks Depu! 
8 | - [2019/08/27] HigherHRNet is now on [ArXiv](https://arxiv.org/abs/1908.10357), which is a bottom-up approach for human pose estimation powerd by HRNet. We will also release code and models at [Higher-HRNet-Human-Pose-Estimation](https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation), stay tuned! 9 | - Our new work [High-Resolution Representations for Labeling Pixels and Regions](https://arxiv.org/abs/1904.04514) is available at [HRNet](https://github.com/HRNet). Our HRNet has been applied to a wide range of vision tasks, such as [image classification](https://github.com/HRNet/HRNet-Image-Classification), [objection detection](https://github.com/HRNet/HRNet-Object-Detection), [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation) and [facial landmark](https://github.com/HRNet/HRNet-Facial-Landmark-Detection). 10 | 11 | ## Introduction 12 | This is an official pytorch implementation of [*Deep High-Resolution Representation Learning for Human Pose Estimation*](https://arxiv.org/abs/1902.09212). 13 | In this work, we are interested in the human pose estimation problem with a focus on learning reliable high-resolution representations. Most existing methods **recover high-resolution representations from low-resolution representations** produced by a high-to-low resolution network. Instead, our proposed network **maintains high-resolution representations** through the whole process. 14 | We start from a high-resolution subnetwork as the first stage, gradually add high-to-low resolution subnetworks one by one to form more stages, and connect the mutli-resolution subnetworks **in parallel**. We conduct **repeated multi-scale fusions** such that each of the high-to-low resolution representations receives information from other parallel representations over and over, leading to rich high-resolution representations. As a result, the predicted keypoint heatmap is potentially more accurate and spatially more precise. We empirically demonstrate the effectiveness of our network through the superior pose estimation results over two benchmark datasets: the COCO keypoint detection dataset and the MPII Human Pose dataset.
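The figure below illustrates the full architecture. As a rough intuition for the parallel branches and repeated multi-scale fusion described above, here is a deliberately tiny PyTorch sketch; it is **not** the repository's `lib/models/pose_hrnet.py`, just a toy two-branch block with a single exchange unit (layer names and channel counts are made up for illustration):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyFusionBlock(nn.Module):
    """Two parallel branches (full and half resolution) with one fusion step."""

    def __init__(self, high_ch=32, low_ch=64):
        super().__init__()
        self.high_conv = nn.Conv2d(high_ch, high_ch, 3, padding=1)
        self.low_conv = nn.Conv2d(low_ch, low_ch, 3, padding=1)
        # exchange unit: strided conv to go down, 1x1 conv + upsampling to go up
        self.high_to_low = nn.Conv2d(high_ch, low_ch, 3, stride=2, padding=1)
        self.low_to_high = nn.Conv2d(low_ch, high_ch, 1)

    def forward(self, x_high, x_low):
        h = F.relu(self.high_conv(x_high))
        l = F.relu(self.low_conv(x_low))
        fused_low = l + self.high_to_low(h)                  # high -> low
        fused_high = h + F.interpolate(self.low_to_high(l),  # low -> high
                                       size=h.shape[-2:], mode='nearest')
        return fused_high, fused_low


if __name__ == '__main__':
    x_high = torch.randn(1, 32, 64, 48)   # high-resolution features
    x_low = torch.randn(1, 64, 32, 24)    # half-resolution features
    block = ToyFusionBlock()
    for _ in range(3):                    # repeated multi-scale fusion
        x_high, x_low = block(x_high, x_low)
    print(x_high.shape, x_low.shape)      # both resolutions are preserved
```

The high-resolution path is never discarded; each fusion step only adds resampled information from the other branch, which is the property the paragraph above refers to.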
15 | 16 | ![Illustrating the architecture of the proposed HRNet](/figures/hrnet.png) 17 | ## Main Results 18 | ### Results on MPII val 19 | | Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1 | 20 | |--------------------|------|----------|-------|-------|------|------|-------|------|----------| 21 | | pose_resnet_50 | 96.4 | 95.3 | 89.0 | 83.2 | 88.4 | 84.0 | 79.6 | 88.5 | 34.0 | 22 | | pose_resnet_101 | 96.9 | 95.9 | 89.5 | 84.4 | 88.4 | 84.5 | 80.7 | 89.1 | 34.0 | 23 | | pose_resnet_152 | 97.0 | 95.9 | 90.0 | 85.0 | 89.2 | 85.3 | 81.3 | 89.6 | 35.0 | 24 | | **pose_hrnet_w32** | 97.1 | 95.9 | 90.3 | 86.4 | 89.1 | 87.1 | 83.3 | 90.3 | 37.7 | 25 | 26 | ### Note: 27 | - Flip test is used. 28 | - Input size is 256x256 29 | - pose_resnet_[50,101,152] is our previous work of [*Simple Baselines for Human Pose Estimation and Tracking*](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html) 30 | 31 | ### Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset 32 | | Arch | Input size | #Params | GFLOPs | AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 33 | |--------------------|------------|---------|--------|-------|-------|--------|--------|--------|-------|-------|--------|--------|--------| 34 | | pose_resnet_50 | 256x192 | 34.0M | 8.9 | 0.704 | 0.886 | 0.783 | 0.671 | 0.772 | 0.763 | 0.929 | 0.834 | 0.721 | 0.824 | 35 | | pose_resnet_50 | 384x288 | 34.0M | 20.0 | 0.722 | 0.893 | 0.789 | 0.681 | 0.797 | 0.776 | 0.932 | 0.838 | 0.728 | 0.846 | 36 | | pose_resnet_101 | 256x192 | 53.0M | 12.4 | 0.714 | 0.893 | 0.793 | 0.681 | 0.781 | 0.771 | 0.934 | 0.840 | 0.730 | 0.832 | 37 | | pose_resnet_101 | 384x288 | 53.0M | 27.9 | 0.736 | 0.896 | 0.803 | 0.699 | 0.811 | 0.791 | 0.936 | 0.851 | 0.745 | 0.858 | 38 | | pose_resnet_152 | 256x192 | 68.6M | 15.7 | 0.720 | 0.893 | 0.798 | 0.687 | 0.789 | 0.778 | 0.934 | 0.846 | 0.736 | 0.839 | 39 | | pose_resnet_152 | 384x288 | 68.6M | 35.3 | 0.743 | 0.896 | 0.811 | 0.705 | 0.816 | 0.797 | 0.937 | 0.858 | 0.751 | 0.863 | 40 | | **pose_hrnet_w32** | 256x192 | 28.5M | 7.1 | 0.744 | 0.905 | 0.819 | 0.708 | 0.810 | 0.798 | 0.942 | 0.865 | 0.757 | 0.858 | 41 | | **pose_hrnet_w32** | 384x288 | 28.5M | 16.0 | 0.758 | 0.906 | 0.825 | 0.720 | 0.827 | 0.809 | 0.943 | 0.869 | 0.767 | 0.871 | 42 | | **pose_hrnet_w48** | 256x192 | 63.6M | 14.6 | 0.751 | 0.906 | 0.822 | 0.715 | 0.818 | 0.804 | 0.943 | 0.867 | 0.762 | 0.864 | 43 | | **pose_hrnet_w48** | 384x288 | 63.6M | 32.9 | 0.763 | 0.908 | 0.829 | 0.723 | 0.834 | 0.812 | 0.942 | 0.871 | 0.767 | 0.876 | 44 | 45 | ### Note: 46 | - Flip test is used. 47 | - Person detector has person AP of 56.4 on COCO val2017 dataset. 48 | - pose_resnet_[50,101,152] is our previous work of [*Simple Baselines for Human Pose Estimation and Tracking*](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html). 49 | - GFLOPs is for convolution and linear layers only. 
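To sanity-check the `#Params` column above, the parameter count can be reproduced directly from the model builder. Below is a minimal sketch, assumed to be run from `${POSE_ROOT}` with the dependencies installed and the experiment configs present; GFLOPs would additionally need an op-level profiler such as the third-party `thop` package (not a dependency of this repo), so only parameters are counted here:

```python
import argparse
import sys

sys.path.insert(0, 'lib')  # assumes the script is run from ${POSE_ROOT}

import models
from config import cfg, update_config

# Mirror the argument object that tools/train.py and demo/demo.py pass in.
args = argparse.Namespace(
    cfg='experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml',
    opts=[], modelDir='', logDir='', dataDir='', prevModelDir='')
update_config(cfg, args)

model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
n_params = sum(p.numel() for p in model.parameters())
print('#Params: {:.1f}M'.format(n_params / 1e6))  # ~28.5M for pose_hrnet_w32
```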
50 | 51 | 52 | ### Results on COCO test-dev2017 with detector having human AP of 60.9 on COCO test-dev2017 dataset 53 | | Arch | Input size | #Params | GFLOPs | AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 54 | |--------------------|------------|---------|--------|-------|-------|--------|--------|--------|-------|-------|--------|--------|--------| 55 | | pose_resnet_152 | 384x288 | 68.6M | 35.3 | 0.737 | 0.919 | 0.828 | 0.713 | 0.800 | 0.790 | 0.952 | 0.856 | 0.748 | 0.849 | 56 | | **pose_hrnet_w48** | 384x288 | 63.6M | 32.9 | 0.755 | 0.925 | 0.833 | 0.719 | 0.815 | 0.805 | 0.957 | 0.874 | 0.763 | 0.863 | 57 | | **pose_hrnet_w48\*** | 384x288 | 63.6M | 32.9 | 0.770 | 0.927 | 0.845 | 0.734 | 0.831 | 0.820 | 0.960 | 0.886 | 0.778 | 0.877 | 58 | 59 | ### Note: 60 | - Flip test is used. 61 | - Person detector has person AP of 60.9 on COCO test-dev2017 dataset. 62 | - pose_resnet_152 is our previous work of [*Simple Baselines for Human Pose Estimation and Tracking*](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html). 63 | - GFLOPs is for convolution and linear layers only. 64 | - pose_hrnet_w48\* means using additional data from [AI challenger](https://challenger.ai/dataset/keypoint) for training. 65 | 66 | ## Environment 67 | The code is developed using python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 4 NVIDIA P100 GPU cards. Other platforms or GPU cards are not fully tested. 68 | 69 | ## Quick start 70 | ### Installation 71 | 1. Install pytorch >= v1.0.0 following [official instruction](https://pytorch.org/). 72 | **Note that if you use pytorch's version < v1.0.0, you should following the instruction at to disable cudnn's implementations of BatchNorm layer. We encourage you to use higher pytorch's version(>=v1.0.0)** 73 | 2. Clone this repo, and we'll call the directory that you cloned as ${POSE_ROOT}. 74 | 3. Install dependencies: 75 | ``` 76 | pip install -r requirements.txt 77 | ``` 78 | 4. Make libs: 79 | ``` 80 | cd ${POSE_ROOT}/lib 81 | make 82 | ``` 83 | 5. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 84 | ``` 85 | # COCOAPI=/path/to/clone/cocoapi 86 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 87 | cd $COCOAPI/PythonAPI 88 | # Install into global site-packages 89 | make install 90 | # Alternatively, if you do not have permissions or prefer 91 | # not to install the COCO API into global site-packages 92 | python3 setup.py install --user 93 | ``` 94 | Note that instructions like # COCOAPI=/path/to/install/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. 95 | 4. Init output(training model output directory) and log(tensorboard log directory) directory: 96 | 97 | ``` 98 | mkdir output 99 | mkdir log 100 | ``` 101 | 102 | Your directory tree should look like this: 103 | 104 | ``` 105 | ${POSE_ROOT} 106 | ├── data 107 | ├── experiments 108 | ├── lib 109 | ├── log 110 | ├── models 111 | ├── output 112 | ├── tools 113 | ├── README.md 114 | └── requirements.txt 115 | ``` 116 | 117 | 6. 
Download pretrained models from our model zoo([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 118 | ``` 119 | ${POSE_ROOT} 120 | `-- models 121 | `-- pytorch 122 | |-- imagenet 123 | | |-- hrnet_w32-36af842e.pth 124 | | |-- hrnet_w48-8ef0771d.pth 125 | | |-- resnet50-19c8e357.pth 126 | | |-- resnet101-5d3b4d8f.pth 127 | | `-- resnet152-b121ed2d.pth 128 | |-- pose_coco 129 | | |-- pose_hrnet_w32_256x192.pth 130 | | |-- pose_hrnet_w32_384x288.pth 131 | | |-- pose_hrnet_w48_256x192.pth 132 | | |-- pose_hrnet_w48_384x288.pth 133 | | |-- pose_resnet_101_256x192.pth 134 | | |-- pose_resnet_101_384x288.pth 135 | | |-- pose_resnet_152_256x192.pth 136 | | |-- pose_resnet_152_384x288.pth 137 | | |-- pose_resnet_50_256x192.pth 138 | | `-- pose_resnet_50_384x288.pth 139 | `-- pose_mpii 140 | |-- pose_hrnet_w32_256x256.pth 141 | |-- pose_hrnet_w48_256x256.pth 142 | |-- pose_resnet_101_256x256.pth 143 | |-- pose_resnet_152_256x256.pth 144 | `-- pose_resnet_50_256x256.pth 145 | 146 | ``` 147 | 148 | ### Data preparation 149 | **For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). The original annotation files are in matlab format. We have converted them into json format, you also need to download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW00SqrairNetmeVu4) or [GoogleDrive](https://drive.google.com/drive/folders/1En_VqmStnsXMdldXA6qpqEyDQulnmS3a?usp=sharing). 150 | Extract them under {POSE_ROOT}/data, and make them look like this: 151 | ``` 152 | ${POSE_ROOT} 153 | |-- data 154 | `-- |-- mpii 155 | `-- |-- annot 156 | | |-- gt_valid.mat 157 | | |-- test.json 158 | | |-- train.json 159 | | |-- trainval.json 160 | | `-- valid.json 161 | `-- images 162 | |-- 000001163.jpg 163 | |-- 000003072.jpg 164 | ``` 165 | 166 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. We also provide person detection result of COCO val2017 and test-dev2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing). 167 | Download and extract them under {POSE_ROOT}/data, and make them look like this: 168 | ``` 169 | ${POSE_ROOT} 170 | |-- data 171 | `-- |-- coco 172 | `-- |-- annotations 173 | | |-- person_keypoints_train2017.json 174 | | `-- person_keypoints_val2017.json 175 | |-- person_detection_results 176 | | |-- COCO_val2017_detections_AP_H_56_person.json 177 | | |-- COCO_test-dev2017_detections_AP_H_609_person.json 178 | `-- images 179 | |-- train2017 180 | | |-- 000000000009.jpg 181 | | |-- 000000000025.jpg 182 | | |-- 000000000030.jpg 183 | | |-- ... 184 | `-- val2017 185 | |-- 000000000139.jpg 186 | |-- 000000000285.jpg 187 | |-- 000000000632.jpg 188 | |-- ... 
189 | ``` 190 | 191 | ### Training and Testing 192 | 193 | #### Testing on MPII dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 194 | 195 | 196 | ``` 197 | python tools/test.py \ 198 | --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml \ 199 | TEST.MODEL_FILE models/pytorch/pose_mpii/pose_hrnet_w32_256x256.pth 200 | ``` 201 | 202 | #### Training on MPII dataset 203 | 204 | ``` 205 | python tools/train.py \ 206 | --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml 207 | ``` 208 | 209 | #### Testing on COCO val2017 dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 210 | 211 | 212 | ``` 213 | python tools/test.py \ 214 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \ 215 | TEST.MODEL_FILE models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth \ 216 | TEST.USE_GT_BBOX False 217 | ``` 218 | 219 | #### Training on COCO train2017 dataset 220 | 221 | ``` 222 | python tools/train.py \ 223 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \ 224 | ``` 225 | 226 | ### Visualization 227 | 228 | #### Visualizing predictions on COCO val 229 | 230 | ``` 231 | python visualization/plot_coco.py \ 232 | --prediction output/coco/w48_384x288_adam_lr1e-3/results/keypoints_val2017_results_0.json \ 233 | --save-path visualization/results 234 | 235 | ``` 236 | 237 | 238 | 239 | 240 | 241 | 242 | ### Other applications 243 | Many other dense prediction tasks, such as segmentation, face alignment and object detection, etc. have been benefited by HRNet. More information can be found at [High-Resolution Networks](https://github.com/HRNet). 244 | 245 | ### Other implementation 246 | [mmpose](https://github.com/open-mmlab/mmpose)
247 | [ModelScope (Chinese)](https://modelscope.cn/models/damo/cv_hrnetv2w32_body-2d-keypoints_image/summary)
248 | [timm](https://huggingface.co/docs/timm/main/en/models/hrnet) 249 | 250 | 251 | ### Citation 252 | If you use our code or models in your research, please cite with: 253 | ``` 254 | @inproceedings{sun2019deep, 255 | title={Deep High-Resolution Representation Learning for Human Pose Estimation}, 256 | author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, 257 | booktitle={CVPR}, 258 | year={2019} 259 | } 260 | 261 | @inproceedings{xiao2018simple, 262 | author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, 263 | title={Simple Baselines for Human Pose Estimation and Tracking}, 264 | booktitle = {European Conference on Computer Vision (ECCV)}, 265 | year = {2018} 266 | } 267 | 268 | @article{WangSCJDZLMTWLX19, 269 | title={Deep High-Resolution Representation Learning for Visual Recognition}, 270 | author={Jingdong Wang and Ke Sun and Tianheng Cheng and 271 | Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and 272 | Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, 273 | journal = {TPAMI} 274 | year={2019} 275 | } 276 | ``` 277 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | output 2 | models 3 | videos 4 | -------------------------------------------------------------------------------- /demo/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04 2 | 3 | ENV OPENCV_VERSION="3.4.6" 4 | 5 | # Basic toolchain 6 | RUN apt-get update && apt-get install -y \ 7 | apt-utils \ 8 | build-essential \ 9 | git \ 10 | wget \ 11 | unzip \ 12 | yasm \ 13 | pkg-config \ 14 | libcurl4-openssl-dev \ 15 | zlib1g-dev \ 16 | htop \ 17 | cmake \ 18 | nano \ 19 | python3-pip \ 20 | python3-dev \ 21 | python3-tk \ 22 | libx264-dev \ 23 | && cd /usr/local/bin \ 24 | && ln -s /usr/bin/python3 python \ 25 | && pip3 install --upgrade pip \ 26 | && apt-get autoremove -y 27 | 28 | # Getting OpenCV dependencies available with apt 29 | RUN apt-get update && apt-get install -y \ 30 | libeigen3-dev \ 31 | libjpeg-dev \ 32 | libpng-dev \ 33 | libtiff-dev \ 34 | libjasper-dev \ 35 | libswscale-dev \ 36 | libavcodec-dev \ 37 | libavformat-dev && \ 38 | apt-get autoremove -y 39 | 40 | # Getting other dependencies 41 | RUN apt-get update && apt-get install -y \ 42 | cppcheck \ 43 | graphviz \ 44 | doxygen \ 45 | p7zip-full \ 46 | libdlib18 \ 47 | libdlib-dev && \ 48 | apt-get autoremove -y 49 | 50 | 51 | # Install OpenCV + OpenCV contrib (takes forever) 52 | RUN mkdir -p /tmp && \ 53 | cd /tmp && \ 54 | wget --no-check-certificate -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \ 55 | wget --no-check-certificate -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \ 56 | unzip opencv.zip && \ 57 | unzip opencv_contrib.zip && \ 58 | mkdir opencv-${OPENCV_VERSION}/build && \ 59 | cd opencv-${OPENCV_VERSION}/build && \ 60 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 61 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 62 | -D WITH_CUDA=ON \ 63 | -D CUDA_FAST_MATH=1 \ 64 | -D WITH_CUBLAS=1 \ 65 | -D WITH_FFMPEG=ON \ 66 | -D WITH_OPENCL=ON \ 67 | -D WITH_V4L=ON \ 68 | -D WITH_OPENGL=ON \ 69 | -D 
OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \ 70 | .. && \ 71 | make -j$(nproc) && \ 72 | make install && \ 73 | echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf && \ 74 | ldconfig && \ 75 | cd /tmp && \ 76 | rm -rf opencv-${OPENCV_VERSION} opencv.zip opencv_contrib-${OPENCV_VERSION} opencv_contrib.zip && \ 77 | cd / 78 | 79 | # Compile and install ffmpeg from source 80 | RUN git clone https://github.com/FFmpeg/FFmpeg /root/ffmpeg && \ 81 | cd /root/ffmpeg && \ 82 | ./configure --enable-gpl --enable-libx264 --enable-nonfree --disable-shared --extra-cflags=-I/usr/local/include && \ 83 | make -j8 && make install -j8 84 | 85 | # clone deep-high-resolution-net 86 | ARG POSE_ROOT=/pose_root 87 | RUN git clone https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git $POSE_ROOT 88 | WORKDIR $POSE_ROOT 89 | RUN mkdir output && mkdir log 90 | 91 | RUN pip3 install -r requirements.txt && \ 92 | pip3 install torch==1.1.0 \ 93 | torchvision==0.3.0 \ 94 | opencv-python \ 95 | pillow==6.2.1 96 | 97 | # build deep-high-resolution-net lib 98 | WORKDIR $POSE_ROOT/lib 99 | RUN make 100 | 101 | # install COCO API 102 | ARG COCOAPI=/cocoapi 103 | RUN git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 104 | WORKDIR $COCOAPI/PythonAPI 105 | # Install into global site-packages 106 | RUN make install 107 | 108 | # download fastrrnn pretrained model for person detection 109 | RUN python -c "import torchvision; model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True); model.eval()" 110 | 111 | COPY inference.py $POSE_ROOT/tools 112 | COPY inference-config.yaml $POSE_ROOT/ 113 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Inference hrnet 2 | 3 | Inferencing the deep-high-resolution-net.pytoch without using Docker. 4 | 5 | ## Prep 6 | 1. Download the researchers' pretrained pose estimator from [google drive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) to this directory under `models/` 7 | 2. Put the video file you'd like to infer on in this directory under `videos` 8 | 3. (OPTIONAL) build the docker container in this directory with `./build-docker.sh` (this can take time because it involves compiling opencv) 9 | 4. update the `inference-config.yaml` file to reflect the number of GPUs you have available and which trained model you want to use. 10 | 11 | ## Running the Model 12 | ### 1. Running on the video 13 | ``` 14 | python demo/inference.py --cfg demo/inference-config.yaml \ 15 | --videoFile ../../multi_people.mp4 \ 16 | --writeBoxFrames \ 17 | --outputDir output \ 18 | TEST.MODEL_FILE ../models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth 19 | 20 | ``` 21 | 22 | The above command will create a video under *output* directory and a lot of pose image under *output/pose* directory. 23 | Even with usage of GPU (GTX1080 in my case), the person detection will take nearly **0.06 sec**, the person pose match will 24 | take nearly **0.07 sec**. In total. inference time per frame will be **0.13 sec**, nearly 10fps. So if you prefer a real-time (fps >= 20) 25 | pose estimation then you should try other approach. 
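(For reference, 0.06 s + 0.07 s is about 0.13 s per frame, i.e. roughly 7 to 8 fps on that GPU.) One way to reproduce the per-stage timings is to wrap the two calls with a wall-clock timer. A hedged sketch is below: `get_person_detection_boxes` and `get_pose_estimation_prediction` are the helpers defined later in `demo/demo.py`, while the `timed` wrapper and the variable names are illustrative only:

```python
import time


def timed(fn, *args, **kwargs):
    """Run fn and return (result, elapsed wall-clock seconds)."""
    start = time.time()
    result = fn(*args, **kwargs)
    return result, time.time() - start

# Inside the per-frame loop of demo/demo.py one could write, for example:
#   boxes, det_sec = timed(get_person_detection_boxes, box_model, inputs, threshold=0.9)
#   preds, pose_sec = timed(get_pose_estimation_prediction, pose_model, image_pose, center, scale)
#   print('fps: %.1f' % (1.0 / (det_sec + pose_sec)))   # 1 / (0.06 + 0.07) ~ 7.7
```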
26 | 27 | **===Result===** 28 | 29 | Some output images are as: 30 | 31 | ![1 person](inference_1.jpg) 32 | Fig: 1 person inference 33 | 34 | ![3 person](inference_3.jpg) 35 | Fig: 3 person inference 36 | 37 | ![3 person](inference_5.jpg) 38 | Fig: 3 person inference 39 | 40 | ### 2. Demo with more common functions 41 | Remember to update` TEST.MODEL_FILE` in `demo/inference-config.yaml `according to your model path. 42 | 43 | `demo.py` provides the following functions: 44 | 45 | - use `--webcam` when the input is a real-time camera. 46 | - use `--video [video-path]` when the input is a video. 47 | - use `--image [image-path]` when the input is an image. 48 | - use `--write` to save the image, camera or video result. 49 | - use `--showFps` to show the fps (this fps includes the detection part). 50 | - draw connections between joints. 51 | 52 | #### (1) the input is a real-time carema 53 | ```python 54 | python demo/demo.py --webcam --showFps --write 55 | ``` 56 | 57 | #### (2) the input is a video 58 | ```python 59 | python demo/demo.py --video test.mp4 --showFps --write 60 | ``` 61 | #### (3) the input is a image 62 | 63 | ```python 64 | python demo/demo.py --image test.jpg --showFps --write 65 | ``` 66 | 67 | **===Result===** 68 | 69 | ![show_fps](inference_6.jpg) 70 | 71 | Fig: show fps 72 | 73 | ![multi-people](inference_7.jpg) 74 | 75 | Fig: multi-people -------------------------------------------------------------------------------- /demo/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | -------------------------------------------------------------------------------- /demo/build-docker.sh: -------------------------------------------------------------------------------- 1 | docker build -t hrnet_demo_inference . 
2 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import csv 7 | import os 8 | import shutil 9 | 10 | from PIL import Image 11 | import torch 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.optim 15 | import torch.utils.data 16 | import torch.utils.data.distributed 17 | import torchvision.transforms as transforms 18 | import torchvision 19 | import cv2 20 | import numpy as np 21 | import time 22 | 23 | 24 | import _init_paths 25 | import models 26 | from config import cfg 27 | from config import update_config 28 | from core.function import get_final_preds 29 | from utils.transforms import get_affine_transform 30 | 31 | COCO_KEYPOINT_INDEXES = { 32 | 0: 'nose', 33 | 1: 'left_eye', 34 | 2: 'right_eye', 35 | 3: 'left_ear', 36 | 4: 'right_ear', 37 | 5: 'left_shoulder', 38 | 6: 'right_shoulder', 39 | 7: 'left_elbow', 40 | 8: 'right_elbow', 41 | 9: 'left_wrist', 42 | 10: 'right_wrist', 43 | 11: 'left_hip', 44 | 12: 'right_hip', 45 | 13: 'left_knee', 46 | 14: 'right_knee', 47 | 15: 'left_ankle', 48 | 16: 'right_ankle' 49 | } 50 | 51 | COCO_INSTANCE_CATEGORY_NAMES = [ 52 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 53 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 54 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 55 | 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 56 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 57 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 58 | 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 59 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 60 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 61 | 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 62 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 63 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 64 | ] 65 | 66 | SKELETON = [ 67 | [1,3],[1,0],[2,4],[2,0],[0,5],[0,6],[5,7],[7,9],[6,8],[8,10],[5,11],[6,12],[11,12],[11,13],[13,15],[12,14],[14,16] 68 | ] 69 | 70 | CocoColors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], 71 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], 72 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 73 | 74 | NUM_KPTS = 17 75 | 76 | CTX = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 77 | 78 | def draw_pose(keypoints,img): 79 | """draw the keypoints and the skeletons. 
80 | :params keypoints: the shape should be equal to [17,2] 81 | :params img: 82 | """ 83 | assert keypoints.shape == (NUM_KPTS,2) 84 | for i in range(len(SKELETON)): 85 | kpt_a, kpt_b = SKELETON[i][0], SKELETON[i][1] 86 | x_a, y_a = keypoints[kpt_a][0],keypoints[kpt_a][1] 87 | x_b, y_b = keypoints[kpt_b][0],keypoints[kpt_b][1] 88 | cv2.circle(img, (int(x_a), int(y_a)), 6, CocoColors[i], -1) 89 | cv2.circle(img, (int(x_b), int(y_b)), 6, CocoColors[i], -1) 90 | cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), CocoColors[i], 2) 91 | 92 | def draw_bbox(box,img): 93 | """draw the detected bounding box on the image. 94 | :param img: 95 | """ 96 | cv2.rectangle(img, box[0], box[1], color=(0, 255, 0),thickness=3) 97 | 98 | 99 | def get_person_detection_boxes(model, img, threshold=0.5): 100 | pred = model(img) 101 | pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i] 102 | for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score 103 | pred_boxes = [[(i[0], i[1]), (i[2], i[3])] 104 | for i in list(pred[0]['boxes'].detach().cpu().numpy())] # Bounding boxes 105 | pred_score = list(pred[0]['scores'].detach().cpu().numpy()) 106 | if not pred_score or max(pred_score) threshold][-1] 110 | pred_boxes = pred_boxes[:pred_t+1] 111 | pred_classes = pred_classes[:pred_t+1] 112 | 113 | person_boxes = [] 114 | for idx, box in enumerate(pred_boxes): 115 | if pred_classes[idx] == 'person': 116 | person_boxes.append(box) 117 | 118 | return person_boxes 119 | 120 | 121 | def get_pose_estimation_prediction(pose_model, image, center, scale): 122 | rotation = 0 123 | 124 | # pose estimation transformation 125 | trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE) 126 | model_input = cv2.warpAffine( 127 | image, 128 | trans, 129 | (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])), 130 | flags=cv2.INTER_LINEAR) 131 | transform = transforms.Compose([ 132 | transforms.ToTensor(), 133 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 134 | std=[0.229, 0.224, 0.225]), 135 | ]) 136 | 137 | # pose estimation inference 138 | model_input = transform(model_input).unsqueeze(0) 139 | # switch to evaluate mode 140 | pose_model.eval() 141 | with torch.no_grad(): 142 | # compute output heatmap 143 | output = pose_model(model_input) 144 | preds, _ = get_final_preds( 145 | cfg, 146 | output.clone().cpu().numpy(), 147 | np.asarray([center]), 148 | np.asarray([scale])) 149 | 150 | return preds 151 | 152 | 153 | def box_to_center_scale(box, model_image_width, model_image_height): 154 | """convert a box to center,scale information required for pose transformation 155 | Parameters 156 | ---------- 157 | box : list of tuple 158 | list of length 2 with two tuples of floats representing 159 | bottom left and top right corner of a box 160 | model_image_width : int 161 | model_image_height : int 162 | 163 | Returns 164 | ------- 165 | (numpy array, numpy array) 166 | Two numpy arrays, coordinates for the center of the box and the scale of the box 167 | """ 168 | center = np.zeros((2), dtype=np.float32) 169 | 170 | bottom_left_corner = box[0] 171 | top_right_corner = box[1] 172 | box_width = top_right_corner[0]-bottom_left_corner[0] 173 | box_height = top_right_corner[1]-bottom_left_corner[1] 174 | bottom_left_x = bottom_left_corner[0] 175 | bottom_left_y = bottom_left_corner[1] 176 | center[0] = bottom_left_x + box_width * 0.5 177 | center[1] = bottom_left_y + box_height * 0.5 178 | 179 | aspect_ratio = model_image_width * 1.0 / model_image_height 180 | pixel_std = 200 181 | 182 | if 
box_width > aspect_ratio * box_height: 183 | box_height = box_width * 1.0 / aspect_ratio 184 | elif box_width < aspect_ratio * box_height: 185 | box_width = box_height * aspect_ratio 186 | scale = np.array( 187 | [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std], 188 | dtype=np.float32) 189 | if center[0] != -1: 190 | scale = scale * 1.25 191 | 192 | return center, scale 193 | 194 | def parse_args(): 195 | parser = argparse.ArgumentParser(description='Train keypoints network') 196 | # general 197 | parser.add_argument('--cfg', type=str, default='demo/inference-config.yaml') 198 | parser.add_argument('--video', type=str) 199 | parser.add_argument('--webcam',action='store_true') 200 | parser.add_argument('--image',type=str) 201 | parser.add_argument('--write',action='store_true') 202 | parser.add_argument('--showFps',action='store_true') 203 | 204 | parser.add_argument('opts', 205 | help='Modify config options using the command-line', 206 | default=None, 207 | nargs=argparse.REMAINDER) 208 | 209 | args = parser.parse_args() 210 | 211 | # args expected by supporting codebase 212 | args.modelDir = '' 213 | args.logDir = '' 214 | args.dataDir = '' 215 | args.prevModelDir = '' 216 | return args 217 | 218 | 219 | def main(): 220 | # cudnn related setting 221 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 222 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 223 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 224 | 225 | args = parse_args() 226 | update_config(cfg, args) 227 | 228 | box_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 229 | box_model.to(CTX) 230 | box_model.eval() 231 | 232 | pose_model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 233 | cfg, is_train=False 234 | ) 235 | 236 | if cfg.TEST.MODEL_FILE: 237 | print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 238 | pose_model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 239 | else: 240 | print('expected model defined in config at TEST.MODEL_FILE') 241 | 242 | pose_model = torch.nn.DataParallel(pose_model, device_ids=cfg.GPUS) 243 | pose_model.to(CTX) 244 | pose_model.eval() 245 | 246 | # Loading an video or an image or webcam 247 | if args.webcam: 248 | vidcap = cv2.VideoCapture(0) 249 | elif args.video: 250 | vidcap = cv2.VideoCapture(args.video) 251 | elif args.image: 252 | image_bgr = cv2.imread(args.image) 253 | else: 254 | print('please use --video or --webcam or --image to define the input.') 255 | return 256 | 257 | if args.webcam or args.video: 258 | if args.write: 259 | save_path = 'output.avi' 260 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 261 | out = cv2.VideoWriter(save_path,fourcc, 24.0, (int(vidcap.get(3)),int(vidcap.get(4)))) 262 | while True: 263 | ret, image_bgr = vidcap.read() 264 | if ret: 265 | last_time = time.time() 266 | image = image_bgr[:, :, [2, 1, 0]] 267 | 268 | input = [] 269 | img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 270 | img_tensor = torch.from_numpy(img/255.).permute(2,0,1).float().to(CTX) 271 | input.append(img_tensor) 272 | 273 | # object detection box 274 | pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.9) 275 | 276 | # pose estimation 277 | if len(pred_boxes) >= 1: 278 | for box in pred_boxes: 279 | center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) 280 | image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() 281 | pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) 282 | if len(pose_preds)>=1: 283 | 
for kpt in pose_preds: 284 | draw_pose(kpt,image_bgr) # draw the poses 285 | 286 | if args.showFps: 287 | fps = 1/(time.time()-last_time) 288 | img = cv2.putText(image_bgr, 'fps: '+ "%.2f"%(fps), (25, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) 289 | 290 | if args.write: 291 | out.write(image_bgr) 292 | 293 | cv2.imshow('demo',image_bgr) 294 | if cv2.waitKey(1) & 0XFF==ord('q'): 295 | break 296 | else: 297 | print('cannot load the video.') 298 | break 299 | 300 | cv2.destroyAllWindows() 301 | vidcap.release() 302 | if args.write: 303 | print('video has been saved as {}'.format(save_path)) 304 | out.release() 305 | 306 | else: 307 | # estimate on the image 308 | last_time = time.time() 309 | image = image_bgr[:, :, [2, 1, 0]] 310 | 311 | input = [] 312 | img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 313 | img_tensor = torch.from_numpy(img/255.).permute(2,0,1).float().to(CTX) 314 | input.append(img_tensor) 315 | 316 | # object detection box 317 | pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.9) 318 | 319 | # pose estimation 320 | if len(pred_boxes) >= 1: 321 | for box in pred_boxes: 322 | center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) 323 | image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() 324 | pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) 325 | if len(pose_preds)>=1: 326 | for kpt in pose_preds: 327 | draw_pose(kpt,image_bgr) # draw the poses 328 | 329 | if args.showFps: 330 | fps = 1/(time.time()-last_time) 331 | img = cv2.putText(image_bgr, 'fps: '+ "%.2f"%(fps), (25, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) 332 | 333 | if args.write: 334 | save_path = 'output.jpg' 335 | cv2.imwrite(save_path,image_bgr) 336 | print('the result image has been saved as {}'.format(save_path)) 337 | 338 | cv2.imshow('demo',image_bgr) 339 | if cv2.waitKey(0) & 0XFF==ord('q'): 340 | cv2.destroyAllWindows() 341 | 342 | if __name__ == '__main__': 343 | main() 344 | -------------------------------------------------------------------------------- /demo/hrnet-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/hrnet-demo.gif -------------------------------------------------------------------------------- /demo/inference-config.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | 
NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /demo/inference.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import csv 7 | import os 8 | import shutil 9 | 10 | from PIL import Image 11 | import torch 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.optim 15 | import torch.utils.data 16 | import torch.utils.data.distributed 17 | import torchvision.transforms as transforms 18 | import torchvision 19 | import cv2 20 | import numpy as np 21 | 22 | import sys 23 | sys.path.append("../lib") 24 | import time 25 | 26 | # import _init_paths 27 | import models 28 | from config import cfg 29 | from config import update_config 30 | from core.inference import get_final_preds 31 | from utils.transforms import get_affine_transform 32 | 33 | CTX = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 34 | 35 | 36 | COCO_KEYPOINT_INDEXES = { 37 | 0: 'nose', 38 | 1: 'left_eye', 39 | 2: 'right_eye', 40 | 3: 'left_ear', 41 | 4: 'right_ear', 42 | 5: 'left_shoulder', 43 | 6: 'right_shoulder', 44 | 7: 'left_elbow', 45 | 8: 'right_elbow', 46 | 9: 'left_wrist', 47 | 10: 'right_wrist', 48 | 11: 'left_hip', 49 | 12: 'right_hip', 50 | 13: 'left_knee', 51 | 14: 'right_knee', 52 | 15: 'left_ankle', 53 | 16: 'right_ankle' 54 | } 55 | 56 | COCO_INSTANCE_CATEGORY_NAMES = [ 57 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 58 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 59 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 60 | 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 61 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 62 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 63 | 'bottle', 'N/A', 'wine glass', 
'cup', 'fork', 'knife', 'spoon', 'bowl', 64 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 65 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 66 | 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 67 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 68 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 69 | ] 70 | 71 | 72 | def get_person_detection_boxes(model, img, threshold=0.5): 73 | pil_image = Image.fromarray(img) # Load the image 74 | transform = transforms.Compose([transforms.ToTensor()]) # Defing PyTorch Transform 75 | transformed_img = transform(pil_image) # Apply the transform to the image 76 | pred = model([transformed_img.to(CTX)]) # Pass the image to the model 77 | # Use the first detected person 78 | pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i] 79 | for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score 80 | pred_boxes = [[(i[0], i[1]), (i[2], i[3])] 81 | for i in list(pred[0]['boxes'].cpu().detach().numpy())] # Bounding boxes 82 | pred_scores = list(pred[0]['scores'].cpu().detach().numpy()) 83 | 84 | person_boxes = [] 85 | # Select box has score larger than threshold and is person 86 | for pred_class, pred_box, pred_score in zip(pred_classes, pred_boxes, pred_scores): 87 | if (pred_score > threshold) and (pred_class == 'person'): 88 | person_boxes.append(pred_box) 89 | 90 | return person_boxes 91 | 92 | 93 | def get_pose_estimation_prediction(pose_model, image, centers, scales, transform): 94 | rotation = 0 95 | 96 | # pose estimation transformation 97 | model_inputs = [] 98 | for center, scale in zip(centers, scales): 99 | trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE) 100 | # Crop smaller image of people 101 | model_input = cv2.warpAffine( 102 | image, 103 | trans, 104 | (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])), 105 | flags=cv2.INTER_LINEAR) 106 | 107 | # hwc -> 1chw 108 | model_input = transform(model_input)#.unsqueeze(0) 109 | model_inputs.append(model_input) 110 | 111 | # n * 1chw -> nchw 112 | model_inputs = torch.stack(model_inputs) 113 | 114 | # compute output heatmap 115 | output = pose_model(model_inputs.to(CTX)) 116 | coords, _ = get_final_preds( 117 | cfg, 118 | output.cpu().detach().numpy(), 119 | np.asarray(centers), 120 | np.asarray(scales)) 121 | 122 | return coords 123 | 124 | 125 | def box_to_center_scale(box, model_image_width, model_image_height): 126 | """convert a box to center,scale information required for pose transformation 127 | Parameters 128 | ---------- 129 | box : list of tuple 130 | list of length 2 with two tuples of floats representing 131 | bottom left and top right corner of a box 132 | model_image_width : int 133 | model_image_height : int 134 | 135 | Returns 136 | ------- 137 | (numpy array, numpy array) 138 | Two numpy arrays, coordinates for the center of the box and the scale of the box 139 | """ 140 | center = np.zeros((2), dtype=np.float32) 141 | 142 | bottom_left_corner = box[0] 143 | top_right_corner = box[1] 144 | box_width = top_right_corner[0]-bottom_left_corner[0] 145 | box_height = top_right_corner[1]-bottom_left_corner[1] 146 | bottom_left_x = bottom_left_corner[0] 147 | bottom_left_y = bottom_left_corner[1] 148 | center[0] = bottom_left_x + box_width * 0.5 149 | center[1] = bottom_left_y + box_height * 0.5 150 | 151 | aspect_ratio = model_image_width * 1.0 / model_image_height 152 | pixel_std = 
200 153 | 154 | if box_width > aspect_ratio * box_height: 155 | box_height = box_width * 1.0 / aspect_ratio 156 | elif box_width < aspect_ratio * box_height: 157 | box_width = box_height * aspect_ratio 158 | scale = np.array( 159 | [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std], 160 | dtype=np.float32) 161 | if center[0] != -1: 162 | scale = scale * 1.25 163 | 164 | return center, scale 165 | 166 | 167 | def prepare_output_dirs(prefix='/output/'): 168 | pose_dir = os.path.join(prefix, "pose") 169 | if os.path.exists(pose_dir) and os.path.isdir(pose_dir): 170 | shutil.rmtree(pose_dir) 171 | os.makedirs(pose_dir, exist_ok=True) 172 | return pose_dir 173 | 174 | 175 | def parse_args(): 176 | parser = argparse.ArgumentParser(description='Train keypoints network') 177 | # general 178 | parser.add_argument('--cfg', type=str, required=True) 179 | parser.add_argument('--videoFile', type=str, required=True) 180 | parser.add_argument('--outputDir', type=str, default='/output/') 181 | parser.add_argument('--inferenceFps', type=int, default=10) 182 | parser.add_argument('--writeBoxFrames', action='store_true') 183 | 184 | parser.add_argument('opts', 185 | help='Modify config options using the command-line', 186 | default=None, 187 | nargs=argparse.REMAINDER) 188 | 189 | args = parser.parse_args() 190 | 191 | # args expected by supporting codebase 192 | args.modelDir = '' 193 | args.logDir = '' 194 | args.dataDir = '' 195 | args.prevModelDir = '' 196 | return args 197 | 198 | 199 | def main(): 200 | # transformation 201 | pose_transform = transforms.Compose([ 202 | transforms.ToTensor(), 203 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 204 | std=[0.229, 0.224, 0.225]), 205 | ]) 206 | 207 | # cudnn related setting 208 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 209 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 210 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 211 | 212 | args = parse_args() 213 | update_config(cfg, args) 214 | pose_dir = prepare_output_dirs(args.outputDir) 215 | csv_output_rows = [] 216 | 217 | box_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 218 | box_model.to(CTX) 219 | box_model.eval() 220 | pose_model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 221 | cfg, is_train=False 222 | ) 223 | 224 | if cfg.TEST.MODEL_FILE: 225 | print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 226 | pose_model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 227 | else: 228 | print('expected model defined in config at TEST.MODEL_FILE') 229 | 230 | pose_model.to(CTX) 231 | pose_model.eval() 232 | 233 | # Loading an video 234 | vidcap = cv2.VideoCapture(args.videoFile) 235 | fps = vidcap.get(cv2.CAP_PROP_FPS) 236 | if fps < args.inferenceFps: 237 | print('desired inference fps is '+str(args.inferenceFps)+' but video fps is '+str(fps)) 238 | exit() 239 | skip_frame_cnt = round(fps / args.inferenceFps) 240 | frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH)) 241 | frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 242 | outcap = cv2.VideoWriter('{}/{}_pose.avi'.format(args.outputDir, os.path.splitext(os.path.basename(args.videoFile))[0]), 243 | cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), int(skip_frame_cnt), (frame_width, frame_height)) 244 | 245 | count = 0 246 | while vidcap.isOpened(): 247 | total_now = time.time() 248 | ret, image_bgr = vidcap.read() 249 | count += 1 250 | 251 | if not ret: 252 | continue 253 | 254 | if count % skip_frame_cnt != 0: 255 | continue 256 | 257 | image_rgb = 
cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 258 | 259 | # Clone 2 image for person detection and pose estimation 260 | if cfg.DATASET.COLOR_RGB: 261 | image_per = image_rgb.copy() 262 | image_pose = image_rgb.copy() 263 | else: 264 | image_per = image_bgr.copy() 265 | image_pose = image_bgr.copy() 266 | 267 | # Clone 1 image for debugging purpose 268 | image_debug = image_bgr.copy() 269 | 270 | # object detection box 271 | now = time.time() 272 | pred_boxes = get_person_detection_boxes(box_model, image_per, threshold=0.9) 273 | then = time.time() 274 | print("Find person bbox in: {} sec".format(then - now)) 275 | 276 | # Can not find people. Move to next frame 277 | if not pred_boxes: 278 | count += 1 279 | continue 280 | 281 | if args.writeBoxFrames: 282 | for box in pred_boxes: 283 | cv2.rectangle(image_debug, box[0], box[1], color=(0, 255, 0), 284 | thickness=3) # Draw Rectangle with the coordinates 285 | 286 | # pose estimation : for multiple people 287 | centers = [] 288 | scales = [] 289 | for box in pred_boxes: 290 | center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) 291 | centers.append(center) 292 | scales.append(scale) 293 | 294 | now = time.time() 295 | pose_preds = get_pose_estimation_prediction(pose_model, image_pose, centers, scales, transform=pose_transform) 296 | then = time.time() 297 | print("Find person pose in: {} sec".format(then - now)) 298 | 299 | new_csv_row = [] 300 | for coords in pose_preds: 301 | # Draw each point on image 302 | for coord in coords: 303 | x_coord, y_coord = int(coord[0]), int(coord[1]) 304 | cv2.circle(image_debug, (x_coord, y_coord), 4, (255, 0, 0), 2) 305 | new_csv_row.extend([x_coord, y_coord]) 306 | 307 | total_then = time.time() 308 | 309 | text = "{:03.2f} sec".format(total_then - total_now) 310 | cv2.putText(image_debug, text, (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 311 | 1, (0, 0, 255), 2, cv2.LINE_AA) 312 | 313 | cv2.imshow("pos", image_debug) 314 | if cv2.waitKey(1) & 0xFF == ord('q'): 315 | break 316 | 317 | csv_output_rows.append(new_csv_row) 318 | img_file = os.path.join(pose_dir, 'pose_{:08d}.jpg'.format(count)) 319 | cv2.imwrite(img_file, image_debug) 320 | outcap.write(image_debug) 321 | 322 | 323 | # write csv 324 | csv_headers = ['frame'] 325 | for keypoint in COCO_KEYPOINT_INDEXES.values(): 326 | csv_headers.extend([keypoint+'_x', keypoint+'_y']) 327 | 328 | csv_output_filename = os.path.join(args.outputDir, 'pose-data.csv') 329 | with open(csv_output_filename, 'w', newline='') as csvfile: 330 | csvwriter = csv.writer(csvfile) 331 | csvwriter.writerow(csv_headers) 332 | csvwriter.writerows(csv_output_rows) 333 | 334 | vidcap.release() 335 | outcap.release() 336 | 337 | cv2.destroyAllWindows() 338 | 339 | 340 | if __name__ == '__main__': 341 | main() 342 | -------------------------------------------------------------------------------- /demo/inference_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_1.jpg -------------------------------------------------------------------------------- /demo/inference_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_3.jpg -------------------------------------------------------------------------------- 
/demo/inference_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_5.jpg -------------------------------------------------------------------------------- /demo/inference_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_6.jpg -------------------------------------------------------------------------------- /demo/inference_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_7.jpg -------------------------------------------------------------------------------- /experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | 
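Before the remaining experiment files, a quick note on how they are consumed: the tools and the demo script take one of these YAMLs via `--cfg` (see `parse_args` above) and merge it into the yacs `cfg` node with `update_config`, defined in `lib/config/default.py` further down. A minimal sketch of loading the w32_256x192 file directly, assuming the repo's `lib/` directory is on `PYTHONPATH` (which is what the `_init_paths.py` helpers arrange):

```python
from types import SimpleNamespace

from config import cfg, update_config  # exported by lib/config/__init__.py

# update_config expects an argparse-style namespace (see parse_args above).
args = SimpleNamespace(
    cfg='experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml',
    opts=[],            # extra KEY VALUE overrides, e.g. ['TEST.MODEL_FILE', 'model.pth']
    modelDir='', logDir='', dataDir='',
)
update_config(cfg, args)

# Sizes are stored [width, height]; heatmaps are produced at 1/4 input resolution.
print(cfg.MODEL.IMAGE_SIZE, cfg.MODEL.HEATMAP_SIZE)              # [192, 256] [48, 64]
print(cfg.MODEL.NAME, cfg.DATASET.DATASET, cfg.TRAIN.END_EPOCH)  # pose_hrnet coco 210
```

Across the four COCO HRNet files, only the input/heatmap resolution, SIGMA, the ImageNet-pretrained checkpoint, the per-branch NUM_CHANNELS (32/64/128/256 vs. 48/96/192/384), and, for the largest model, BATCH_SIZE_PER_GPU differ; the Adam schedule (lr 1e-3, steps at epochs 170/200, 210 epochs total) is identical.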
-------------------------------------------------------------------------------- /experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 
256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | 
TRAIN: 94 | BATCH_SIZE_PER_GPU: 24 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 24 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | 
HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | 
DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- 
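The pose_resnet configs above (Simple Baselines-style heads) rely on one piece of arithmetic that is easy to miss: the ResNet backbone reduces resolution by 32x, and the head's three stride-2 deconvolutions (NUM_DECONV_LAYERS: 3, 256 filters, 4x4 kernels) recover a factor of 8, so heatmaps come out at 1/4 of the input size, the same output stride as the HRNet configs. A quick check of the numbers used in these files (a sketch, not code from the repo):

```python
def expected_heatmap_size(image_size, backbone_stride=32, num_deconv_layers=3):
    # Each deconv layer upsamples by 2, so the head's output stride is 32 / 2**3 = 4.
    output_stride = backbone_stride // (2 ** num_deconv_layers)
    return [side // output_stride for side in image_size]

print(expected_heatmap_size([192, 256]))  # [48, 64] -> HEATMAP_SIZE in the 256x192 configs
print(expected_heatmap_size([288, 384]))  # [72, 96] -> HEATMAP_SIZE in the 384x288 configs
```

SIGMA grows with the heatmap resolution (2 at 48x64, 3 at 72x96), so the Gaussian training target covers a similar spatial extent at both input sizes.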
/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | 
NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 
13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 101 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 152 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | 
SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 50 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /figures/hrnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/hrnet.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_610_id_2685_000000002685.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_610_id_2685_000000002685.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_710_id_153229_000000153229.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_710_id_153229_000000153229.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_755_id_343561_000000343561.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_755_id_343561_000000343561.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_755_id_559842_000000559842.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_755_id_559842_000000559842.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_770_id_6954_000000006954.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_770_id_6954_000000006954.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_919_id_53626_000000053626.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_919_id_53626_000000053626.png -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 
5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.DATA_DIR = '' 22 | _C.GPUS = (0,) 23 | _C.WORKERS = 4 24 | _C.PRINT_FREQ = 20 25 | _C.AUTO_RESUME = False 26 | _C.PIN_MEMORY = True 27 | _C.RANK = 0 28 | 29 | # Cudnn related params 30 | _C.CUDNN = CN() 31 | _C.CUDNN.BENCHMARK = True 32 | _C.CUDNN.DETERMINISTIC = False 33 | _C.CUDNN.ENABLED = True 34 | 35 | # common params for NETWORK 36 | _C.MODEL = CN() 37 | _C.MODEL.NAME = 'pose_hrnet' 38 | _C.MODEL.INIT_WEIGHTS = True 39 | _C.MODEL.PRETRAINED = '' 40 | _C.MODEL.NUM_JOINTS = 17 41 | _C.MODEL.TAG_PER_JOINT = True 42 | _C.MODEL.TARGET_TYPE = 'gaussian' 43 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 44 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 45 | _C.MODEL.SIGMA = 2 46 | _C.MODEL.EXTRA = CN(new_allowed=True) 47 | 48 | _C.LOSS = CN() 49 | _C.LOSS.USE_OHKM = False 50 | _C.LOSS.TOPK = 8 51 | _C.LOSS.USE_TARGET_WEIGHT = True 52 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 53 | 54 | # DATASET related params 55 | _C.DATASET = CN() 56 | _C.DATASET.ROOT = '' 57 | _C.DATASET.DATASET = 'mpii' 58 | _C.DATASET.TRAIN_SET = 'train' 59 | _C.DATASET.TEST_SET = 'valid' 60 | _C.DATASET.DATA_FORMAT = 'jpg' 61 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 62 | _C.DATASET.SELECT_DATA = False 63 | 64 | # training data augmentation 65 | _C.DATASET.FLIP = True 66 | _C.DATASET.SCALE_FACTOR = 0.25 67 | _C.DATASET.ROT_FACTOR = 30 68 | _C.DATASET.PROB_HALF_BODY = 0.0 69 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 70 | _C.DATASET.COLOR_RGB = False 71 | 72 | # train 73 | _C.TRAIN = CN() 74 | 75 | _C.TRAIN.LR_FACTOR = 0.1 76 | _C.TRAIN.LR_STEP = [90, 110] 77 | _C.TRAIN.LR = 0.001 78 | 79 | _C.TRAIN.OPTIMIZER = 'adam' 80 | _C.TRAIN.MOMENTUM = 0.9 81 | _C.TRAIN.WD = 0.0001 82 | _C.TRAIN.NESTEROV = False 83 | _C.TRAIN.GAMMA1 = 0.99 84 | _C.TRAIN.GAMMA2 = 0.0 85 | 86 | _C.TRAIN.BEGIN_EPOCH = 0 87 | _C.TRAIN.END_EPOCH = 140 88 | 89 | _C.TRAIN.RESUME = False 90 | _C.TRAIN.CHECKPOINT = '' 91 | 92 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 93 | _C.TRAIN.SHUFFLE = True 94 | 95 | # testing 96 | _C.TEST = CN() 97 | 98 | # size of images for each device 99 | _C.TEST.BATCH_SIZE_PER_GPU = 32 100 | # Test Model Epoch 101 | _C.TEST.FLIP_TEST = False 102 | _C.TEST.POST_PROCESS = False 103 | _C.TEST.SHIFT_HEATMAP = False 104 | 105 | _C.TEST.USE_GT_BBOX = False 106 | 107 | # nms 108 | _C.TEST.IMAGE_THRE = 0.1 109 | _C.TEST.NMS_THRE = 0.6 110 | _C.TEST.SOFT_NMS = False 111 | _C.TEST.OKS_THRE = 0.5 112 | _C.TEST.IN_VIS_THRE = 0.0 113 | _C.TEST.COCO_BBOX_FILE = '' 114 | _C.TEST.BBOX_THRE = 1.0 115 | _C.TEST.MODEL_FILE = '' 116 | 117 | # debug 118 | _C.DEBUG = CN() 119 | _C.DEBUG.DEBUG = False 120 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 121 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 122 | _C.DEBUG.SAVE_HEATMAPS_GT = False 123 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 124 | 125 | 126 | def update_config(cfg, args): 127 | cfg.defrost() 128 | cfg.merge_from_file(args.cfg) 129 | cfg.merge_from_list(args.opts) 130 | 131 | if args.modelDir: 132 | cfg.OUTPUT_DIR = args.modelDir 133 | 134 | if args.logDir: 135 | cfg.LOG_DIR = args.logDir 136 | 137 | if args.dataDir: 138 | cfg.DATA_DIR = args.dataDir 139 | 140 | 
cfg.DATASET.ROOT = os.path.join( 141 | cfg.DATA_DIR, cfg.DATASET.ROOT 142 | ) 143 | 144 | cfg.MODEL.PRETRAINED = os.path.join( 145 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 146 | ) 147 | 148 | if cfg.TEST.MODEL_FILE: 149 | cfg.TEST.MODEL_FILE = os.path.join( 150 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 151 | ) 152 | 153 | cfg.freeze() 154 | 155 | 156 | if __name__ == '__main__': 157 | import sys 158 | with open(sys.argv[1], 'w') as f: 159 | print(_C, file=f) 160 | 161 | -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 53 | 54 | 55 | MODEL_EXTRAS = { 56 | 'pose_resnet': POSE_RESNET, 57 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 58 | } 59 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from core.inference import get_max_preds 14 | 15 | 16 | def calc_dists(preds, target, normalize): 17 | preds = preds.astype(np.float32) 18 | target = target.astype(np.float32) 19 | dists = np.zeros((preds.shape[1], preds.shape[0])) 20 | for n in range(preds.shape[0]): 21 | for c in range(preds.shape[1]): 22 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 23 | normed_preds = preds[n, c, :] / normalize[n] 24 | normed_targets = target[n, c, :] / normalize[n] 25 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 26 | else: 27 | dists[c, n] = -1 28 | return dists 29 | 30 | 31 | def dist_acc(dists, thr=0.5): 32 | ''' Return percentage below threshold while ignoring values with a -1 ''' 33 | dist_cal = np.not_equal(dists, -1) 34 | num_dist_cal = dist_cal.sum() 35 | if num_dist_cal > 0: 36 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 37 | else: 38 | return -1 39 | 40 | 41 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 42 | ''' 43 | Calculate accuracy according to PCK, 44 | but uses ground truth heatmap rather than x,y locations 45 | First value to be returned is average accuracy across 'idxs', 46 | followed by individual accuracies 47 | ''' 48 | idx = list(range(output.shape[1])) 49 | norm = 1.0 50 | if hm_type == 'gaussian': 51 | pred, _ = get_max_preds(output) 52 | target, _ = get_max_preds(target) 53 | h = output.shape[2] 54 | w = output.shape[3] 55 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 56 | dists = calc_dists(pred, target, norm) 57 | 58 | acc = np.zeros((len(idx) + 1)) 59 | avg_acc = 0 60 | cnt = 0 61 | 62 | for i in range(len(idx)): 63 | acc[i + 1] = dist_acc(dists[idx[i]]) 64 | if acc[i + 1] >= 0: 65 | avg_acc = avg_acc + acc[i + 1] 66 | cnt += 1 67 | 68 | avg_acc = avg_acc / cnt if cnt != 0 else 0 69 | if cnt != 0: 70 | acc[0] = avg_acc 71 | return acc, avg_acc, cnt, pred 72 | 73 | 74 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import time 12 | import logging 13 | import os 14 | 15 | import numpy as np 16 | import torch 17 | 18 | from core.evaluate import accuracy 19 | from core.inference import get_final_preds 20 | from utils.transforms import flip_back 21 | from utils.vis import save_debug_images 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def train(config, train_loader, model, criterion, optimizer, epoch, 28 | output_dir, tb_log_dir, writer_dict): 29 | batch_time = AverageMeter() 30 | data_time = AverageMeter() 31 | losses = AverageMeter() 32 | acc = AverageMeter() 33 | 34 | # switch to train mode 35 | model.train() 36 | 37 | end = time.time() 38 | for i, (input, target, target_weight, meta) in enumerate(train_loader): 39 | # measure data loading time 40 | data_time.update(time.time() - end) 41 | 42 | # compute output 43 | outputs = model(input) 44 | 45 | target = target.cuda(non_blocking=True) 46 | target_weight = target_weight.cuda(non_blocking=True) 47 | 48 | if isinstance(outputs, list): 49 | loss = criterion(outputs[0], target, target_weight) 50 | for output in outputs[1:]: 51 | loss += criterion(output, target, target_weight) 52 | else: 53 | output = outputs 54 | loss = criterion(output, target, target_weight) 55 | 56 | # loss = criterion(output, target, target_weight) 57 | 58 | # compute gradient and do update step 59 | optimizer.zero_grad() 60 | loss.backward() 61 | optimizer.step() 62 | 63 | # measure accuracy and record loss 64 | losses.update(loss.item(), input.size(0)) 65 | 66 | _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), 67 | target.detach().cpu().numpy()) 68 | acc.update(avg_acc, cnt) 69 | 70 | # measure elapsed time 71 | batch_time.update(time.time() - end) 72 | end = time.time() 73 | 74 | if i % config.PRINT_FREQ == 0: 75 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 76 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 77 | 'Speed {speed:.1f} samples/s\t' \ 78 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 79 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 80 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 81 | epoch, i, len(train_loader), batch_time=batch_time, 82 | speed=input.size(0)/batch_time.val, 83 | data_time=data_time, loss=losses, acc=acc) 84 | logger.info(msg) 85 | 86 | writer = writer_dict['writer'] 87 | global_steps = writer_dict['train_global_steps'] 88 | writer.add_scalar('train_loss', losses.val, global_steps) 89 | writer.add_scalar('train_acc', acc.val, global_steps) 90 | writer_dict['train_global_steps'] = global_steps + 1 91 | 92 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 93 | save_debug_images(config, input, meta, target, pred*4, output, 94 | prefix) 95 | 96 | 97 | def validate(config, val_loader, val_dataset, model, criterion, output_dir, 98 | tb_log_dir, writer_dict=None): 99 | batch_time = AverageMeter() 100 | losses = AverageMeter() 101 | acc = AverageMeter() 102 | 103 | # switch to evaluate mode 104 | model.eval() 105 | 106 | num_samples = len(val_dataset) 107 | all_preds = np.zeros( 108 | (num_samples, config.MODEL.NUM_JOINTS, 3), 109 | dtype=np.float32 110 | ) 111 | all_boxes = np.zeros((num_samples, 6)) 112 | image_path = [] 113 | filenames = [] 114 | imgnums = [] 115 | idx = 0 116 | with torch.no_grad(): 117 | end = time.time() 
118 | for i, (input, target, target_weight, meta) in enumerate(val_loader): 119 | # compute output 120 | outputs = model(input) 121 | if isinstance(outputs, list): 122 | output = outputs[-1] 123 | else: 124 | output = outputs 125 | 126 | if config.TEST.FLIP_TEST: 127 | input_flipped = input.flip(3) 128 | outputs_flipped = model(input_flipped) 129 | 130 | if isinstance(outputs_flipped, list): 131 | output_flipped = outputs_flipped[-1] 132 | else: 133 | output_flipped = outputs_flipped 134 | 135 | output_flipped = flip_back(output_flipped.cpu().numpy(), 136 | val_dataset.flip_pairs) 137 | output_flipped = torch.from_numpy(output_flipped.copy()).cuda() 138 | 139 | 140 | # feature is not aligned, shift flipped heatmap for higher accuracy 141 | if config.TEST.SHIFT_HEATMAP: 142 | output_flipped[:, :, :, 1:] = \ 143 | output_flipped.clone()[:, :, :, 0:-1] 144 | 145 | output = (output + output_flipped) * 0.5 146 | 147 | target = target.cuda(non_blocking=True) 148 | target_weight = target_weight.cuda(non_blocking=True) 149 | 150 | loss = criterion(output, target, target_weight) 151 | 152 | num_images = input.size(0) 153 | # measure accuracy and record loss 154 | losses.update(loss.item(), num_images) 155 | _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), 156 | target.cpu().numpy()) 157 | 158 | acc.update(avg_acc, cnt) 159 | 160 | # measure elapsed time 161 | batch_time.update(time.time() - end) 162 | end = time.time() 163 | 164 | c = meta['center'].numpy() 165 | s = meta['scale'].numpy() 166 | score = meta['score'].numpy() 167 | 168 | preds, maxvals = get_final_preds( 169 | config, output.clone().cpu().numpy(), c, s) 170 | 171 | all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] 172 | all_preds[idx:idx + num_images, :, 2:3] = maxvals 173 | # double check this all_boxes parts 174 | all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] 175 | all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] 176 | all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) 177 | all_boxes[idx:idx + num_images, 5] = score 178 | image_path.extend(meta['image']) 179 | 180 | idx += num_images 181 | 182 | if i % config.PRINT_FREQ == 0: 183 | msg = 'Test: [{0}/{1}]\t' \ 184 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 185 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 186 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 187 | i, len(val_loader), batch_time=batch_time, 188 | loss=losses, acc=acc) 189 | logger.info(msg) 190 | 191 | prefix = '{}_{}'.format( 192 | os.path.join(output_dir, 'val'), i 193 | ) 194 | save_debug_images(config, input, meta, target, pred*4, output, 195 | prefix) 196 | 197 | name_values, perf_indicator = val_dataset.evaluate( 198 | config, all_preds, output_dir, all_boxes, image_path, 199 | filenames, imgnums 200 | ) 201 | 202 | model_name = config.MODEL.NAME 203 | if isinstance(name_values, list): 204 | for name_value in name_values: 205 | _print_name_value(name_value, model_name) 206 | else: 207 | _print_name_value(name_values, model_name) 208 | 209 | if writer_dict: 210 | writer = writer_dict['writer'] 211 | global_steps = writer_dict['valid_global_steps'] 212 | writer.add_scalar( 213 | 'valid_loss', 214 | losses.avg, 215 | global_steps 216 | ) 217 | writer.add_scalar( 218 | 'valid_acc', 219 | acc.avg, 220 | global_steps 221 | ) 222 | if isinstance(name_values, list): 223 | for name_value in name_values: 224 | writer.add_scalars( 225 | 'valid', 226 | dict(name_value), 227 | global_steps 228 | ) 229 | else: 230 | writer.add_scalars( 231 | 'valid', 232 | dict(name_values), 233 | 
global_steps 234 | ) 235 | writer_dict['valid_global_steps'] = global_steps + 1 236 | 237 | return perf_indicator 238 | 239 | 240 | # markdown format output 241 | def _print_name_value(name_value, full_arch_name): 242 | names = name_value.keys() 243 | values = name_value.values() 244 | num_values = len(name_value) 245 | logger.info( 246 | '| Arch ' + 247 | ' '.join(['| {}'.format(name) for name in names]) + 248 | ' |' 249 | ) 250 | logger.info('|---' * (num_values+1) + '|') 251 | 252 | if len(full_arch_name) > 15: 253 | full_arch_name = full_arch_name[:8] + '...' 254 | logger.info( 255 | '| ' + full_arch_name + ' ' + 256 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 257 | ' |' 258 | ) 259 | 260 | 261 | class AverageMeter(object): 262 | """Computes and stores the average and current value""" 263 | def __init__(self): 264 | self.reset() 265 | 266 | def reset(self): 267 | self.val = 0 268 | self.avg = 0 269 | self.sum = 0 270 | self.count = 0 271 | 272 | def update(self, val, n=1): 273 | self.val = val 274 | self.sum += val * n 275 | self.count += n 276 | self.avg = self.sum / self.count if self.count != 0 else 0 277 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | 15 | from utils.transforms import transform_preds 16 | 17 | 18 | def get_max_preds(batch_heatmaps): 19 | ''' 20 | get predictions from score maps 21 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 22 | ''' 23 | assert isinstance(batch_heatmaps, np.ndarray), \ 24 | 'batch_heatmaps should be numpy.ndarray' 25 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 26 | 27 | batch_size = batch_heatmaps.shape[0] 28 | num_joints = batch_heatmaps.shape[1] 29 | width = batch_heatmaps.shape[3] 30 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 31 | idx = np.argmax(heatmaps_reshaped, 2) 32 | maxvals = np.amax(heatmaps_reshaped, 2) 33 | 34 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 35 | idx = idx.reshape((batch_size, num_joints, 1)) 36 | 37 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 38 | 39 | preds[:, :, 0] = (preds[:, :, 0]) % width 40 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 41 | 42 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 43 | pred_mask = pred_mask.astype(np.float32) 44 | 45 | preds *= pred_mask 46 | return preds, maxvals 47 | 48 | 49 | def get_final_preds(config, batch_heatmaps, center, scale): 50 | coords, maxvals = get_max_preds(batch_heatmaps) 51 | 52 | heatmap_height = batch_heatmaps.shape[2] 53 | heatmap_width = batch_heatmaps.shape[3] 54 | 55 | # post-processing 56 | if config.TEST.POST_PROCESS: 57 | for n in range(coords.shape[0]): 58 | for p in range(coords.shape[1]): 59 | hm = batch_heatmaps[n][p] 60 | px = int(math.floor(coords[n][p][0] + 0.5)) 61 | py = int(math.floor(coords[n][p][1] + 0.5)) 62 | if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1: 63 | diff = np.array( 64 | [ 65 | hm[py][px+1] - 
hm[py][px-1], 66 | hm[py+1][px]-hm[py-1][px] 67 | ] 68 | ) 69 | coords[n][p] += np.sign(diff) * .25 70 | 71 | preds = coords.copy() 72 | 73 | # Transform back 74 | for i in range(coords.shape[0]): 75 | preds[i] = transform_preds( 76 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 77 | ) 78 | 79 | return preds, maxvals 80 | -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class JointsMSELoss(nn.Module): 16 | def __init__(self, use_target_weight): 17 | super(JointsMSELoss, self).__init__() 18 | self.criterion = nn.MSELoss(reduction='mean') 19 | self.use_target_weight = use_target_weight 20 | 21 | def forward(self, output, target, target_weight): 22 | batch_size = output.size(0) 23 | num_joints = output.size(1) 24 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 25 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 26 | loss = 0 27 | 28 | for idx in range(num_joints): 29 | heatmap_pred = heatmaps_pred[idx].squeeze() 30 | heatmap_gt = heatmaps_gt[idx].squeeze() 31 | if self.use_target_weight: 32 | loss += 0.5 * self.criterion( 33 | heatmap_pred.mul(target_weight[:, idx]), 34 | heatmap_gt.mul(target_weight[:, idx]) 35 | ) 36 | else: 37 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 38 | 39 | return loss / num_joints 40 | 41 | 42 | class JointsOHKMMSELoss(nn.Module): 43 | def __init__(self, use_target_weight, topk=8): 44 | super(JointsOHKMMSELoss, self).__init__() 45 | self.criterion = nn.MSELoss(reduction='none') 46 | self.use_target_weight = use_target_weight 47 | self.topk = topk 48 | 49 | def ohkm(self, loss): 50 | ohkm_loss = 0. 
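# Online hard keypoint mining: `loss` arrives here as [batch_size, num_joints];
# for each sample only the top-k joints with the largest loss are kept and
# averaged, so the gradient concentrates on the hardest keypoints.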
51 | for i in range(loss.size()[0]): 52 | sub_loss = loss[i] 53 | topk_val, topk_idx = torch.topk( 54 | sub_loss, k=self.topk, dim=0, sorted=False 55 | ) 56 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 57 | ohkm_loss += torch.sum(tmp_loss) / self.topk 58 | ohkm_loss /= loss.size()[0] 59 | return ohkm_loss 60 | 61 | def forward(self, output, target, target_weight): 62 | batch_size = output.size(0) 63 | num_joints = output.size(1) 64 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 65 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 66 | 67 | loss = [] 68 | for idx in range(num_joints): 69 | heatmap_pred = heatmaps_pred[idx].squeeze() 70 | heatmap_gt = heatmaps_gt[idx].squeeze() 71 | if self.use_target_weight: 72 | loss.append(0.5 * self.criterion( 73 | heatmap_pred.mul(target_weight[:, idx]), 74 | heatmap_gt.mul(target_weight[:, idx]) 75 | )) 76 | else: 77 | loss.append( 78 | 0.5 * self.criterion(heatmap_pred, heatmap_gt) 79 | ) 80 | 81 | loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss] 82 | loss = torch.cat(loss, dim=1) 83 | 84 | return self.ohkm(loss) 85 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import copy 12 | import logging 13 | import random 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from torch.utils.data import Dataset 19 | 20 | from utils.transforms import get_affine_transform 21 | from utils.transforms import affine_transform 22 | from utils.transforms import fliplr_joints 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class JointsDataset(Dataset): 29 | def __init__(self, cfg, root, image_set, is_train, transform=None): 30 | self.num_joints = 0 31 | self.pixel_std = 200 32 | self.flip_pairs = [] 33 | self.parent_ids = [] 34 | 35 | self.is_train = is_train 36 | self.root = root 37 | self.image_set = image_set 38 | 39 | self.output_path = cfg.OUTPUT_DIR 40 | self.data_format = cfg.DATASET.DATA_FORMAT 41 | 42 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 43 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 44 | self.flip = cfg.DATASET.FLIP 45 | self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY 46 | self.prob_half_body = cfg.DATASET.PROB_HALF_BODY 47 | self.color_rgb = cfg.DATASET.COLOR_RGB 48 | 49 | self.target_type = cfg.MODEL.TARGET_TYPE 50 | self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) 51 | self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) 52 | self.sigma = cfg.MODEL.SIGMA 53 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 54 | self.joints_weight = 1 55 | 56 | self.transform = transform 57 | self.db = [] 58 | 59 | def _get_db(self): 60 | raise NotImplementedError 61 | 62 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 63 | raise NotImplementedError 64 | 65 | def half_body_transform(self, joints, joints_vis): 66 | upper_joints = [] 67 | lower_joints = [] 68 | for joint_id in range(self.num_joints): 69 | if joints_vis[joint_id][0] > 0: 70 | if joint_id in 
self.upper_body_ids: 71 | upper_joints.append(joints[joint_id]) 72 | else: 73 | lower_joints.append(joints[joint_id]) 74 | 75 | if np.random.randn() < 0.5 and len(upper_joints) > 2: 76 | selected_joints = upper_joints 77 | else: 78 | selected_joints = lower_joints \ 79 | if len(lower_joints) > 2 else upper_joints 80 | 81 | if len(selected_joints) < 2: 82 | return None, None 83 | 84 | selected_joints = np.array(selected_joints, dtype=np.float32) 85 | center = selected_joints.mean(axis=0)[:2] 86 | 87 | left_top = np.amin(selected_joints, axis=0) 88 | right_bottom = np.amax(selected_joints, axis=0) 89 | 90 | w = right_bottom[0] - left_top[0] 91 | h = right_bottom[1] - left_top[1] 92 | 93 | if w > self.aspect_ratio * h: 94 | h = w * 1.0 / self.aspect_ratio 95 | elif w < self.aspect_ratio * h: 96 | w = h * self.aspect_ratio 97 | 98 | scale = np.array( 99 | [ 100 | w * 1.0 / self.pixel_std, 101 | h * 1.0 / self.pixel_std 102 | ], 103 | dtype=np.float32 104 | ) 105 | 106 | scale = scale * 1.5 107 | 108 | return center, scale 109 | 110 | def __len__(self,): 111 | return len(self.db) 112 | 113 | def __getitem__(self, idx): 114 | db_rec = copy.deepcopy(self.db[idx]) 115 | 116 | image_file = db_rec['image'] 117 | filename = db_rec['filename'] if 'filename' in db_rec else '' 118 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 119 | 120 | if self.data_format == 'zip': 121 | from utils import zipreader 122 | data_numpy = zipreader.imread( 123 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 124 | ) 125 | else: 126 | data_numpy = cv2.imread( 127 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 128 | ) 129 | 130 | if self.color_rgb: 131 | data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) 132 | 133 | if data_numpy is None: 134 | logger.error('=> fail to read {}'.format(image_file)) 135 | raise ValueError('Fail to read {}'.format(image_file)) 136 | 137 | joints = db_rec['joints_3d'] 138 | joints_vis = db_rec['joints_3d_vis'] 139 | 140 | c = db_rec['center'] 141 | s = db_rec['scale'] 142 | score = db_rec['score'] if 'score' in db_rec else 1 143 | r = 0 144 | 145 | if self.is_train: 146 | if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body 147 | and np.random.rand() < self.prob_half_body): 148 | c_half_body, s_half_body = self.half_body_transform( 149 | joints, joints_vis 150 | ) 151 | 152 | if c_half_body is not None and s_half_body is not None: 153 | c, s = c_half_body, s_half_body 154 | 155 | sf = self.scale_factor 156 | rf = self.rotation_factor 157 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 158 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 159 | if random.random() <= 0.6 else 0 160 | 161 | if self.flip and random.random() <= 0.5: 162 | data_numpy = data_numpy[:, ::-1, :] 163 | joints, joints_vis = fliplr_joints( 164 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 165 | c[0] = data_numpy.shape[1] - c[0] - 1 166 | 167 | trans = get_affine_transform(c, s, r, self.image_size) 168 | input = cv2.warpAffine( 169 | data_numpy, 170 | trans, 171 | (int(self.image_size[0]), int(self.image_size[1])), 172 | flags=cv2.INTER_LINEAR) 173 | 174 | if self.transform: 175 | input = self.transform(input) 176 | 177 | for i in range(self.num_joints): 178 | if joints_vis[i, 0] > 0.0: 179 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 180 | 181 | target, target_weight = self.generate_target(joints, joints_vis) 182 | 183 | target = torch.from_numpy(target) 184 | target_weight = torch.from_numpy(target_weight) 185 | 186 | meta = { 187 | 
'image': image_file, 188 | 'filename': filename, 189 | 'imgnum': imgnum, 190 | 'joints': joints, 191 | 'joints_vis': joints_vis, 192 | 'center': c, 193 | 'scale': s, 194 | 'rotation': r, 195 | 'score': score 196 | } 197 | 198 | return input, target, target_weight, meta 199 | 200 | def select_data(self, db): 201 | db_selected = [] 202 | for rec in db: 203 | num_vis = 0 204 | joints_x = 0.0 205 | joints_y = 0.0 206 | for joint, joint_vis in zip( 207 | rec['joints_3d'], rec['joints_3d_vis']): 208 | if joint_vis[0] <= 0: 209 | continue 210 | num_vis += 1 211 | 212 | joints_x += joint[0] 213 | joints_y += joint[1] 214 | if num_vis == 0: 215 | continue 216 | 217 | joints_x, joints_y = joints_x / num_vis, joints_y / num_vis 218 | 219 | area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) 220 | joints_center = np.array([joints_x, joints_y]) 221 | bbox_center = np.array(rec['center']) 222 | diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) 223 | ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) 224 | 225 | metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 226 | if ks > metric: 227 | db_selected.append(rec) 228 | 229 | logger.info('=> num db: {}'.format(len(db))) 230 | logger.info('=> num selected db: {}'.format(len(db_selected))) 231 | return db_selected 232 | 233 | def generate_target(self, joints, joints_vis): 234 | ''' 235 | :param joints: [num_joints, 3] 236 | :param joints_vis: [num_joints, 3] 237 | :return: target, target_weight(1: visible, 0: invisible) 238 | ''' 239 | target_weight = np.ones((self.num_joints, 1), dtype=np.float32) 240 | target_weight[:, 0] = joints_vis[:, 0] 241 | 242 | assert self.target_type == 'gaussian', \ 243 | 'Only support gaussian map now!' 244 | 245 | if self.target_type == 'gaussian': 246 | target = np.zeros((self.num_joints, 247 | self.heatmap_size[1], 248 | self.heatmap_size[0]), 249 | dtype=np.float32) 250 | 251 | tmp_size = self.sigma * 3 252 | 253 | for joint_id in range(self.num_joints): 254 | feat_stride = self.image_size / self.heatmap_size 255 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) 256 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) 257 | # Check that any part of the gaussian is in-bounds 258 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 259 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 260 | if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ 261 | or br[0] < 0 or br[1] < 0: 262 | # If not, just return the image as is 263 | target_weight[joint_id] = 0 264 | continue 265 | 266 | # # Generate gaussian 267 | size = 2 * tmp_size + 1 268 | x = np.arange(0, size, 1, np.float32) 269 | y = x[:, np.newaxis] 270 | x0 = y0 = size // 2 271 | # The gaussian is not normalized, we want the center value to equal 1 272 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) 273 | 274 | # Usable gaussian range 275 | g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 276 | g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 277 | # Image range 278 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 279 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 280 | 281 | v = target_weight[joint_id] 282 | if v > 0.5: 283 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ 284 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 285 | 286 | if self.use_different_joints_weight: 287 | target_weight = np.multiply(target_weight, self.joints_weight) 288 | 289 | return target, target_weight 290 | 
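# ------------------------------------------------------------------------------
# Illustrative sketch only (not part of this file's API): how a JointsDataset
# subclass is typically consumed through a DataLoader. The `config` import, the
# cfg.DATASET.* fields, and the ImageNet normalization below are assumptions
# made for illustration; in practice cfg is filled in from one of the
# experiment yaml files before the dataset is built.
# ------------------------------------------------------------------------------
if __name__ == '__main__':
    import torchvision.transforms as transforms
    from torch.utils.data import DataLoader

    from config import cfg  # assumed entry point, normally updated from a yaml
    from dataset import coco as COCODataset

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_set = COCODataset(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, is_train=True,
        transform=transforms.Compose([transforms.ToTensor(), normalize])
    )
    loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

    # Each item is (input, target, target_weight, meta): the cropped and
    # augmented person patch, the gaussian target heatmaps, the per-joint
    # visibility weights, and a dict with center/scale for mapping predictions
    # back to the original image.
    for input, target, target_weight, meta in loader:
        print(input.shape, target.shape, target_weight.shape)
        break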
-------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .mpii import MPIIDataset as mpii 12 | from .coco import COCODataset as coco 13 | -------------------------------------------------------------------------------- /lib/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import os 13 | import json_tricks as json 14 | from collections import OrderedDict 15 | 16 | import numpy as np 17 | from scipy.io import loadmat, savemat 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MPIIDataset(JointsDataset): 26 | def __init__(self, cfg, root, image_set, is_train, transform=None): 27 | super().__init__(cfg, root, image_set, is_train, transform) 28 | 29 | self.num_joints = 16 30 | self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] 31 | self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] 32 | 33 | self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15) 34 | self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6) 35 | 36 | self.db = self._get_db() 37 | 38 | if is_train and cfg.DATASET.SELECT_DATA: 39 | self.db = self.select_data(self.db) 40 | 41 | logger.info('=> load {} samples'.format(len(self.db))) 42 | 43 | def _get_db(self): 44 | # create train/val split 45 | file_name = os.path.join( 46 | self.root, 'annot', self.image_set+'.json' 47 | ) 48 | with open(file_name) as anno_file: 49 | anno = json.load(anno_file) 50 | 51 | gt_db = [] 52 | for a in anno: 53 | image_name = a['image'] 54 | 55 | c = np.array(a['center'], dtype=np.float) 56 | s = np.array([a['scale'], a['scale']], dtype=np.float) 57 | 58 | # Adjust center/scale slightly to avoid cropping limbs 59 | if c[0] != -1: 60 | c[1] = c[1] + 15 * s[1] 61 | s = s * 1.25 62 | 63 | # MPII uses matlab format, index is based 1, 64 | # we should first convert to 0-based index 65 | c = c - 1 66 | 67 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 68 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 69 | if self.image_set != 'test': 70 | joints = np.array(a['joints']) 71 | joints[:, 0:2] = joints[:, 0:2] - 1 72 | joints_vis = np.array(a['joints_vis']) 73 | assert len(joints) == self.num_joints, \ 74 | 'joint num diff: {} vs {}'.format(len(joints), 75 | self.num_joints) 76 | 77 | joints_3d[:, 0:2] = joints[:, 0:2] 78 | joints_3d_vis[:, 0] = joints_vis[:] 79 | joints_3d_vis[:, 1] = joints_vis[:] 80 | 81 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 82 | 
gt_db.append( 83 | { 84 | 'image': os.path.join(self.root, image_dir, image_name), 85 | 'center': c, 86 | 'scale': s, 87 | 'joints_3d': joints_3d, 88 | 'joints_3d_vis': joints_3d_vis, 89 | 'filename': '', 90 | 'imgnum': 0, 91 | } 92 | ) 93 | 94 | return gt_db 95 | 96 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 97 | # convert 0-based index to 1-based index 98 | preds = preds[:, :, 0:2] + 1.0 99 | 100 | if output_dir: 101 | pred_file = os.path.join(output_dir, 'pred.mat') 102 | savemat(pred_file, mdict={'preds': preds}) 103 | 104 | if 'test' in cfg.DATASET.TEST_SET: 105 | return {'Null': 0.0}, 0.0 106 | 107 | SC_BIAS = 0.6 108 | threshold = 0.5 109 | 110 | gt_file = os.path.join(cfg.DATASET.ROOT, 111 | 'annot', 112 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 113 | gt_dict = loadmat(gt_file) 114 | dataset_joints = gt_dict['dataset_joints'] 115 | jnt_missing = gt_dict['jnt_missing'] 116 | pos_gt_src = gt_dict['pos_gt_src'] 117 | headboxes_src = gt_dict['headboxes_src'] 118 | 119 | pos_pred_src = np.transpose(preds, [1, 2, 0]) 120 | 121 | head = np.where(dataset_joints == 'head')[1][0] 122 | lsho = np.where(dataset_joints == 'lsho')[1][0] 123 | lelb = np.where(dataset_joints == 'lelb')[1][0] 124 | lwri = np.where(dataset_joints == 'lwri')[1][0] 125 | lhip = np.where(dataset_joints == 'lhip')[1][0] 126 | lkne = np.where(dataset_joints == 'lkne')[1][0] 127 | lank = np.where(dataset_joints == 'lank')[1][0] 128 | 129 | rsho = np.where(dataset_joints == 'rsho')[1][0] 130 | relb = np.where(dataset_joints == 'relb')[1][0] 131 | rwri = np.where(dataset_joints == 'rwri')[1][0] 132 | rkne = np.where(dataset_joints == 'rkne')[1][0] 133 | rank = np.where(dataset_joints == 'rank')[1][0] 134 | rhip = np.where(dataset_joints == 'rhip')[1][0] 135 | 136 | jnt_visible = 1 - jnt_missing 137 | uv_error = pos_pred_src - pos_gt_src 138 | uv_err = np.linalg.norm(uv_error, axis=1) 139 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 140 | headsizes = np.linalg.norm(headsizes, axis=0) 141 | headsizes *= SC_BIAS 142 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 143 | scaled_uv_err = np.divide(uv_err, scale) 144 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 145 | jnt_count = np.sum(jnt_visible, axis=1) 146 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 147 | jnt_visible) 148 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 149 | 150 | # save 151 | rng = np.arange(0, 0.5+0.01, 0.01) 152 | pckAll = np.zeros((len(rng), 16)) 153 | 154 | for r in range(len(rng)): 155 | threshold = rng[r] 156 | less_than_threshold = np.multiply(scaled_uv_err <= threshold, 157 | jnt_visible) 158 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 159 | jnt_count) 160 | 161 | PCKh = np.ma.array(PCKh, mask=False) 162 | PCKh.mask[6:8] = True 163 | 164 | jnt_count = np.ma.array(jnt_count, mask=False) 165 | jnt_count.mask[6:8] = True 166 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 167 | 168 | name_value = [ 169 | ('Head', PCKh[head]), 170 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 171 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 172 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 173 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 174 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 175 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 176 | ('Mean', np.sum(PCKh * jnt_ratio)), 177 | ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 178 | ] 179 | name_value = OrderedDict(name_value) 180 | 181 | return name_value, 
name_value['Mean'] 182 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import models.pose_resnet 16 | import models.pose_hrnet 17 | -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | BN_MOMENTUM = 0.1 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d( 25 | in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False 27 | ) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 72 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 73 | bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 75 | momentum=BN_MOMENTUM) 76 | self.relu = nn.ReLU(inplace=True) 77 | self.downsample = downsample 78 | self.stride = stride 79 | 80 | def forward(self, x): 81 | residual = x 82 | 83 | out = self.conv1(x) 84 | out = self.bn1(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv2(out) 
88 | out = self.bn2(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv3(out) 92 | out = self.bn3(out) 93 | 94 | if self.downsample is not None: 95 | residual = self.downsample(x) 96 | 97 | out += residual 98 | out = self.relu(out) 99 | 100 | return out 101 | 102 | 103 | class PoseResNet(nn.Module): 104 | 105 | def __init__(self, block, layers, cfg, **kwargs): 106 | self.inplanes = 64 107 | extra = cfg.MODEL.EXTRA 108 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 109 | 110 | super(PoseResNet, self).__init__() 111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 112 | bias=False) 113 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 114 | self.relu = nn.ReLU(inplace=True) 115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 116 | self.layer1 = self._make_layer(block, 64, layers[0]) 117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 119 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 120 | 121 | # used for deconv layers 122 | self.deconv_layers = self._make_deconv_layer( 123 | extra.NUM_DECONV_LAYERS, 124 | extra.NUM_DECONV_FILTERS, 125 | extra.NUM_DECONV_KERNELS, 126 | ) 127 | 128 | self.final_layer = nn.Conv2d( 129 | in_channels=extra.NUM_DECONV_FILTERS[-1], 130 | out_channels=cfg.MODEL.NUM_JOINTS, 131 | kernel_size=extra.FINAL_CONV_KERNEL, 132 | stride=1, 133 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 134 | ) 135 | 136 | def _make_layer(self, block, planes, blocks, stride=1): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * block.expansion: 139 | downsample = nn.Sequential( 140 | nn.Conv2d(self.inplanes, planes * block.expansion, 141 | kernel_size=1, stride=stride, bias=False), 142 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 143 | ) 144 | 145 | layers = [] 146 | layers.append(block(self.inplanes, planes, stride, downsample)) 147 | self.inplanes = planes * block.expansion 148 | for i in range(1, blocks): 149 | layers.append(block(self.inplanes, planes)) 150 | 151 | return nn.Sequential(*layers) 152 | 153 | def _get_deconv_cfg(self, deconv_kernel, index): 154 | if deconv_kernel == 4: 155 | padding = 1 156 | output_padding = 0 157 | elif deconv_kernel == 3: 158 | padding = 1 159 | output_padding = 1 160 | elif deconv_kernel == 2: 161 | padding = 0 162 | output_padding = 0 163 | 164 | return deconv_kernel, padding, output_padding 165 | 166 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 167 | assert num_layers == len(num_filters), \ 168 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 169 | assert num_layers == len(num_kernels), \ 170 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 171 | 172 | layers = [] 173 | for i in range(num_layers): 174 | kernel, padding, output_padding = \ 175 | self._get_deconv_cfg(num_kernels[i], i) 176 | 177 | planes = num_filters[i] 178 | layers.append( 179 | nn.ConvTranspose2d( 180 | in_channels=self.inplanes, 181 | out_channels=planes, 182 | kernel_size=kernel, 183 | stride=2, 184 | padding=padding, 185 | output_padding=output_padding, 186 | bias=self.deconv_with_bias)) 187 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 188 | layers.append(nn.ReLU(inplace=True)) 189 | self.inplanes = planes 190 | 191 | return nn.Sequential(*layers) 192 | 193 | def forward(self, x): 194 | x = self.conv1(x) 195 | x = self.bn1(x) 196 | x = self.relu(x) 197 | x = self.maxpool(x) 198 | 199 | x = 
self.layer1(x) 200 | x = self.layer2(x) 201 | x = self.layer3(x) 202 | x = self.layer4(x) 203 | 204 | x = self.deconv_layers(x) 205 | x = self.final_layer(x) 206 | 207 | return x 208 | 209 | def init_weights(self, pretrained=''): 210 | if os.path.isfile(pretrained): 211 | logger.info('=> init deconv weights from normal distribution') 212 | for name, m in self.deconv_layers.named_modules(): 213 | if isinstance(m, nn.ConvTranspose2d): 214 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 215 | logger.info('=> init {}.bias as 0'.format(name)) 216 | nn.init.normal_(m.weight, std=0.001) 217 | if self.deconv_with_bias: 218 | nn.init.constant_(m.bias, 0) 219 | elif isinstance(m, nn.BatchNorm2d): 220 | logger.info('=> init {}.weight as 1'.format(name)) 221 | logger.info('=> init {}.bias as 0'.format(name)) 222 | nn.init.constant_(m.weight, 1) 223 | nn.init.constant_(m.bias, 0) 224 | logger.info('=> init final conv weights from normal distribution') 225 | for m in self.final_layer.modules(): 226 | if isinstance(m, nn.Conv2d): 227 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 228 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 229 | logger.info('=> init {}.bias as 0'.format(name)) 230 | nn.init.normal_(m.weight, std=0.001) 231 | nn.init.constant_(m.bias, 0) 232 | 233 | pretrained_state_dict = torch.load(pretrained) 234 | logger.info('=> loading pretrained model {}'.format(pretrained)) 235 | self.load_state_dict(pretrained_state_dict, strict=False) 236 | else: 237 | logger.info('=> init weights from normal distribution') 238 | for m in self.modules(): 239 | if isinstance(m, nn.Conv2d): 240 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 241 | nn.init.normal_(m.weight, std=0.001) 242 | # nn.init.constant_(m.bias, 0) 243 | elif isinstance(m, nn.BatchNorm2d): 244 | nn.init.constant_(m.weight, 1) 245 | nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.ConvTranspose2d): 247 | nn.init.normal_(m.weight, std=0.001) 248 | if self.deconv_with_bias: 249 | nn.init.constant_(m.bias, 0) 250 | 251 | 252 | resnet_spec = { 253 | 18: (BasicBlock, [2, 2, 2, 2]), 254 | 34: (BasicBlock, [3, 4, 6, 3]), 255 | 50: (Bottleneck, [3, 4, 6, 3]), 256 | 101: (Bottleneck, [3, 4, 23, 3]), 257 | 152: (Bottleneck, [3, 8, 36, 3]) 258 | } 259 | 260 | 261 | def get_pose_net(cfg, is_train, **kwargs): 262 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 263 | 264 | block_class, layers = resnet_spec[num_layers] 265 | 266 | model = PoseResNet(block_class, layers, cfg, **kwargs) 267 | 268 | if is_train and cfg.MODEL.INIT_WEIGHTS: 269 | model.init_weights(cfg.MODEL.PRETRAINED) 270 | 271 | return model 272 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 15 | return a if a >= b else b 16 | 17 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 18 | return a if a <= b else b 19 | 20 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 21 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 22 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 23 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 24 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 25 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 26 | 27 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 28 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 29 | 30 | cdef int ndets = dets.shape[0] 31 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 32 | np.zeros((ndets), dtype=np.int) 33 | 34 | # nominal indices 35 | cdef int _i, _j 36 | # sorted indices 37 | cdef int i, j 38 | # temp variables for box i's (the box currently under consideration) 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | # variables for computing overlap with box j (lower scoring box) 41 | cdef np.float32_t xx1, yy1, xx2, yy2 42 | cdef np.float32_t w, h 43 | cdef np.float32_t inter, ovr 44 | 45 | keep = [] 46 | for _i in range(ndets): 47 | i = order[_i] 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | ix1 = x1[i] 52 | iy1 = y1[i] 53 | ix2 = x2[i] 54 | iy2 = y2[i] 55 | iarea = areas[i] 56 | for _j in range(_i + 1, ndets): 57 | j = order[_j] 58 | if suppressed[j] == 1: 59 | continue 60 | xx1 = max(ix1, x1[j]) 61 | yy1 = max(iy1, y1[j]) 62 | xx2 = min(ix2, x2[j]) 63 | yy2 = min(iy2, y2[j]) 64 | w = max(0.0, xx2 - xx1 + 1) 65 | h = max(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (iarea + areas[j] - inter) 68 | if ovr >= thresh: 69 | suppressed[j] = 1 70 | 71 | return keep 72 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | assert sizeof(int) == sizeof(np.int32_t) 15 | 16 | cdef extern from "gpu_nms.hpp": 17 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 18 | 19 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 20 | np.int32_t device_id=0): 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int32_t, ndim=1] \ 29 | order = scores.argsort()[::-1].astype(np.int32) 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) 
/ 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 98 | """ 99 | greedily select boxes with high confidence and overlap with current maximum <= thresh 100 | rule out overlap >= thresh, overlap = oks 101 | :param kpts_db 102 | :param thresh: retain overlap < thresh 103 | :return: indexes to keep 104 | """ 105 | if len(kpts_db) == 0: 106 | return [] 107 | 108 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 109 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 110 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 111 | 112 | order = scores.argsort()[::-1] 113 | 114 | keep = [] 115 | while order.size > 0: 116 | i = order[0] 117 | keep.append(i) 118 | 119 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 120 | 121 | inds = np.where(oks_ovr <= thresh)[0] 122 | order = order[inds + 1] 123 | 124 | return keep 125 | 126 | 127 | def rescore(overlap, scores, thresh, type='gaussian'): 128 | assert overlap.shape[0] == scores.shape[0] 129 | if type == 'linear': 130 | inds = np.where(overlap >= thresh)[0] 131 | scores[inds] = scores[inds] * (1 - overlap[inds]) 132 | else: 133 | scores = scores * np.exp(- overlap**2 / thresh) 134 | 135 | return scores 136 | 137 | 138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 139 | """ 140 | greedily select boxes with high confidence and overlap with current maximum <= thresh 141 | rule out overlap >= thresh, overlap = oks 142 | :param kpts_db 143 | :param thresh: retain overlap < thresh 144 | :return: indexes to keep 145 | """ 146 | if len(kpts_db) == 0: 147 | return [] 148 | 149 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 150 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 151 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 152 | 153 | order = scores.argsort()[::-1] 154 | scores = scores[order] 155 | 156 | # max_dets = order.size 157 | max_dets = 20 158 | keep = np.zeros(max_dets, dtype=np.intp) 159 | keep_cnt = 0 160 | while order.size > 0 and keep_cnt < max_dets: 161 | i = order[0] 162 | 163 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 164 | 165 | order = order[1:] 166 | scores = rescore(oks_ovr, scores[1:], thresh) 167 | 168 | tmp = scores.argsort()[::-1] 169 | order = order[tmp] 170 | scores = scores[tmp] 171 | 172 | keep[keep_cnt] = i 173 | keep_cnt += 1 174 | 175 | keep = keep[:keep_cnt] 176 | 177 | return keep 178 | # kpts_db = kpts_db[:keep_cnt] 179 | 180 | # return kpts_db 181 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // 
Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(¤t_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 
87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose.gluon 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.items(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
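# For '.cu' sources the compiler executable is switched to nvcc and the 'nvcc'
# entry of extra_postargs is applied; all other sources keep the default
# compiler and the 'gcc' entry.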
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def flip_back(output_flipped, matched_parts): 16 | ''' 17 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 18 | ''' 19 | assert output_flipped.ndim == 4,\ 20 | 'output_flipped should be [batch_size, num_joints, height, width]' 21 | 22 | output_flipped = output_flipped[:, :, :, ::-1] 23 | 24 | for pair in matched_parts: 25 | tmp = output_flipped[:, pair[0], :, :].copy() 26 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 27 | output_flipped[:, pair[1], :, :] = tmp 28 | 29 | return output_flipped 30 | 31 | 32 | def fliplr_joints(joints, joints_vis, width, matched_parts): 33 | """ 34 | flip coords 35 | """ 36 | # Flip horizontal 37 | joints[:, 0] = width - joints[:, 0] - 1 38 | 39 | # Change left-right parts 40 | for pair in matched_parts: 41 | joints[pair[0], :], joints[pair[1], :] = \ 42 | joints[pair[1], :], joints[pair[0], :].copy() 43 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 44 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 45 | 46 | return joints*joints_vis, joints_vis 47 | 48 | 49 | def transform_preds(coords, center, scale, output_size): 50 | target_coords = np.zeros(coords.shape) 51 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 52 | for p in range(coords.shape[0]): 53 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 54 | return target_coords 55 | 56 | 57 | def get_affine_transform( 58 | center, scale, rot, output_size, 59 | shift=np.array([0, 0], dtype=np.float32), inv=0 60 | ): 61 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 62 | print(scale) 63 | scale = np.array([scale, scale]) 64 | 65 | scale_tmp = scale * 200.0 66 | src_w = scale_tmp[0] 67 | dst_w = output_size[0] 68 | dst_h = output_size[1] 69 | 70 | rot_rad = np.pi * rot / 180 71 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 72 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 73 | 74 | src = np.zeros((3, 2), dtype=np.float32) 75 | dst = np.zeros((3, 2), dtype=np.float32) 76 | src[0, :] = center + scale_tmp * shift 77 | src[1, :] = center + src_dir + scale_tmp * shift 78 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 79 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 80 | 81 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 82 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 83 | 84 | if inv: 85 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 86 | else: 87 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 88 | 89 | return trans 90 | 91 | 92 | def affine_transform(pt, t): 93 | new_pt = np.array([pt[0], pt[1], 1.]).T 94 | new_pt = np.dot(t, new_pt) 95 | return new_pt[:2] 96 | 97 | 98 | def get_3rd_point(a, b): 99 | direct = a - b 100 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 101 | 102 | 103 | def get_dir(src_point, rot_rad): 104 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 105 | 106 | src_result = [0, 0] 107 | src_result[0] = src_point[0] * cs - src_point[1] * sn 108 | src_result[1] = src_point[0] * sn + src_point[1] * cs 109 | 110 | return src_result 111 | 112 | 113 | def crop(img, center, scale, output_size, rot=0): 114 | trans = get_affine_transform(center, scale, rot, output_size) 115 | 116 | dst_img = 
cv2.warpAffine( 117 | img, trans, (int(output_size[0]), int(output_size[1])), 118 | flags=cv2.INTER_LINEAR 119 | ) 120 | 121 | return dst_img 122 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | 17 | import torch 18 | import torch.optim as optim 19 | import torch.nn as nn 20 | 21 | 22 | def create_logger(cfg, cfg_name, phase='train'): 23 | root_output_dir = Path(cfg.OUTPUT_DIR) 24 | # set up logger 25 | if not root_output_dir.exists(): 26 | print('=> creating {}'.format(root_output_dir)) 27 | root_output_dir.mkdir() 28 | 29 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 30 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 31 | dataset = dataset.replace(':', '_') 32 | model = cfg.MODEL.NAME 33 | cfg_name = os.path.basename(cfg_name).split('.')[0] 34 | 35 | final_output_dir = root_output_dir / dataset / model / cfg_name 36 | 37 | print('=> creating {}'.format(final_output_dir)) 38 | final_output_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | time_str = time.strftime('%Y-%m-%d-%H-%M') 41 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 42 | final_log_file = final_output_dir / log_file 43 | head = '%(asctime)-15s %(message)s' 44 | logging.basicConfig(filename=str(final_log_file), 45 | format=head) 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.INFO) 48 | console = logging.StreamHandler() 49 | logging.getLogger('').addHandler(console) 50 | 51 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 52 | (cfg_name + '_' + time_str) 53 | 54 | print('=> creating {}'.format(tensorboard_log_dir)) 55 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 56 | 57 | return logger, str(final_output_dir), str(tensorboard_log_dir) 58 | 59 | 60 | def get_optimizer(cfg, model): 61 | optimizer = None 62 | if cfg.TRAIN.OPTIMIZER == 'sgd': 63 | optimizer = optim.SGD( 64 | model.parameters(), 65 | lr=cfg.TRAIN.LR, 66 | momentum=cfg.TRAIN.MOMENTUM, 67 | weight_decay=cfg.TRAIN.WD, 68 | nesterov=cfg.TRAIN.NESTEROV 69 | ) 70 | elif cfg.TRAIN.OPTIMIZER == 'adam': 71 | optimizer = optim.Adam( 72 | model.parameters(), 73 | lr=cfg.TRAIN.LR 74 | ) 75 | 76 | return optimizer 77 | 78 | 79 | def save_checkpoint(states, is_best, output_dir, 80 | filename='checkpoint.pth'): 81 | torch.save(states, os.path.join(output_dir, filename)) 82 | if is_best and 'state_dict' in states: 83 | torch.save(states['best_state_dict'], 84 | os.path.join(output_dir, 'model_best.pth')) 85 | 86 | 87 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 88 | """ 89 | :param model: 90 | :param input_tensors: 91 | :param item_length: 92 | :return: 93 | """ 94 | 95 | summary = [] 96 | 97 | ModuleDetails = namedtuple( 98 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 99 | hooks = [] 100 | layer_instances = {} 101 | 102 | def add_hooks(module): 
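# Registers a forward hook on every leaf module (ModuleList/Sequential
# containers and the top-level model are skipped); each hook appends the layer
# name, input/output sizes, parameter count and multiply-adds to `summary`.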
103 | 104 | def hook(module, input, output): 105 | class_name = str(module.__class__.__name__) 106 | 107 | instance_index = 1 108 | if class_name not in layer_instances: 109 | layer_instances[class_name] = instance_index 110 | else: 111 | instance_index = layer_instances[class_name] + 1 112 | layer_instances[class_name] = instance_index 113 | 114 | layer_name = class_name + "_" + str(instance_index) 115 | 116 | params = 0 117 | 118 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 119 | class_name.find("Linear") != -1: 120 | for param_ in module.parameters(): 121 | params += param_.view(-1).size(0) 122 | 123 | flops = "Not Available" 124 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 125 | flops = ( 126 | torch.prod( 127 | torch.LongTensor(list(module.weight.data.size()))) * 128 | torch.prod( 129 | torch.LongTensor(list(output.size())[2:]))).item() 130 | elif isinstance(module, nn.Linear): 131 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 132 | * input[0].size(1)).item() 133 | 134 | if isinstance(input[0], list): 135 | input = input[0] 136 | if isinstance(output, list): 137 | output = output[0] 138 | 139 | summary.append( 140 | ModuleDetails( 141 | name=layer_name, 142 | input_size=list(input[0].size()), 143 | output_size=list(output.size()), 144 | num_parameters=params, 145 | multiply_adds=flops) 146 | ) 147 | 148 | if not isinstance(module, nn.ModuleList) \ 149 | and not isinstance(module, nn.Sequential) \ 150 | and module != model: 151 | hooks.append(module.register_forward_hook(hook)) 152 | 153 | model.eval() 154 | model.apply(add_hooks) 155 | 156 | space_len = item_length 157 | 158 | model(*input_tensors) 159 | for hook in hooks: 160 | hook.remove() 161 | 162 | details = '' 163 | if verbose: 164 | details = "Model Summary" + \ 165 | os.linesep + \ 166 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 167 | ' ' * (space_len - len("Name")), 168 | ' ' * (space_len - len("Input Size")), 169 | ' ' * (space_len - len("Output Size")), 170 | ' ' * (space_len - len("Parameters")), 171 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 172 | + os.linesep + '-' * space_len * 5 + os.linesep 173 | 174 | params_sum = 0 175 | flops_sum = 0 176 | for layer in summary: 177 | params_sum += layer.num_parameters 178 | if layer.multiply_adds != "Not Available": 179 | flops_sum += layer.multiply_adds 180 | if verbose: 181 | details += "{}{}{}{}{}{}{}{}{}{}".format( 182 | layer.name, 183 | ' ' * (space_len - len(layer.name)), 184 | layer.input_size, 185 | ' ' * (space_len - len(str(layer.input_size))), 186 | layer.output_size, 187 | ' ' * (space_len - len(str(layer.output_size))), 188 | layer.num_parameters, 189 | ' ' * (space_len - len(str(layer.num_parameters))), 190 | layer.multiply_adds, 191 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 192 | + os.linesep + '-' * space_len * 5 + os.linesep 193 | 194 | details += os.linesep \ 195 | + "Total Parameters: {:,}".format(params_sum) \ 196 | + os.linesep + '-' * space_len * 5 + os.linesep 197 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 198 | + os.linesep + '-' * space_len * 5 + os.linesep 199 | details += "Number of Layers" + os.linesep 200 | for layer in layer_instances: 201 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 202 | 203 | return details 204 | -------------------------------------------------------------------------------- /lib/utils/vis.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | 17 | from core.inference import get_max_preds 18 | 19 | 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 21 | file_name, nrow=8, padding=2): 22 | ''' 23 | batch_image: [batch_size, channel, height, width] 24 | batch_joints: [batch_size, num_joints, 3], 25 | batch_joints_vis: [batch_size, num_joints, 1], 26 | } 27 | ''' 28 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 29 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 30 | ndarr = ndarr.copy() 31 | 32 | nmaps = batch_image.size(0) 33 | xmaps = min(nrow, nmaps) 34 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 35 | height = int(batch_image.size(2) + padding) 36 | width = int(batch_image.size(3) + padding) 37 | k = 0 38 | for y in range(ymaps): 39 | for x in range(xmaps): 40 | if k >= nmaps: 41 | break 42 | joints = batch_joints[k] 43 | joints_vis = batch_joints_vis[k] 44 | 45 | for joint, joint_vis in zip(joints, joints_vis): 46 | joint[0] = x * width + padding + joint[0] 47 | joint[1] = y * height + padding + joint[1] 48 | if joint_vis[0]: 49 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) 50 | k = k + 1 51 | cv2.imwrite(file_name, ndarr) 52 | 53 | 54 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, 55 | normalize=True): 56 | ''' 57 | batch_image: [batch_size, channel, height, width] 58 | batch_heatmaps: ['batch_size, num_joints, height, width] 59 | file_name: saved file name 60 | ''' 61 | if normalize: 62 | batch_image = batch_image.clone() 63 | min = float(batch_image.min()) 64 | max = float(batch_image.max()) 65 | 66 | batch_image.add_(-min).div_(max - min + 1e-5) 67 | 68 | batch_size = batch_heatmaps.size(0) 69 | num_joints = batch_heatmaps.size(1) 70 | heatmap_height = batch_heatmaps.size(2) 71 | heatmap_width = batch_heatmaps.size(3) 72 | 73 | grid_image = np.zeros((batch_size*heatmap_height, 74 | (num_joints+1)*heatmap_width, 75 | 3), 76 | dtype=np.uint8) 77 | 78 | preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) 79 | 80 | for i in range(batch_size): 81 | image = batch_image[i].mul(255)\ 82 | .clamp(0, 255)\ 83 | .byte()\ 84 | .permute(1, 2, 0)\ 85 | .cpu().numpy() 86 | heatmaps = batch_heatmaps[i].mul(255)\ 87 | .clamp(0, 255)\ 88 | .byte()\ 89 | .cpu().numpy() 90 | 91 | resized_image = cv2.resize(image, 92 | (int(heatmap_width), int(heatmap_height))) 93 | 94 | height_begin = heatmap_height * i 95 | height_end = heatmap_height * (i + 1) 96 | for j in range(num_joints): 97 | cv2.circle(resized_image, 98 | (int(preds[i][j][0]), int(preds[i][j][1])), 99 | 1, [0, 0, 255], 1) 100 | heatmap = heatmaps[j, :, :] 101 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 102 | masked_image = colored_heatmap*0.7 + resized_image*0.3 103 | cv2.circle(masked_image, 104 | (int(preds[i][j][0]), int(preds[i][j][1])), 105 | 1, [0, 0, 255], 1) 106 | 107 | width_begin = heatmap_width * (j+1) 108 | width_end = 
heatmap_width * (j+2) 109 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 110 | masked_image 111 | # grid_image[height_begin:height_end, width_begin:width_end, :] = \ 112 | # colored_heatmap*0.7 + resized_image*0.3 113 | 114 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 115 | 116 | cv2.imwrite(file_name, grid_image) 117 | 118 | 119 | def save_debug_images(config, input, meta, target, joints_pred, output, 120 | prefix): 121 | if not config.DEBUG.DEBUG: 122 | return 123 | 124 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 125 | save_batch_image_with_joints( 126 | input, meta['joints'], meta['joints_vis'], 127 | '{}_gt.jpg'.format(prefix) 128 | ) 129 | if config.DEBUG.SAVE_BATCH_IMAGES_PRED: 130 | save_batch_image_with_joints( 131 | input, joints_pred, meta['joints_vis'], 132 | '{}_pred.jpg'.format(prefix) 133 | ) 134 | if config.DEBUG.SAVE_HEATMAPS_GT: 135 | save_batch_heatmaps( 136 | input, target, '{}_hm_gt.jpg'.format(prefix) 137 | ) 138 | if config.DEBUG.SAVE_HEATMAPS_PRED: 139 | save_batch_heatmaps( 140 | input, output, '{}_hm_pred.jpg'.format(prefix) 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.find('@') 27 | if pos_at == -1: 28 | print("character '@' is not found in the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.find('@') 54 | if pos_at == -1: 55 | print("character '@' is not found in the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71
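# Usage sketch (illustrative): paths handed to this module are expected to look like
# "<archive>.zip@/<member>"; everything before '@' names the zip archive on disk, the
# character right after '@' is skipped, and the remainder is the member read from the
# cached open archive. The archive and image names below are hypothetical.
#
#     from utils import zipreader
#     img = zipreader.imread('data/coco/images.zip@/val2017/000000002685.jpg')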
| -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | opencv-python==3.4.1.15 3 | shapely==1.6.4 4 | Cython 5 | scipy 6 | pandas 7 | pyyaml 8 | json_tricks 9 | scikit-image 10 | yacs>=0.1.5 11 | tensorboardX==1.6 12 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | 24 | import _init_paths 25 | from config import cfg 26 | from config import update_config 27 | from core.loss import JointsMSELoss 28 | from core.function import validate 29 | from utils.utils import create_logger 30 | 31 | import dataset 32 | import models 33 | 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser(description='Train keypoints network') 37 | # general 38 | parser.add_argument('--cfg', 39 | help='experiment configure file name', 40 | required=True, 41 | type=str) 42 | 43 | parser.add_argument('opts', 44 | help="Modify config options using the command-line", 45 | default=None, 46 | nargs=argparse.REMAINDER) 47 | 48 | parser.add_argument('--modelDir', 49 | help='model directory', 50 | type=str, 51 | default='') 52 | parser.add_argument('--logDir', 53 | help='log directory', 54 | type=str, 55 | default='') 56 | parser.add_argument('--dataDir', 57 | help='data directory', 58 | type=str, 59 | default='') 60 | parser.add_argument('--prevModelDir', 61 | help='prev Model directory', 62 | type=str, 63 | default='') 64 | 65 | args = parser.parse_args() 66 | return args 67 | 68 | 69 | def main(): 70 | args = parse_args() 71 | update_config(cfg, args) 72 | 
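    # From here on main() mirrors tools/train.py: create a 'valid' logger, build the
    # model named by cfg.MODEL.NAME, load weights from cfg.TEST.MODEL_FILE (or from
    # final_state.pth in the output directory), wrap the model in DataParallel and
    # run validate() once over cfg.DATASET.TEST_SET.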
73 | logger, final_output_dir, tb_log_dir = create_logger( 74 | cfg, args.cfg, 'valid') 75 | 76 | logger.info(pprint.pformat(args)) 77 | logger.info(cfg) 78 | 79 | # cudnn related setting 80 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 81 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 82 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 83 | 84 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 85 | cfg, is_train=False 86 | ) 87 | 88 | if cfg.TEST.MODEL_FILE: 89 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 90 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 91 | else: 92 | model_state_file = os.path.join( 93 | final_output_dir, 'final_state.pth' 94 | ) 95 | logger.info('=> loading model from {}'.format(model_state_file)) 96 | model.load_state_dict(torch.load(model_state_file)) 97 | 98 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 99 | 100 | # define loss function (criterion) and optimizer 101 | criterion = JointsMSELoss( 102 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 103 | ).cuda() 104 | 105 | # Data loading code 106 | normalize = transforms.Normalize( 107 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 108 | ) 109 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 110 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 111 | transforms.Compose([ 112 | transforms.ToTensor(), 113 | normalize, 114 | ]) 115 | ) 116 | valid_loader = torch.utils.data.DataLoader( 117 | valid_dataset, 118 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 119 | shuffle=False, 120 | num_workers=cfg.WORKERS, 121 | pin_memory=True 122 | ) 123 | 124 | # evaluate on validation set 125 | validate(cfg, valid_loader, valid_dataset, model, criterion, 126 | final_output_dir, tb_log_dir) 127 | 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import argparse 12 | import os 13 | import pprint 14 | import shutil 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | from tensorboardX import SummaryWriter 24 | 25 | import _init_paths 26 | from config import cfg 27 | from config import update_config 28 | from core.loss import JointsMSELoss 29 | from core.function import train 30 | from core.function import validate 31 | from utils.utils import get_optimizer 32 | from utils.utils import save_checkpoint 33 | from utils.utils import create_logger 34 | from utils.utils import get_model_summary 35 | 36 | import dataset 37 | import models 38 | 39 | 40 | def parse_args(): 41 | parser = argparse.ArgumentParser(description='Train keypoints network') 42 | # general 43 | parser.add_argument('--cfg', 44 | help='experiment configure file name', 45 | required=True, 46 | type=str) 47 | 48 | parser.add_argument('opts', 49 | help="Modify config options using the command-line", 50 | default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | # philly 54 | parser.add_argument('--modelDir', 55 | help='model directory', 56 | type=str, 57 | default='') 58 | parser.add_argument('--logDir', 59 | help='log directory', 60 | type=str, 61 | default='') 62 | parser.add_argument('--dataDir', 63 | help='data directory', 64 | type=str, 65 | default='') 66 | parser.add_argument('--prevModelDir', 67 | help='prev Model directory', 68 | type=str, 69 | default='') 70 | 71 | args = parser.parse_args() 72 | 73 | return args 74 | 75 | 76 | def main(): 77 | args = parse_args() 78 | update_config(cfg, args) 79 | 80 | logger, final_output_dir, tb_log_dir = create_logger( 81 | cfg, args.cfg, 'train') 82 | 83 | logger.info(pprint.pformat(args)) 84 | logger.info(cfg) 85 | 86 | # cudnn related setting 87 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 88 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 89 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 90 | 91 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 92 | cfg, is_train=True 93 | ) 94 | 95 | # copy model file 96 | this_dir = os.path.dirname(__file__) 97 | shutil.copy2( 98 | os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), 99 | final_output_dir) 100 | # logger.info(pprint.pformat(model)) 101 | 102 | writer_dict = { 103 | 'writer': SummaryWriter(log_dir=tb_log_dir), 104 | 'train_global_steps': 0, 105 | 'valid_global_steps': 0, 106 | } 107 | 108 | dump_input = torch.rand( 109 | (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) 110 | ) 111 | writer_dict['writer'].add_graph(model, (dump_input, )) 112 | 113 | logger.info(get_model_summary(model, dump_input)) 114 | 115 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 116 | 117 | # define loss function (criterion) and optimizer 118 | criterion = JointsMSELoss( 119 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 120 | ).cuda() 121 | 122 | # Data loading code 123 | normalize = transforms.Normalize( 124 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 125 | ) 126 | train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 127 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, 
True, 128 | transforms.Compose([ 129 | transforms.ToTensor(), 130 | normalize, 131 | ]) 132 | ) 133 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 134 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 135 | transforms.Compose([ 136 | transforms.ToTensor(), 137 | normalize, 138 | ]) 139 | ) 140 | 141 | train_loader = torch.utils.data.DataLoader( 142 | train_dataset, 143 | batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 144 | shuffle=cfg.TRAIN.SHUFFLE, 145 | num_workers=cfg.WORKERS, 146 | pin_memory=cfg.PIN_MEMORY 147 | ) 148 | valid_loader = torch.utils.data.DataLoader( 149 | valid_dataset, 150 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 151 | shuffle=False, 152 | num_workers=cfg.WORKERS, 153 | pin_memory=cfg.PIN_MEMORY 154 | ) 155 | 156 | best_perf = 0.0 157 | best_model = False 158 | last_epoch = -1 159 | optimizer = get_optimizer(cfg, model) 160 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 161 | checkpoint_file = os.path.join( 162 | final_output_dir, 'checkpoint.pth' 163 | ) 164 | 165 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): 166 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 167 | checkpoint = torch.load(checkpoint_file) 168 | begin_epoch = checkpoint['epoch'] 169 | best_perf = checkpoint['perf'] 170 | last_epoch = checkpoint['epoch'] 171 | model.load_state_dict(checkpoint['state_dict']) 172 | 173 | optimizer.load_state_dict(checkpoint['optimizer']) 174 | logger.info("=> loaded checkpoint '{}' (epoch {})".format( 175 | checkpoint_file, checkpoint['epoch'])) 176 | 177 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 178 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 179 | last_epoch=last_epoch 180 | ) 181 | 182 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 183 | lr_scheduler.step() 184 | 185 | # train for one epoch 186 | train(cfg, train_loader, model, criterion, optimizer, epoch, 187 | final_output_dir, tb_log_dir, writer_dict) 188 | 189 | 190 | # evaluate on validation set 191 | perf_indicator = validate( 192 | cfg, valid_loader, valid_dataset, model, criterion, 193 | final_output_dir, tb_log_dir, writer_dict 194 | ) 195 | 196 | if perf_indicator >= best_perf: 197 | best_perf = perf_indicator 198 | best_model = True 199 | else: 200 | best_model = False 201 | 202 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 203 | save_checkpoint({ 204 | 'epoch': epoch + 1, 205 | 'model': cfg.MODEL.NAME, 206 | 'state_dict': model.state_dict(), 207 | 'best_state_dict': model.module.state_dict(), 208 | 'perf': perf_indicator, 209 | 'optimizer': optimizer.state_dict(), 210 | }, best_model, final_output_dir) 211 | 212 | final_model_state_file = os.path.join( 213 | final_output_dir, 'final_state.pth' 214 | ) 215 | logger.info('=> saving final model state to {}'.format( 216 | final_model_state_file) 217 | ) 218 | torch.save(model.module.state_dict(), final_model_state_file) 219 | writer_dict['writer'].close() 220 | 221 | 222 | if __name__ == '__main__': 223 | main() 224 | -------------------------------------------------------------------------------- /visualization/plot_coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # Modified by Depu Meng (mdp@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | import argparse 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import cv2 12 | import json 13 | import matplotlib.lines as mlines 14 | import matplotlib.patches as mpatches 15 | from pycocotools.coco import COCO 16 | from pycocotools.cocoeval import COCOeval 17 | import os 18 | 19 | 20 | class ColorStyle: 21 | def __init__(self, color, link_pairs, point_color): 22 | self.color = color 23 | self.link_pairs = link_pairs 24 | self.point_color = point_color 25 | 26 | for i in range(len(self.color)): 27 | self.link_pairs[i].append(tuple(np.array(self.color[i])/255.)) 28 | 29 | self.ring_color = [] 30 | for i in range(len(self.point_color)): 31 | self.ring_color.append(tuple(np.array(self.point_color[i])/255.)) 32 | 33 | # Xiaochu Style 34 | # (R,G,B) 35 | color1 = [(179,0,0),(228,26,28),(255,255,51), 36 | (49,163,84), (0,109,45), (255,255,51), 37 | (240,2,127),(240,2,127),(240,2,127), (240,2,127), (240,2,127), 38 | (217,95,14), (254,153,41),(255,255,51), 39 | (44,127,184),(0,0,255)] 40 | 41 | link_pairs1 = [ 42 | [15, 13], [13, 11], [11, 5], 43 | [12, 14], [14, 16], [12, 6], 44 | [3, 1],[1, 2],[1, 0],[0, 2],[2,4], 45 | [9, 7], [7,5], [5, 6], 46 | [6, 8], [8, 10], 47 | ] 48 | 49 | point_color1 = [(240,2,127),(240,2,127),(240,2,127), 50 | (240,2,127), (240,2,127), 51 | (255,255,51),(255,255,51), 52 | (254,153,41),(44,127,184), 53 | (217,95,14),(0,0,255), 54 | (255,255,51),(255,255,51),(228,26,28), 55 | (49,163,84),(252,176,243),(0,176,240), 56 | (255,255,0),(169, 209, 142), 57 | (255,255,0),(169, 209, 142), 58 | (255,255,0),(169, 209, 142)] 59 | 60 | xiaochu_style = ColorStyle(color1, link_pairs1, point_color1) 61 | 62 | 63 | # Chunhua Style 64 | # (R,G,B) 65 | color2 = [(252,176,243),(252,176,243),(252,176,243), 66 | (0,176,240), (0,176,240), (0,176,240), 67 | (240,2,127),(240,2,127),(240,2,127), (240,2,127), (240,2,127), 68 | (255,255,0), (255,255,0),(169, 209, 142), 69 | (169, 209, 142),(169, 209, 142)] 70 | 71 | link_pairs2 = [ 72 | [15, 13], [13, 11], [11, 5], 73 | [12, 14], [14, 16], [12, 6], 74 | [3, 1],[1, 2],[1, 0],[0, 2],[2,4], 75 | [9, 7], [7,5], [5, 6], [6, 8], [8, 10], 76 | ] 77 | 78 | point_color2 = [(240,2,127),(240,2,127),(240,2,127), 79 | (240,2,127), (240,2,127), 80 | (255,255,0),(169, 209, 142), 81 | (255,255,0),(169, 209, 142), 82 | (255,255,0),(169, 209, 142), 83 | (252,176,243),(0,176,240),(252,176,243), 84 | (0,176,240),(252,176,243),(0,176,240), 85 | (255,255,0),(169, 209, 142), 86 | (255,255,0),(169, 209, 142), 87 | (255,255,0),(169, 209, 142)] 88 | 89 | chunhua_style = ColorStyle(color2, link_pairs2, point_color2) 90 | 91 | def parse_args(): 92 | parser = argparse.ArgumentParser(description='Visualize COCO predictions') 93 | # general 94 | parser.add_argument('--image-path', 95 | help='Path of COCO val images', 96 | type=str, 97 | default='data/coco/images/val2017/' 98 | ) 99 | 100 | parser.add_argument('--gt-anno', 101 | help='Path of COCO val annotation', 102 | type=str, 103 | default='data/coco/annotations/person_keypoints_val2017.json' 104 | ) 105 | 106 | parser.add_argument('--save-path', 107 | help="Path to save the visualizations", 108 | type=str, 109 | default='visualization/coco/') 110 | 111 | parser.add_argument('--prediction', 112 | help="Prediction file to visualize", 113 | type=str, 114 | required=True) 115 | 116 | parser.add_argument('--style', 117 | 
help="Style of the visualization: Chunhua style or Xiaochu style", 118 | type=str, 119 | default='chunhua') 120 | 121 | args = parser.parse_args() 122 | 123 | return args 124 | 125 | 126 | def map_joint_dict(joints): 127 | joints_dict = {} 128 | for i in range(joints.shape[0]): 129 | x = int(joints[i][0]) 130 | y = int(joints[i][1]) 131 | id = i 132 | joints_dict[id] = (x, y) 133 | 134 | return joints_dict 135 | 136 | def plot(data, gt_file, img_path, save_path, 137 | link_pairs, ring_color, save=True): 138 | 139 | # joints 140 | coco = COCO(gt_file) 141 | coco_dt = coco.loadRes(data) 142 | coco_eval = COCOeval(coco, coco_dt, 'keypoints') 143 | coco_eval._prepare() 144 | gts_ = coco_eval._gts 145 | dts_ = coco_eval._dts 146 | 147 | p = coco_eval.params 148 | p.imgIds = list(np.unique(p.imgIds)) 149 | if p.useCats: 150 | p.catIds = list(np.unique(p.catIds)) 151 | p.maxDets = sorted(p.maxDets) 152 | 153 | # loop through images, area range, max detection number 154 | catIds = p.catIds if p.useCats else [-1] 155 | threshold = 0.3 156 | joint_thres = 0.2 157 | for catId in catIds: 158 | for imgId in p.imgIds[:5000]: 159 | # dimention here should be Nxm 160 | gts = gts_[imgId, catId] 161 | dts = dts_[imgId, catId] 162 | inds = np.argsort([-d['score'] for d in dts], kind='mergesort') 163 | dts = [dts[i] for i in inds] 164 | if len(dts) > p.maxDets[-1]: 165 | dts = dts[0:p.maxDets[-1]] 166 | if len(gts) == 0 or len(dts) == 0: 167 | continue 168 | 169 | sum_score = 0 170 | num_box = 0 171 | img_name = str(imgId).zfill(12) 172 | 173 | # Read Images 174 | img_file = img_path + img_name + '.jpg' 175 | data_numpy = cv2.imread(img_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 176 | h = data_numpy.shape[0] 177 | w = data_numpy.shape[1] 178 | 179 | # Plot 180 | fig = plt.figure(figsize=(w/100, h/100), dpi=100) 181 | ax = plt.subplot(1,1,1) 182 | bk = plt.imshow(data_numpy[:,:,::-1]) 183 | bk.set_zorder(-1) 184 | print(img_name) 185 | for j, gt in enumerate(gts): 186 | # matching dt_box and gt_box 187 | bb = gt['bbox'] 188 | x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 189 | y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 190 | 191 | # create bounds for ignore regions(double the gt bbox) 192 | g = np.array(gt['keypoints']) 193 | #xg = g[0::3]; yg = g[1::3]; 194 | vg = g[2::3] 195 | 196 | for i, dt in enumerate(dts): 197 | # Calculate IoU 198 | dt_bb = dt['bbox'] 199 | dt_x0 = dt_bb[0] - dt_bb[2]; dt_x1 = dt_bb[0] + dt_bb[2] * 2 200 | dt_y0 = dt_bb[1] - dt_bb[3]; dt_y1 = dt_bb[1] + dt_bb[3] * 2 201 | 202 | ol_x = min(x1, dt_x1) - max(x0, dt_x0) 203 | ol_y = min(y1, dt_y1) - max(y0, dt_y0) 204 | ol_area = ol_x * ol_y 205 | s_x = max(x1, dt_x1) - min(x0, dt_x0) 206 | s_y = max(y1, dt_y1) - min(y0, dt_y0) 207 | sum_area = s_x * s_y 208 | iou = ol_area / (sum_area + np.spacing(1)) 209 | score = dt['score'] 210 | 211 | if iou < 0.1 or score < threshold: 212 | continue 213 | else: 214 | print('iou: ', iou) 215 | dt_w = dt_x1 - dt_x0 216 | dt_h = dt_y1 - dt_y0 217 | ref = min(dt_w, dt_h) 218 | num_box += 1 219 | sum_score += dt['score'] 220 | dt_joints = np.array(dt['keypoints']).reshape(17,-1) 221 | joints_dict = map_joint_dict(dt_joints) 222 | 223 | # stick 224 | for k, link_pair in enumerate(link_pairs): 225 | if link_pair[0] in joints_dict \ 226 | and link_pair[1] in joints_dict: 227 | if dt_joints[link_pair[0],2] < joint_thres \ 228 | or dt_joints[link_pair[1],2] < joint_thres \ 229 | or vg[link_pair[0]] == 0 \ 230 | or vg[link_pair[1]] == 0: 231 | continue 232 | if k in range(6,11): 233 | lw = 1 234 | 
else: 235 | lw = ref / 100. 236 | line = mlines.Line2D( 237 | np.array([joints_dict[link_pair[0]][0], 238 | joints_dict[link_pair[1]][0]]), 239 | np.array([joints_dict[link_pair[0]][1], 240 | joints_dict[link_pair[1]][1]]), 241 | ls='-', lw=lw, alpha=1, color=link_pair[2],) 242 | line.set_zorder(0) 243 | ax.add_line(line) 244 | # black ring 245 | for k in range(dt_joints.shape[0]): 246 | if dt_joints[k,2] < joint_thres \ 247 | or vg[link_pair[0]] == 0 \ 248 | or vg[link_pair[1]] == 0: 249 | continue 250 | if dt_joints[k,0] > w or dt_joints[k,1] > h: 251 | continue 252 | if k in range(5): 253 | radius = 1 254 | else: 255 | radius = ref / 100 256 | 257 | circle = mpatches.Circle(tuple(dt_joints[k,:2]), 258 | radius=radius, 259 | ec='black', 260 | fc=ring_color[k], 261 | alpha=1, 262 | linewidth=1) 263 | circle.set_zorder(1) 264 | ax.add_patch(circle) 265 | 266 | avg_score = (sum_score / (num_box+np.spacing(1)))*1000 267 | 268 | plt.gca().xaxis.set_major_locator(plt.NullLocator()) 269 | plt.gca().yaxis.set_major_locator(plt.NullLocator()) 270 | plt.axis('off') 271 | plt.subplots_adjust(top=1,bottom=0,left=0,right=1,hspace=0,wspace=0) 272 | plt.margins(0,0) 273 | if save: 274 | plt.savefig(save_path + \ 275 | 'score_'+str(int(avg_score))+ \ 276 | '_id_'+str(imgId)+ \ 277 | '_'+img_name + '.png', 278 | format='png', bbox_inches='tight', dpi=100) 279 | plt.savefig(save_path +'id_'+str(imgId)+ '.pdf', format='pdf', 280 | bbox_inches='tight', dpi=100) 281 | # plt.show() 282 | plt.close() 283 | 284 | if __name__ == '__main__': 285 | 286 | args = parse_args() 287 | if args.style == 'xiaochu': 288 | # Xiaochu Style 289 | colorstyle = xiaochu_style 290 | elif args.style == 'chunhua': 291 | # Chunhua Style 292 | colorstyle = chunhua_style 293 | else: 294 | raise Exception('Invalid color style') 295 | 296 | save_path = args.save_path 297 | img_path = args.image_path 298 | if not os.path.exists(save_path): 299 | try: 300 | os.makedirs(save_path) 301 | except Exception: 302 | print('Failed to create {}'.format(save_path)) 303 | 304 | 305 | with open(args.prediction) as f: 306 | data = json.load(f) 307 | gt_file = args.gt_anno 308 | plot(data, gt_file, img_path, save_path, colorstyle.link_pairs, colorstyle.ring_color, save=True) 309 | 310 | --------------------------------------------------------------------------------
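A minimal usage sketch, assuming the default OUTPUT_DIR layout produced by create_logger and tools/train.py; the checkpoint path and the prediction file are illustrative, the config path is one of the files under experiments/. Based on the argument parsers in tools/train.py, tools/test.py and visualization/plot_coco.py, typical invocations from the repository root might look like:

    # train HRNet-W32 on COCO at 256x192
    python tools/train.py --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml

    # evaluate a trained checkpoint; TEST.MODEL_FILE is passed through the
    # argparse.REMAINDER `opts` and merged into the config by update_config
    python tools/test.py --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \
        TEST.MODEL_FILE output/coco/pose_hrnet/w32_256x192_adam_lr1e-3/final_state.pth

    # visualize COCO keypoint predictions (the prediction json is whatever results
    # file the evaluation step wrote)
    python visualization/plot_coco.py \
        --prediction <keypoint results .json> \
        --gt-anno data/coco/annotations/person_keypoints_val2017.json \
        --image-path data/coco/images/val2017/ \
        --save-path visualization/coco/ \
        --style chunhua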