├── .gitignore ├── LICENSE ├── README.md ├── demo ├── .gitignore ├── Dockerfile ├── README.md ├── build-docker.sh ├── demo.sh ├── inference-config-w18_v1.yaml ├── inference-config-w18_v2.yaml ├── inference-config.yaml └── inference.py ├── experiments ├── coco │ ├── hrnet │ │ ├── w18_small_v1_256x192_adam_lr1e-3.yaml │ │ ├── w18_small_v2_256x192_adam_lr1e-3.yaml │ │ ├── w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml │ │ ├── w32_256x192_adam_lr1e-3.yaml │ │ ├── w32_384x288_adam_lr1e-3.yaml │ │ ├── w48_256x192_adam_lr1e-3.yaml │ │ └── w48_384x288_adam_lr1e-3.yaml │ ├── lpn │ │ ├── lpn100_256x192_gd256x2_gc.yaml │ │ ├── lpn101_256x192_gd256x2_gc.yaml │ │ ├── lpn152_256x192_gd256x2_gc.yaml │ │ ├── lpn18_256x192_gd256x2_gc.yaml │ │ ├── lpn18h_256x192_gd256x2_gc.yaml │ │ ├── lpn34_256x192_gd256x2_gc.yaml │ │ ├── lpn34h_256x192_gd256x2_gc.yaml │ │ └── lpn50_256x192_gd256x2_gc.yaml │ └── resnet │ │ ├── res101_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res101_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res152_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res152_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res50_256x192_d256x3_adam_lr1e-3.yaml │ │ └── res50_384x288_d256x3_adam_lr1e-3.yaml └── mpii │ ├── hrnet │ ├── w32_256x256_adam_lr1e-3.yaml │ └── w48_256x256_adam_lr1e-3.yaml │ └── resnet │ ├── res101_256x256_d256x3_adam_lr1e-3.yaml │ ├── res152_256x256_d256x3_adam_lr1e-3.yaml │ └── res50_256x256_d256x3_adam_lr1e-3.yaml ├── lib ├── Makefile ├── config │ ├── __init__.py │ ├── default.py │ └── models.py ├── core │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── coco.py │ └── mpii.py ├── models │ ├── __init__.py │ ├── lightweight_modules.py │ ├── lpn.py │ ├── pose_hrnet.py │ └── pose_resnet.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup_linux.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt ├── tools ├── _init_paths.py ├── test.py ├── train.ori.py └── train.py ├── train_coco_w18_v1.sh ├── train_coco_w18_v2.sh ├── train_coco_w32.sh ├── train_lpn.sh ├── train_mpii.sh └── visualization └── plot_coco.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | #lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # custom 132 | models/* 133 | .history 134 | demo/*.mp4 135 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 cavalleria 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # humanpose.pytorch
2 | 
3 | ## Introduction
4 | 
5 | This is a human pose estimation PyTorch implementation derived from [deep-high-resolution-net.pytorch](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch), aiming at lightweight real-time applications.
6 | 
7 | ## Features
8 | 
9 | - [x] Supports Distributed DataParallel training, which is much faster than the original repo.
10 | - [x] Supports lightweight pose backbones.
11 | - [x] Supports the lightweight mobile human detector [yolov3mobile](https://github.com/cavalleria/yolov3mobile).
12 | 
13 | ## Main Results
14 | 
15 | ### Results on MPII val
16 | 
17 | | Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1 |
18 | |--------------------|------|----------|-------|-------|------|------|-------|------|----------|
19 | | **pose_hrnet_w32** | 97.067 | 95.686 | 90.21 | 85.644 | 89.077 | 85.795 | 82.711 | 89.927 | 37.931 |
20 | | **pose_hrnet_w48** | 96.930 | 95.771 | 90.864 | 86.329 | 88.731 | 86.862 | 82.829 | 90.208 | 38.002 |
21 | 
22 | ### Results on COCO val2017, using a detector with human AP of 56.4 on the COCO val2017 dataset
23 | 
24 | | Arch | Input size | #Params | FLOPs | Weight size | AP | AP .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) |
25 | |--------------------|------------|-------|-------|-------|-------|-------|--------|--------|--------|-------|-------|--------|--------|--------|
26 | | **pose_hrnet_w18_v1** | 256x192 | 1.3M | 0.68G | 5.3M | 0.572 | 0.863 | 0.644 | 0.545 | 0.614 | 0.612 | 0.876 | 0.687 | 0.579 | 0.661 |
27 | | **pose_hrnet_w18_v2** | 256x192 | 3.7M | 1.8G | 15M | 0.710 | 0.916 | 0.784 | 0.685 | 0.753 | 0.740 | 0.922 | 0.806 | 0.710 | 0.786 |
28 | | **pose_hrnet_w18_v2_softargmax** | 256x192 | 3.7M | 1.8G | 15M | 0.713 | 0.916 | 0.783 | 0.685 | 0.758 | 0.743 | 0.923 | 0.809 | 0.711 | 0.792 |
29 | | **pose_hrnet_w32** | 256x192 | 28.5M | 7.1G | 110M | 0.765 | 0.936 | 0.838 | 0.740 | 0.810 | 0.794 | 0.945 | 0.858 | 0.763 | 0.842 |
30 | | **lpn_18** | 256x192 | 0.47M | 0.42G | 1.9M | 0.445 | 0.773 | 0.445 | 0.434 | 0.467 | 0.497 | 0.798 | 0.519 | 0.474 | 0.531 |
31 | | **lpn_18h** | 256x192 | 0.50M | 0.43G | 2.1M | 0.486 | 0.806 | 0.506 | 0.472 | 0.511 | 0.533 | 0.821 | 0.567 | 0.508 | 0.570 |
32 | | **lpn_34** | 256x192 | 0.59M | 0.43G | 2.5M | 0.493 | 0.808 | 0.522 | 0.478 | 0.515 | 0.538 | 0.825 | 0.577 | 0.514 | 0.573 |
33 | | **lpn_34h** | 256x192 | 0.66M | 0.46G | 2.7M | 0.536 | 0.830 | 0.579 | 0.520 | 0.564 | 0.579 | 0.849 | 0.630 | 0.552 | 0.618 |
34 | | **lpn_50** | 256x192 | 2.9M | 1.0G | 12M | 0.684 | 0.904 | 0.762 | 0.659 | 0.724 | 0.717 | 0.914 | 0.789 | 0.687 | 0.763 |
35 | | **lpn_100** | 256x192 | 6.7M | 1.8G | 27M | 0.721 | 0.915 | 0.805 | 0.699 | 0.764 | 0.754 | 0.929 | 0.825 | 0.725 | 0.799 |
36 | 
37 | 
38 | ### Iterative training strategy
39 | 
40 | | lpn18h | AP | AP .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) |
41 | |--------|--------|-------|--------|-------|--------|-------|--------|-------|--------|-------|
42 | | stage0 | 0.486 | 0.806 | 0.506 | 0.472 | 0.511 | 0.533 | 0.821 | 0.567 | 0.508 | 0.570 |
43 | | stage1 | 0.496 | 0.807 | 0.521 | 0.483 | 0.521 | 0.541 | 0.822 | 0.577 | 0.517 | 0.577 |
44 | | stage2 | 0.505 | 0.808 | 0.540 | 0.491 | 0.529 | 0.549 | 0.825 | 0.591 | 0.524 | 0.586 |
45 | | stage3 | 0.510 | 0.819 | 0.542 | 0.497 |
0.536 | 0.555 | 0.832 | 0.598 | 0.530 | 0.591 | 46 | | stage4 | 0.514 | 0.819 | 0.543 | 0.500 | 0.538 | 0.558 | 0.832 | 0.599 | 0.533 | 0.595 | 47 | | stage5 | 0.517 | 0.819 | 0.553 | 0.500 | 0.544 | 0.559 | 0.834 | 0.602 | 0.533 | 0.598 | 48 | | stage6 | 0.520 | 0.820 | 0.557 | 0.503 | 0.546 | 0.563 | 0.836 | 0.607 | 0.537 | 0.601 | 49 | 50 | 51 | ## Environment 52 | 53 | The code is developed using python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 8 NVIDIA V100 GPU cards. Other platforms or GPU cards are not fully tested. 54 | 55 | ## Quick start 56 | 57 | ### Installation 58 | 59 | 1. Install pytorch >= v1.0.0 following [official instruction](https://pytorch.org/). 60 | 2. Clone this repo, and we'll call the directory that you cloned as ${POSE_ROOT}. 61 | 3. Install dependencies: 62 | 63 | ``` 64 | pip install -r requirements.txt 65 | ``` 66 | 4. Make libs: 67 | 68 | ``` 69 | cd ${POSE_ROOT}/lib 70 | make 71 | ``` 72 | 5. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 73 | ``` 74 | # COCOAPI=/path/to/clone/cocoapi 75 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 76 | cd $COCOAPI/PythonAPI 77 | # Install into global site-packages 78 | make install 79 | # Alternatively, if you do not have permissions or prefer 80 | # not to install the COCO API into global site-packages 81 | python3 setup.py install --user 82 | ``` 83 | Note that instructions like # COCOAPI=/path/to/install/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. 84 | 6. Init output(training model output directory) and log(tensorboard log directory) directory: 85 | 86 | ``` 87 | mkdir output 88 | mkdir log 89 | ``` 90 | 91 | Your directory tree should look like this: 92 | 93 | ``` 94 | ${POSE_ROOT} 95 | ├── data 96 | ├── experiments 97 | ├── lib 98 | ├── log 99 | ├── models 100 | ├── output 101 | ├── tools 102 | ├── README.md 103 | └── requirements.txt 104 | ``` 105 | 106 | 7. Download pretrained models from our model zoo([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 107 | ``` 108 | ${POSE_ROOT} 109 | `-- models 110 | `-- pytorch 111 | |-- imagenet 112 | | |-- hrnet_w32-36af842e.pth 113 | | |-- hrnet_w48-8ef0771d.pth 114 | | |-- resnet50-19c8e357.pth 115 | |-- pose_coco 116 | | |-- pose_hrnet_w32_256x192.pth 117 | | |-- pose_hrnet_w32_384x288.pth 118 | | |-- pose_hrnet_w48_256x192.pth 119 | | |-- pose_hrnet_w48_384x288.pth 120 | | |-- pose_resnet_50_256x192.pth 121 | | `-- pose_resnet_50_384x288.pth 122 | `-- pose_mpii 123 | |-- pose_hrnet_w32_256x256.pth 124 | |-- pose_hrnet_w48_256x256.pth 125 | `-- pose_resnet_50_256x256.pth 126 | 127 | ``` 128 | 129 | ### Data preparation 130 | **For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). The original annotation files are in matlab format. We have converted them into json format, you also need to download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW00SqrairNetmeVu4) or [GoogleDrive](https://drive.google.com/drive/folders/1En_VqmStnsXMdldXA6qpqEyDQulnmS3a?usp=sharing). 
131 | Extract them under {POSE_ROOT}/data, and make them look like this: 132 | ``` 133 | ${POSE_ROOT} 134 | |-- data 135 | `-- |-- mpii 136 | `-- |-- annot 137 | | |-- gt_valid.mat 138 | | |-- test.json 139 | | |-- train.json 140 | | |-- trainval.json 141 | | `-- valid.json 142 | `-- images 143 | |-- 000001163.jpg 144 | |-- 000003072.jpg 145 | ``` 146 | 147 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. We also provide person detection result of COCO val2017 and test-dev2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing). 148 | Download and extract them under {POSE_ROOT}/data, and make them look like this: 149 | ``` 150 | ${POSE_ROOT} 151 | |-- data 152 | `-- |-- coco 153 | `-- |-- annotations 154 | | |-- person_keypoints_train2017.json 155 | | `-- person_keypoints_val2017.json 156 | |-- person_detection_results 157 | | |-- COCO_val2017_detections_AP_H_56_person.json 158 | | |-- COCO_test-dev2017_detections_AP_H_609_person.json 159 | `-- images 160 | |-- train2017 161 | | |-- 000000000009.jpg 162 | | |-- 000000000025.jpg 163 | | |-- 000000000030.jpg 164 | | |-- ... 165 | `-- val2017 166 | |-- 000000000139.jpg 167 | |-- 000000000285.jpg 168 | |-- 000000000632.jpg 169 | |-- ... 170 | ``` 171 | 172 | ### Training and Testing 173 | 174 | #### Testing on MPII dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 175 | 176 | 177 | ``` 178 | python tools/test.py \ 179 | --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml \ 180 | TEST.MODEL_FILE models/pytorch/pose_mpii/pose_hrnet_w32_256x256.pth 181 | ``` 182 | 183 | #### Training on MPII dataset 184 | 185 | ``` 186 | python tools/train.py \ 187 | --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml 188 | ``` 189 | 190 | #### Testing on COCO val2017 dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 191 | 192 | 193 | ``` 194 | python tools/test.py \ 195 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \ 196 | TEST.MODEL_FILE models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth \ 197 | TEST.USE_GT_BBOX False 198 | ``` 199 | 200 | #### Training on COCO train2017 dataset 201 | 202 | ``` 203 | python tools/train.py \ 204 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \ 205 | ``` 206 | 207 | ### Visualization 208 | 209 | #### Visualizing predictions on COCO val 210 | 211 | ``` 212 | python visualization/plot_coco.py \ 213 | --prediction output/coco/w48_384x288_adam_lr1e-3/results/keypoints_val2017_results_0.json \ 214 | --save-path visualization/results 215 | 216 | ``` 217 | 218 | ## Acknowledgement 219 | 220 | * This repo is modified and adapted on these great repositories [deep-high-resolution-net.pytorch](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch) 221 | 222 | ## Contact 223 | 224 | ``` 225 | cavallyb@gmail.com 226 | ``` 227 | 228 | -------------------------------------------------------------------------------- /demo/.gitignore: 
-------------------------------------------------------------------------------- 1 | output 2 | models 3 | videos 4 | -------------------------------------------------------------------------------- /demo/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04 2 | 3 | ENV OPENCV_VERSION="3.4.6" 4 | 5 | # Basic toolchain 6 | RUN apt-get update && apt-get install -y \ 7 | apt-utils \ 8 | build-essential \ 9 | git \ 10 | wget \ 11 | unzip \ 12 | yasm \ 13 | pkg-config \ 14 | libcurl4-openssl-dev \ 15 | zlib1g-dev \ 16 | htop \ 17 | cmake \ 18 | nano \ 19 | python3-pip \ 20 | python3-dev \ 21 | python3-tk \ 22 | libx264-dev \ 23 | && cd /usr/local/bin \ 24 | && ln -s /usr/bin/python3 python \ 25 | && pip3 install --upgrade pip \ 26 | && apt-get autoremove -y 27 | 28 | # Getting OpenCV dependencies available with apt 29 | RUN apt-get update && apt-get install -y \ 30 | libeigen3-dev \ 31 | libjpeg-dev \ 32 | libpng-dev \ 33 | libtiff-dev \ 34 | libjasper-dev \ 35 | libswscale-dev \ 36 | libavcodec-dev \ 37 | libavformat-dev && \ 38 | apt-get autoremove -y 39 | 40 | # Getting other dependencies 41 | RUN apt-get update && apt-get install -y \ 42 | cppcheck \ 43 | graphviz \ 44 | doxygen \ 45 | p7zip-full \ 46 | libdlib18 \ 47 | libdlib-dev && \ 48 | apt-get autoremove -y 49 | 50 | 51 | # Install OpenCV + OpenCV contrib (takes forever) 52 | RUN mkdir -p /tmp && \ 53 | cd /tmp && \ 54 | wget --no-check-certificate -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \ 55 | wget --no-check-certificate -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \ 56 | unzip opencv.zip && \ 57 | unzip opencv_contrib.zip && \ 58 | mkdir opencv-${OPENCV_VERSION}/build && \ 59 | cd opencv-${OPENCV_VERSION}/build && \ 60 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 61 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 62 | -D WITH_CUDA=ON \ 63 | -D CUDA_FAST_MATH=1 \ 64 | -D WITH_CUBLAS=1 \ 65 | -D WITH_FFMPEG=ON \ 66 | -D WITH_OPENCL=ON \ 67 | -D WITH_V4L=ON \ 68 | -D WITH_OPENGL=ON \ 69 | -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \ 70 | .. 
&& \
71 | make -j$(nproc) && \
72 | make install && \
73 | echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf && \
74 | ldconfig && \
75 | cd /tmp && \
76 | rm -rf opencv-${OPENCV_VERSION} opencv.zip opencv_contrib-${OPENCV_VERSION} opencv_contrib.zip && \
77 | cd /
78 | 
79 | # Compile and install ffmpeg from source
80 | RUN git clone https://github.com/FFmpeg/FFmpeg /root/ffmpeg && \
81 | cd /root/ffmpeg && \
82 | ./configure --enable-gpl --enable-libx264 --enable-nonfree --disable-shared --extra-cflags=-I/usr/local/include && \
83 | make -j8 && make install -j8
84 | 
85 | # clone deep-high-resolution-net
86 | ARG POSE_ROOT=/pose_root
87 | RUN git clone https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git $POSE_ROOT
88 | WORKDIR $POSE_ROOT
89 | RUN mkdir output && mkdir log
90 | 
91 | RUN pip3 install -r requirements.txt && \
92 | pip3 install torch==1.1.0 \
93 | torchvision==0.3.0 \
94 | opencv-python \
95 | pillow==6.2.1
96 | 
97 | # build deep-high-resolution-net lib
98 | WORKDIR $POSE_ROOT/lib
99 | RUN make
100 | 
101 | # install COCO API
102 | ARG COCOAPI=/cocoapi
103 | RUN git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
104 | WORKDIR $COCOAPI/PythonAPI
105 | # Install into global site-packages
106 | RUN make install
107 | 
108 | # download Faster R-CNN pretrained model for person detection
109 | RUN python -c "import torchvision; model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True); model.eval()"
110 | 
111 | COPY inference.py $POSE_ROOT/tools
112 | COPY inference-config.yaml $POSE_ROOT/
113 | 
-------------------------------------------------------------------------------- /demo/README.md: --------------------------------------------------------------------------------
1 | # Inference hrnet
2 | 
3 | Run inference with deep-high-resolution-net.pytorch.
4 | 
5 | ## Prep
6 | 1. Download the researchers' pretrained pose estimator from [Google Drive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) to this directory under `models/`
7 | 2. Put the video file you'd like to run inference on in this directory under `videos`
8 | 3. Build the Docker container in this directory with `./build-docker.sh` (this can take a while because it compiles OpenCV)
9 | 4. Update the `inference-config.yaml` file to reflect the number of GPUs you have available
10 | 
11 | ## Running the Model
12 | ```
13 | python inference.py --cfg inference-config.yaml \
14 | --videoFile ../../multi_people.mp4 \
15 | --writeBoxFrames \
16 | --outputDir output \
17 | TEST.MODEL_FILE ../models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth
18 | 
19 | ```
20 | 
21 | The above command will create a video under the *output* directory and per-frame pose images under the *output/pose* directory.
22 | Even with a GPU (a GTX 1080 in my case), person detection takes nearly **0.06 sec** and pose estimation
23 | nearly **0.07 sec** per frame. In total, inference time per frame is about **0.13 sec**, i.e. roughly 7-8 fps. So if you need real-time (fps >= 20)
24 | pose estimation, you should try another approach.
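If you want to sanity-check the detection half of that budget on your own hardware, the snippet below times the same torchvision Faster R-CNN that the Dockerfile pre-downloads for person detection. It is a rough sketch, not code from this repo: the dummy frame size, the 20-iteration loop, and the warm-up choice are arbitrary, and the real numbers depend on your input resolution and GPU.

```python
import time

import torch
import torchvision

# Same detector the Dockerfile downloads for person detection.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
detector = detector.to(device).eval()

# Stand-in for one video frame: a CHW float tensor in [0, 1].
frame = torch.rand(3, 480, 640, device=device)

with torch.no_grad():
    detector([frame])  # warm-up (CUDA kernels, cuDNN autotune)
    if device.type == "cuda":
        torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(20):
        detector([frame])
    if device.type == "cuda":
        torch.cuda.synchronize()
elapsed = (time.perf_counter() - start) / 20

print(f"person detection: {elapsed:.3f} s per frame")
```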
25 | 26 | ## Result 27 | 28 | Some output image is as: 29 | 30 | ![1 person](inference_1.jpg) 31 | Fig: 1 person inference 32 | 33 | ![3 person](inference_3.jpg) 34 | Fig: 3 person inference 35 | 36 | ![3 person](inference_5.jpg) 37 | Fig: 3 person inference -------------------------------------------------------------------------------- /demo/build-docker.sh: -------------------------------------------------------------------------------- 1 | docker build -t hrnet_demo_inference . 2 | -------------------------------------------------------------------------------- /demo/demo.sh: -------------------------------------------------------------------------------- 1 | python inference.py --cfg inference-config-w18_v1.yaml \ 2 | --videoFile ./posetest.mp4 \ 3 | --writeBoxFrames \ 4 | --outputDir output \ 5 | TEST.MODEL_FILE ../../output/coco/pose_hrnet/w18_small_v1_256x192_adam_lr1e-3/model_best.pth 6 | -------------------------------------------------------------------------------- /demo/inference-config-w18_v1.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 2 58 | - 2 59 | NUM_CHANNELS: 60 | - 16 61 | - 32 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 1 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 2 69 | - 2 70 | - 2 71 | NUM_CHANNELS: 72 | - 16 73 | - 32 74 | - 64 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 1 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 2 82 | - 2 83 | - 2 84 | - 2 85 | NUM_CHANNELS: 86 | - 16 87 | - 32 88 | - 64 89 | - 128 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | 
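These inference/experiment YAMLs are not consumed by the models directly; like the upstream HRNet code base, `lib/config/default.py` presumably defines a yacs default config that the YAML is merged into, and the trailing `KEY VALUE` pairs on the `tools/test.py` / `inference.py` command lines are merged on top. A minimal, hedged sketch of that pattern is below — it uses the real yacs API, but a bare config node stands in for the repo's actual defaults.

```python
from yacs.config import CfgNode as CN

# Bare node that accepts new keys; the repo itself would start from the full
# default tree in lib/config/default.py (assumed to follow the yacs pattern of
# the upstream deep-high-resolution-net code).
cfg = CN(new_allowed=True)
cfg.merge_from_file("demo/inference-config-w18_v1.yaml")

# Trailing "KEY VALUE" pairs from the command line map onto merge_from_list,
# e.g. the overrides shown in the README's tools/test.py example.
cfg.merge_from_list([
    "TEST.MODEL_FILE", "models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth",
    "TEST.USE_GT_BBOX", "False",
])
cfg.freeze()

print(cfg.MODEL.NAME, cfg.MODEL.IMAGE_SIZE)   # pose_hrnet [288, 384]
print(cfg.TEST.MODEL_FILE, cfg.TEST.USE_GT_BBOX)
```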
-------------------------------------------------------------------------------- /demo/inference-config-w18_v2.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 2 58 | - 2 59 | NUM_CHANNELS: 60 | - 18 61 | - 36 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 3 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 2 69 | - 2 70 | - 2 71 | NUM_CHANNELS: 72 | - 18 73 | - 36 74 | - 72 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 2 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 2 82 | - 2 83 | - 2 84 | - 2 85 | NUM_CHANNELS: 86 | - 18 87 | - 36 88 | - 72 89 | - 144 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /demo/inference-config.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | 
SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w18_small_v1_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w18_small_model_v1.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 2 58 | - 2 59 | NUM_CHANNELS: 60 | - 16 61 | - 32 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 1 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 2 69 | - 2 70 | - 2 71 | NUM_CHANNELS: 72 | - 16 73 | - 32 74 | - 64 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 1 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 2 82 | - 2 83 | - 2 84 | - 2 85 | NUM_CHANNELS: 86 | - 16 87 | - 32 88 | - 64 89 | - 128 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | 
BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w18_small_v2_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w18_small_model_v2.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 2 58 | - 2 59 | NUM_CHANNELS: 60 | - 18 61 | - 36 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 3 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 2 69 | - 2 70 | - 2 71 | NUM_CHANNELS: 72 | - 18 73 | - 36 74 | - 72 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 2 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 2 82 | - 2 83 | - 2 84 | - 2 85 | NUM_CHANNELS: 86 | - 18 87 | - 36 88 | - 72 89 | - 144 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | 
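The next config, `w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml`, is the same network but adds `SOFT_ARGMAX: true` under `TEST`; this is what produces the `pose_hrnet_w18_v2_softargmax` row in the README results. As a hedged sketch of what soft-argmax decoding means in general — a spatial softmax followed by an expectation over pixel coordinates, giving sub-pixel, differentiable joint locations rather than a hard peak — see below; this is the generic formulation, not necessarily the exact implementation in `lib/core/inference.py`.

```python
import torch
import torch.nn.functional as F

def soft_argmax(heatmaps: torch.Tensor, beta: float = 100.0) -> torch.Tensor:
    """Decode (N, K, H, W) heatmaps into (N, K, 2) (x, y) coords in heatmap pixels.

    beta sharpens the softmax; its value here is an arbitrary illustrative choice.
    """
    n, k, h, w = heatmaps.shape
    probs = F.softmax(heatmaps.reshape(n, k, -1) * beta, dim=-1).reshape(n, k, h, w)
    xs = torch.arange(w, dtype=probs.dtype, device=probs.device).view(1, 1, 1, w)
    ys = torch.arange(h, dtype=probs.dtype, device=probs.device).view(1, 1, h, 1)
    x = (probs * xs).sum(dim=(2, 3))  # expected x coordinate per joint
    y = (probs * ys).sum(dim=(2, 3))  # expected y coordinate per joint
    return torch.stack([x, y], dim=-1)

# 17 COCO joints on the 48x64 heatmaps used by the 256x192 configs.
coords = soft_argmax(torch.randn(2, 17, 64, 48))
print(coords.shape)  # torch.Size([2, 17, 2])
```

Compared with the usual hard argmax plus quarter-pixel shift, this avoids quantizing predictions to heatmap grid cells, which is consistent with the small AP gain reported for the softargmax row in the README table.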
-------------------------------------------------------------------------------- /experiments/coco/hrnet/w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w18_small_model_v2.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 2 58 | - 2 59 | NUM_CHANNELS: 60 | - 18 61 | - 36 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 3 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 2 69 | - 2 70 | - 2 71 | NUM_CHANNELS: 72 | - 18 73 | - 36 74 | - 72 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 2 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 2 82 | - 2 83 | - 2 84 | - 2 85 | NUM_CHANNELS: 86 | - 18 87 | - 36 88 | - 72 89 | - 144 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | SOFT_ARGMAX: true 123 | DEBUG: 124 | DEBUG: true 125 | SAVE_BATCH_IMAGES_GT: true 126 | SAVE_BATCH_IMAGES_PRED: true 127 | SAVE_HEATMAPS_GT: true 128 | SAVE_HEATMAPS_PRED: true 129 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 
'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | 
NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | 
SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 24 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 24 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn100_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | NAME: 'lpn' 27 | PRETRAINED: '' 28 | IMAGE_SIZE: 29 | - 192 30 | - 256 31 | HEATMAP_SIZE: 32 | - 48 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 17 36 | 
TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | ATTENTION: 'GC' 39 | FINAL_CONV_KERNEL: 1 40 | DECONV_WITH_BIAS: false 41 | NUM_DECONV_LAYERS: 2 42 | NUM_DECONV_FILTERS: 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | NUM_LAYERS: 100 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TRAIN: 52 | BATCH_SIZE_PER_GPU: 32 53 | SHUFFLE: true 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 150 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | WD: 0.0001 63 | GAMMA1: 0.99 64 | GAMMA2: 0.0 65 | MOMENTUM: 0.9 66 | NESTEROV: false 67 | TEST: 68 | BATCH_SIZE_PER_GPU: 32 69 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 70 | BBOX_THRE: 1.0 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.2 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.9 76 | USE_GT_BBOX: true 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | SOFT_ARGMAX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn101_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 10 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | ROOT: '../data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'lpn' 24 | PRETRAINED: '' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | ATTENTION: 'GC' 36 | FINAL_CONV_KERNEL: 1 37 | DECONV_WITH_BIAS: false 38 | NUM_DECONV_LAYERS: 2 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | NUM_LAYERS: 101 46 | LOSS: 47 | USE_TARGET_WEIGHT: true 48 | TRAIN: 49 | BATCH_SIZE_PER_GPU: 32 50 | SHUFFLE: true 51 | BEGIN_EPOCH: 0 52 | END_EPOCH: 150 53 | OPTIMIZER: 'adam' 54 | LR: 0.001 55 | LR_FACTOR: 0.1 56 | LR_STEP: 57 | - 90 58 | - 120 59 | WD: 0.0001 60 | GAMMA1: 0.99 61 | GAMMA2: 0.0 62 | MOMENTUM: 0.9 63 | NESTEROV: false 64 | TEST: 65 | BATCH_SIZE_PER_GPU: 32 66 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 67 | BBOX_THRE: 1.0 68 | IMAGE_THRE: 0.0 69 | IN_VIS_THRE: 0.2 70 | MODEL_FILE: '' 71 | NMS_THRE: 1.0 72 | OKS_THRE: 0.9 73 | FLIP_TEST: true 74 | POST_PROCESS: true 75 | SHIFT_HEATMAP: true 76 | SOFT_ARGMAX: true 77 | DEBUG: 78 | DEBUG: true 79 | SAVE_BATCH_IMAGES_GT: true 80 | SAVE_BATCH_IMAGES_PRED: true 81 | SAVE_HEATMAPS_GT: true 82 | SAVE_HEATMAPS_PRED: true 83 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn152_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 10 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | ROOT: '../data/coco/' 17 | TEST_SET: 'val2017' 
18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'lpn' 24 | PRETRAINED: '' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | ATTENTION: 'GC' 36 | FINAL_CONV_KERNEL: 1 37 | DECONV_WITH_BIAS: false 38 | NUM_DECONV_LAYERS: 2 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | NUM_LAYERS: 152 46 | LOSS: 47 | USE_TARGET_WEIGHT: true 48 | TRAIN: 49 | BATCH_SIZE_PER_GPU: 32 50 | SHUFFLE: true 51 | BEGIN_EPOCH: 0 52 | END_EPOCH: 150 53 | OPTIMIZER: 'adam' 54 | LR: 0.001 55 | LR_FACTOR: 0.1 56 | LR_STEP: 57 | - 90 58 | - 120 59 | WD: 0.0001 60 | GAMMA1: 0.99 61 | GAMMA2: 0.0 62 | MOMENTUM: 0.9 63 | NESTEROV: false 64 | TEST: 65 | BATCH_SIZE_PER_GPU: 32 66 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 67 | BBOX_THRE: 1.0 68 | IMAGE_THRE: 0.0 69 | IN_VIS_THRE: 0.2 70 | MODEL_FILE: '' 71 | NMS_THRE: 1.0 72 | OKS_THRE: 0.9 73 | FLIP_TEST: true 74 | POST_PROCESS: true 75 | SHIFT_HEATMAP: true 76 | SOFT_ARGMAX: true 77 | DEBUG: 78 | DEBUG: false 79 | SAVE_BATCH_IMAGES_GT: true 80 | SAVE_BATCH_IMAGES_PRED: true 81 | SAVE_HEATMAPS_GT: true 82 | SAVE_HEATMAPS_PRED: true 83 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn18_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | NAME: 'lpn' 27 | PRETRAINED: '' 28 | IMAGE_SIZE: 29 | - 192 30 | - 256 31 | HEATMAP_SIZE: 32 | - 48 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 17 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | ATTENTION: 'GC' 39 | FINAL_CONV_KERNEL: 1 40 | DECONV_WITH_BIAS: false 41 | NUM_DECONV_LAYERS: 2 42 | NUM_DECONV_FILTERS: 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | NUM_LAYERS: 18 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TRAIN: 52 | BATCH_SIZE_PER_GPU: 32 53 | SHUFFLE: true 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 150 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | WD: 0.0001 63 | GAMMA1: 0.99 64 | GAMMA2: 0.0 65 | MOMENTUM: 0.9 66 | NESTEROV: false 67 | TEST: 68 | BATCH_SIZE_PER_GPU: 32 69 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 70 | BBOX_THRE: 1.0 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.2 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.9 76 | USE_GT_BBOX: true 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | SOFT_ARGMAX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn18h_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | 
AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | NAME: 'lpn' 27 | PRETRAINED: '' 28 | IMAGE_SIZE: 29 | - 192 30 | - 256 31 | HEATMAP_SIZE: 32 | - 48 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 17 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | ATTENTION: 'GC' 39 | FINAL_CONV_KERNEL: 1 40 | DECONV_WITH_BIAS: false 41 | NUM_DECONV_LAYERS: 2 42 | NUM_DECONV_FILTERS: 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | NUM_LAYERS: 18 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TRAIN: 52 | BATCH_SIZE_PER_GPU: 32 53 | SHUFFLE: true 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 150 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | WD: 0.0001 63 | GAMMA1: 0.99 64 | GAMMA2: 0.0 65 | MOMENTUM: 0.9 66 | NESTEROV: false 67 | TEST: 68 | BATCH_SIZE_PER_GPU: 32 69 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 70 | BBOX_THRE: 1.0 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.2 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.9 76 | USE_GT_BBOX: true 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | SOFT_ARGMAX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn34_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | NAME: 'lpn' 27 | PRETRAINED: '' 28 | IMAGE_SIZE: 29 | - 192 30 | - 256 31 | HEATMAP_SIZE: 32 | - 48 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 17 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | ATTENTION: 'GC' 39 | FINAL_CONV_KERNEL: 1 40 | DECONV_WITH_BIAS: false 41 | NUM_DECONV_LAYERS: 2 42 | NUM_DECONV_FILTERS: 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | NUM_LAYERS: 34 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TRAIN: 52 | BATCH_SIZE_PER_GPU: 32 53 | SHUFFLE: true 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 150 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | WD: 0.0001 63 | GAMMA1: 0.99 64 | GAMMA2: 0.0 65 | MOMENTUM: 0.9 66 | NESTEROV: false 67 | TEST: 68 | BATCH_SIZE_PER_GPU: 32 69 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 70 | BBOX_THRE: 1.0 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.2 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.9 76 | USE_GT_BBOX: true 77 | FLIP_TEST: true 78 | POST_PROCESS: true 
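# (annotation, not part of the original file) FLIP_TEST makes validate() in
# lib/core/function.py also run the horizontally flipped input, flip the resulting
# heatmaps back and average them with the originals; SHIFT_HEATMAP first shifts the
# flipped heatmaps one pixel to the right because the features are not aligned.
# POST_PROCESS adds the quarter-pixel offset towards the neighbouring heatmap
# gradient in get_final_preds (lib/core/inference.py).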
79 | SHIFT_HEATMAP: true 80 | SOFT_ARGMAX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn34h_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | NAME: 'lpn' 27 | PRETRAINED: '' 28 | IMAGE_SIZE: 29 | - 192 30 | - 256 31 | HEATMAP_SIZE: 32 | - 48 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 17 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | ATTENTION: 'GC' 39 | FINAL_CONV_KERNEL: 1 40 | DECONV_WITH_BIAS: false 41 | NUM_DECONV_LAYERS: 2 42 | NUM_DECONV_FILTERS: 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | NUM_LAYERS: 34 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TRAIN: 52 | BATCH_SIZE_PER_GPU: 32 53 | SHUFFLE: true 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 150 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | WD: 0.0001 63 | GAMMA1: 0.99 64 | GAMMA2: 0.0 65 | MOMENTUM: 0.9 66 | NESTEROV: false 67 | TEST: 68 | BATCH_SIZE_PER_GPU: 32 69 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 70 | BBOX_THRE: 1.0 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.2 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.9 76 | USE_GT_BBOX: true 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | SOFT_ARGMAX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/lpn/lpn50_256x192_gd256x2_gc.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: false 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 4 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '../data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | NAME: 'lpn' 27 | PRETRAINED: '' 28 | IMAGE_SIZE: 29 | - 192 30 | - 256 31 | HEATMAP_SIZE: 32 | - 48 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 17 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | ATTENTION: 'GC' 39 | FINAL_CONV_KERNEL: 1 40 | DECONV_WITH_BIAS: false 41 | NUM_DECONV_LAYERS: 2 42 | NUM_DECONV_FILTERS: 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | NUM_LAYERS: 50 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TRAIN: 52 | BATCH_SIZE_PER_GPU: 32 53 | SHUFFLE: true 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 150 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 
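# (annotation, not part of the original file) Worked schedule for the values above:
# with LR 0.001, LR_FACTOR 0.1 and LR_STEP [90, 120], the learning rate is 1e-3 for
# epochs 0-89, 1e-4 for epochs 90-119 and 1e-5 for epochs 120-149 (END_EPOCH is 150).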
62 | WD: 0.0001 63 | GAMMA1: 0.99 64 | GAMMA2: 0.0 65 | MOMENTUM: 0.9 66 | NESTEROV: false 67 | TEST: 68 | BATCH_SIZE_PER_GPU: 32 69 | COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 70 | BBOX_THRE: 1.0 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.2 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.9 76 | USE_GT_BBOX: true 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | SOFT_ARGMAX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 
| NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | 
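# (annotation, not part of the original file) In these configs HEATMAP_SIZE is
# IMAGE_SIZE / 4, the output stride of the pose network: the 288x384 input below maps
# to 72x96 heatmaps, just as 192x256 maps to 48x64. SIGMA is raised from 2 to 3 for
# the larger heatmaps so the Gaussian target keeps roughly the same relative footprint.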
MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | 
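# (annotation, not part of the original file) BENCHMARK: true lets cuDNN auto-tune
# convolution algorithms for the fixed input size, and DETERMINISTIC: false trades
# bit-exact reproducibility for that speed; these flags are typically applied to
# torch.backends.cudnn.benchmark / .deterministic / .enabled by the training script.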
ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '../data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | 
OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: '../output' 9 | LOG_DIR: '../log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '../data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 
0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 101 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 152 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml: 
-------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 50 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | 7 | from yacs.config import CfgNode as CN 8 | 9 | 10 | _C = CN() 11 | 12 | _C.OUTPUT_DIR = '' 13 | _C.LOG_DIR = '' 14 | _C.DATA_DIR = '' 15 | _C.GPUS = (0,1,2,3) 16 | _C.WORKERS = 4 17 | _C.PRINT_FREQ = 20 18 | _C.AUTO_RESUME = False 19 | _C.PIN_MEMORY = True 20 | _C.VERBOSE = True 21 | _C.RANK = 0 22 | _C.DIST_BACKEND = 'nccl' 23 | _C.DIST_URL = 'tcp://localhost:23456' 24 | _C.MULTIPROCESSING_DISTRIBUTED = True 25 | 26 | # Cudnn related params 27 | _C.CUDNN = CN() 28 | _C.CUDNN.BENCHMARK = True 29 | _C.CUDNN.DETERMINISTIC = False 30 | _C.CUDNN.ENABLED = True 31 | 32 | # common params for NETWORK 33 | _C.MODEL = CN() 34 | _C.MODEL.NAME = 'pose_hrnet' 35 | _C.MODEL.INIT_WEIGHTS = True 36 | _C.MODEL.PRETRAINED = '' 37 | _C.MODEL.NUM_JOINTS = 17 38 | _C.MODEL.TAG_PER_JOINT = True 39 | _C.MODEL.TARGET_TYPE = 'gaussian' 40 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 41 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 42 | _C.MODEL.SIGMA = 2 43 | _C.MODEL.EXTRA = CN(new_allowed=True) 44 | _C.MODEL.SYNC_BN = False 45 | 46 | _C.LOSS = CN() 47 | _C.LOSS.USE_OHKM = False 48 | _C.LOSS.TOPK = 8 49 | _C.LOSS.USE_TARGET_WEIGHT = True 50 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 51 | 52 | # DATASET related params 53 | _C.DATASET = CN() 54 | _C.DATASET.ROOT = '' 55 | _C.DATASET.DATASET = 'mpii' 56 | _C.DATASET.TRAIN_SET = 'train' 57 | _C.DATASET.TEST_SET = 'valid' 58 | _C.DATASET.DATA_FORMAT = 'jpg' 59 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 60 | _C.DATASET.SELECT_DATA = False 61 | 62 | # training data augmentation 63 | _C.DATASET.FLIP = True 64 | _C.DATASET.SCALE_FACTOR = 0.25 65 | _C.DATASET.ROT_FACTOR = 30 66 | _C.DATASET.PROB_HALF_BODY = 0.0 67 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 68 | _C.DATASET.COLOR_RGB = False 69 | 70 | # train 71 | _C.TRAIN = CN() 72 | 73 | _C.TRAIN.LR_FACTOR = 0.1 74 | _C.TRAIN.LR_STEP = [90, 110] 75 | _C.TRAIN.LR = 0.001 76 | 77 | _C.TRAIN.OPTIMIZER = 'adam' 78 | _C.TRAIN.MOMENTUM = 0.9 79 | _C.TRAIN.WD = 0.0001 80 | _C.TRAIN.NESTEROV = False 81 | _C.TRAIN.GAMMA1 = 0.99 82 | _C.TRAIN.GAMMA2 = 0.0 83 | 84 | _C.TRAIN.BEGIN_EPOCH = 0 85 | _C.TRAIN.END_EPOCH = 140 86 | 87 | _C.TRAIN.RESUME = False 88 | _C.TRAIN.CHECKPOINT = '' 89 | 90 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 91 | _C.TRAIN.SHUFFLE = True 92 | 93 | # testing 94 | _C.TEST = CN() 95 | 96 | # size of images for each device 97 | _C.TEST.BATCH_SIZE_PER_GPU = 32 98 | # Test Model Epoch 99 | _C.TEST.FLIP_TEST = False 100 | _C.TEST.POST_PROCESS = False 101 | _C.TEST.SHIFT_HEATMAP = False 102 | 103 | _C.TEST.USE_GT_BBOX = False 104 | 105 | # nms 106 | _C.TEST.IMAGE_THRE = 0.1 107 | _C.TEST.NMS_THRE = 0.6 108 | _C.TEST.SOFT_NMS = False 109 | _C.TEST.OKS_THRE = 0.5 110 | _C.TEST.IN_VIS_THRE = 0.0 111 | _C.TEST.COCO_BBOX_FILE = '' 112 | _C.TEST.BBOX_THRE = 1.0 113 | _C.TEST.MODEL_FILE = '' 114 | 115 | # soft_argmax 116 | _C.TEST.SOFT_ARGMAX = False 117 | _C.TEST.BIAS = 0.0 118 | 119 | # debug 120 | _C.DEBUG = CN() 121 | 
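# (annotation, not part of the original file) The DEBUG.* defaults below control the
# ground-truth/prediction image and heatmap dumps written by save_debug_images
# (an assumption based on the flag names; lib/utils/vis.py is not shown here).
# A minimal sketch of how this module is consumed, assuming an argparse-style
# namespace with `cfg` and `opts` attributes (update_config is defined further down
# in this file):
#
#   from config import cfg, update_config   # exported by lib/config/__init__.py
#   args = argparse.Namespace(
#       cfg='experiments/coco/lpn/lpn50_256x192_gd256x2_gc.yaml',
#       opts=['TEST.USE_GT_BBOX', 'False'])
#   update_config(cfg, args)   # merge the YAML, then opts, resolve paths, freeze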
_C.DEBUG.DEBUG = False 122 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 123 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 124 | _C.DEBUG.SAVE_HEATMAPS_GT = False 125 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 126 | 127 | 128 | def update_config(cfg, args): 129 | cfg.defrost() 130 | cfg.merge_from_file(args.cfg) 131 | cfg.merge_from_list(args.opts) 132 | 133 | cfg.DATASET.ROOT = os.path.join( 134 | cfg.DATA_DIR, cfg.DATASET.ROOT 135 | ) 136 | 137 | cfg.MODEL.PRETRAINED = os.path.join( 138 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 139 | ) 140 | 141 | if cfg.TEST.MODEL_FILE: 142 | cfg.TEST.MODEL_FILE = os.path.join( 143 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 144 | ) 145 | 146 | cfg.freeze() 147 | 148 | 149 | if __name__ == '__main__': 150 | import sys 151 | with open(sys.argv[1], 'w') as f: 152 | print(_C, file=f) 153 | 154 | -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 53 | 54 | 55 | MODEL_EXTRAS = { 56 | 'pose_resnet': POSE_RESNET, 57 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 58 | } 59 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from core.inference import get_max_preds 14 | 15 | 16 | def calc_dists(preds, target, normalize): 17 | preds = preds.astype(np.float32) 18 | target = target.astype(np.float32) 19 | dists = np.zeros((preds.shape[1], preds.shape[0])) 20 | for n in range(preds.shape[0]): 21 | for c in range(preds.shape[1]): 22 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 23 | normed_preds = preds[n, c, :] / normalize[n] 24 | normed_targets = target[n, c, :] / normalize[n] 25 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 26 | else: 27 | dists[c, n] = -1 28 | return dists 29 | 30 | 31 | def dist_acc(dists, thr=0.5): 32 | ''' Return percentage below threshold while ignoring values with a -1 ''' 33 | dist_cal = np.not_equal(dists, -1) 34 | num_dist_cal = dist_cal.sum() 35 | if num_dist_cal > 0: 36 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 37 | else: 38 | return -1 39 | 40 | 41 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 42 | ''' 43 | Calculate accuracy according to PCK, 44 | but uses ground truth heatmap rather than x,y locations 45 | First value to be returned is average accuracy across 'idxs', 46 | followed by individual accuracies 47 | ''' 48 | idx = list(range(output.shape[1])) 49 | norm = 1.0 50 | if hm_type == 'gaussian': 51 | pred, _ = get_max_preds(output) 52 | target, _ = get_max_preds(target) 53 | h = output.shape[2] 54 | w = output.shape[3] 55 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 56 | dists = calc_dists(pred, target, norm) 57 | 58 | acc = np.zeros((len(idx) + 1)) 59 | avg_acc = 0 60 | cnt = 0 61 | 62 | for i in range(len(idx)): 63 | acc[i + 1] = dist_acc(dists[idx[i]]) 64 | if acc[i + 1] >= 0: 65 | avg_acc = avg_acc + acc[i + 1] 66 | cnt += 1 67 | 68 | avg_acc = avg_acc / cnt if cnt != 0 else 0 69 | if cnt != 0: 70 | acc[0] = avg_acc 71 | return acc, avg_acc, cnt, pred 72 | 73 | 74 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | # ------------------------------------------------------------------------------ 8 | # Updated by cavalleria (cavalleria@gmail.com) 9 | # ------------------------------------------------------------------------------ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import time 16 | import logging 17 | import os 18 | 19 | import numpy as np 20 | import torch 21 | 22 | from core.evaluate import accuracy 23 | from core.inference import get_final_preds, get_final_preds_using_softargmax 24 | from utils.transforms import flip_back 25 | from utils.vis import save_debug_images 26 | from tqdm import tqdm 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | 31 | def train(config, train_loader, model, criterion, optimizer, epoch, 32 | output_dir, tb_log_dir, writer_dict): 33 | batch_time = AverageMeter() 34 | data_time = AverageMeter() 35 | losses = AverageMeter() 36 | acc = AverageMeter() 37 | 38 | # switch to train mode 39 | model.train() 40 | 41 | end = time.time() 42 | i = 0 43 | for (input, target, target_weight, meta) in tqdm(iter(train_loader)): 44 | # measure data loading time 45 | data_time.update(time.time() - end) 46 | # compute output 47 | outputs = model(input) 48 | target = target.cuda(non_blocking=True) 49 | target_weight = target_weight.cuda(non_blocking=True) 50 | 51 | if isinstance(outputs, list): 52 | loss = criterion(outputs[0], target, target_weight) 53 | for output in outputs[1:]: 54 | loss += criterion(output, target, target_weight) 55 | else: 56 | output = outputs 57 | loss = criterion(output, target, target_weight) 58 | # loss = criterion(output, target, target_weight) 59 | 60 | # compute gradient and do update step 61 | optimizer.zero_grad() 62 | loss.backward() 63 | optimizer.step() 64 | 65 | # measure accuracy and record loss 66 | losses.update(loss.item(), input.size(0)) 67 | 68 | _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), 69 | target.detach().cpu().numpy()) 70 | acc.update(avg_acc, cnt) 71 | 72 | # measure elapsed time 73 | batch_time.update(time.time() - end) 74 | end = time.time() 75 | 76 | if i % config.PRINT_FREQ == 0: 77 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 78 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 79 | 'Speed {speed:.1f} samples/s\t' \ 80 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 81 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 82 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 83 | epoch, i, len(train_loader), batch_time=batch_time, 84 | speed=input.size(0)/batch_time.val, 85 | data_time=data_time, loss=losses, acc=acc) 86 | logger.info(msg) 87 | 88 | writer = writer_dict['writer'] 89 | global_steps = writer_dict['train_global_steps'] 90 | writer.add_scalar('train_loss', losses.val, global_steps) 91 | writer.add_scalar('train_acc', acc.val, global_steps) 92 | writer_dict['train_global_steps'] = global_steps + 1 93 | 94 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 95 | save_debug_images(config, input, meta, target, pred*4, output, prefix) 96 | i += 1 97 | 98 | def validate(args, config, val_loader, val_dataset, model, criterion, output_dir, 99 | tb_log_dir, writer_dict=None): 100 | batch_time = AverageMeter() 101 | losses = AverageMeter() 102 | acc = AverageMeter() 103 | 104 | # switch to evaluate mode 105 | model.eval() 106 | 107 | num_samples = 
len(val_dataset) 108 | all_preds = np.zeros( 109 | (num_samples, config.MODEL.NUM_JOINTS, 3), 110 | dtype=np.float32 111 | ) 112 | all_boxes = np.zeros((num_samples, 6)) 113 | image_path = [] 114 | filenames = [] 115 | imgnums = [] 116 | idx = 0 117 | with torch.no_grad(): 118 | end = time.time() 119 | for i, (input, target, target_weight, meta) in enumerate(val_loader): 120 | # compute output 121 | outputs = model(input) 122 | if isinstance(outputs, list): 123 | output = outputs[-1] 124 | else: 125 | output = outputs 126 | 127 | if config.TEST.FLIP_TEST: 128 | input_flipped = input.flip(3) 129 | outputs_flipped = model(input_flipped) 130 | 131 | if isinstance(outputs_flipped, list): 132 | output_flipped = outputs_flipped[-1] 133 | else: 134 | output_flipped = outputs_flipped 135 | 136 | output_flipped = flip_back(output_flipped.cpu().numpy(), 137 | val_dataset.flip_pairs) 138 | output_flipped = torch.from_numpy(output_flipped.copy()).cuda() 139 | 140 | 141 | # feature is not aligned, shift flipped heatmap for higher accuracy 142 | if config.TEST.SHIFT_HEATMAP: 143 | output_flipped[:, :, :, 1:] = \ 144 | output_flipped.clone()[:, :, :, 0:-1] 145 | 146 | output = (output + output_flipped) * 0.5 147 | 148 | target = target.cuda(non_blocking=True) 149 | target_weight = target_weight.cuda(non_blocking=True) 150 | 151 | loss = criterion(output, target, target_weight) 152 | 153 | num_images = input.size(0) 154 | # measure accuracy and record loss 155 | losses.update(loss.item(), num_images) 156 | _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), 157 | target.cpu().numpy()) 158 | 159 | acc.update(avg_acc, cnt) 160 | 161 | # measure elapsed time 162 | batch_time.update(time.time() - end) 163 | end = time.time() 164 | 165 | c = meta['center'].numpy() 166 | s = meta['scale'].numpy() 167 | score = meta['score'].numpy() 168 | 169 | #preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s) 170 | 171 | if config.TEST.SOFT_ARGMAX: 172 | preds, maxvals = get_final_preds_using_softargmax(config, output.clone(), c, s) 173 | else: 174 | preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s) 175 | 176 | preds = preds - config.TEST.BIAS 177 | 178 | 179 | all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] 180 | all_preds[idx:idx + num_images, :, 2:3] = maxvals 181 | # double check this all_boxes parts 182 | all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] 183 | all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] 184 | all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) 185 | all_boxes[idx:idx + num_images, 5] = score 186 | image_path.extend(meta['image']) 187 | 188 | idx += num_images 189 | 190 | if i % config.PRINT_FREQ == 0: 191 | msg = 'Test: [{0}/{1}]\t' \ 192 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 193 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 194 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 195 | i, len(val_loader), batch_time=batch_time, 196 | loss=losses, acc=acc) 197 | logger.info(msg) 198 | 199 | prefix = '{}_{}'.format( 200 | os.path.join(output_dir, 'val'), i 201 | ) 202 | save_debug_images(config, input, meta, target, pred*4, output, prefix) 203 | 204 | name_values, perf_indicator = val_dataset.evaluate( 205 | args, config, all_preds, output_dir, all_boxes, image_path, filenames, imgnums) 206 | 207 | 208 | model_name = config.MODEL.NAME 209 | if isinstance(name_values, list): 210 | for name_value in name_values: 211 | _print_name_value(name_value, model_name) 212 | else: 213 | _print_name_value(name_values, model_name) 
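    # (annotation, not part of the original file) name_values holds the metric dict(s)
    # returned by the dataset's evaluate() and is printed above as a markdown table;
    # the block below mirrors the same values to TensorBoard, and perf_indicator is
    # returned as the scalar the caller can use for best-checkpoint selection
    # (an assumption about the training loop, which is not shown here).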
214 | 215 | if writer_dict: 216 | writer = writer_dict['writer'] 217 | global_steps = writer_dict['valid_global_steps'] 218 | writer.add_scalar( 219 | 'valid_loss', 220 | losses.avg, 221 | global_steps 222 | ) 223 | writer.add_scalar( 224 | 'valid_acc', 225 | acc.avg, 226 | global_steps 227 | ) 228 | if isinstance(name_values, list): 229 | for name_value in name_values: 230 | writer.add_scalars( 231 | 'valid', 232 | dict(name_value), 233 | global_steps 234 | ) 235 | else: 236 | writer.add_scalars( 237 | 'valid', 238 | dict(name_values), 239 | global_steps 240 | ) 241 | writer_dict['valid_global_steps'] = global_steps + 1 242 | 243 | return perf_indicator 244 | 245 | 246 | # markdown format output 247 | def _print_name_value(name_value, full_arch_name): 248 | names = name_value.keys() 249 | values = name_value.values() 250 | num_values = len(name_value) 251 | logger.info( 252 | '| Arch ' + 253 | ' '.join(['| {}'.format(name) for name in names]) + 254 | ' |' 255 | ) 256 | logger.info('|---' * (num_values+1) + '|') 257 | 258 | if len(full_arch_name) > 15: 259 | full_arch_name = full_arch_name[:8] + '...' 260 | logger.info( 261 | '| ' + full_arch_name + ' ' + 262 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 263 | ' |' 264 | ) 265 | 266 | 267 | class AverageMeter(object): 268 | """Computes and stores the average and current value""" 269 | def __init__(self): 270 | self.reset() 271 | 272 | def reset(self): 273 | self.val = 0 274 | self.avg = 0 275 | self.sum = 0 276 | self.count = 0 277 | 278 | def update(self, val, n=1): 279 | self.val = val 280 | self.sum += val * n 281 | self.count += n 282 | self.avg = self.sum / self.count if self.count != 0 else 0 283 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | # ------------------------------------------------------------------------------ 8 | # Updated by cavalleria (cavalleria@gmail.com) 9 | # ------------------------------------------------------------------------------ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import math 16 | 17 | import numpy as np 18 | import torch 19 | import torch.nn as nn 20 | from utils.transforms import transform_preds 21 | 22 | 23 | def get_max_preds(batch_heatmaps): 24 | ''' 25 | get predictions from score maps 26 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 27 | ''' 28 | assert isinstance(batch_heatmaps, np.ndarray), \ 29 | 'batch_heatmaps should be numpy.ndarray' 30 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 31 | 32 | batch_size = batch_heatmaps.shape[0] 33 | num_joints = batch_heatmaps.shape[1] 34 | width = batch_heatmaps.shape[3] 35 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 36 | idx = np.argmax(heatmaps_reshaped, 2) 37 | maxvals = np.amax(heatmaps_reshaped, 2) 38 | 39 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 40 | idx = idx.reshape((batch_size, num_joints, 1)) 41 | 42 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 43 | 44 | preds[:, :, 0] = (preds[:, :, 0]) % width 45 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 46 | 47 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 48 | pred_mask = pred_mask.astype(np.float32) 49 | 50 | preds *= pred_mask 51 | return preds, maxvals 52 | 53 | 54 | def get_final_preds(config, batch_heatmaps, center, scale): 55 | coords, maxvals = get_max_preds(batch_heatmaps) 56 | 57 | heatmap_height = batch_heatmaps.shape[2] 58 | heatmap_width = batch_heatmaps.shape[3] 59 | 60 | # post-processing 61 | if config.TEST.POST_PROCESS: 62 | for n in range(coords.shape[0]): 63 | for p in range(coords.shape[1]): 64 | hm = batch_heatmaps[n][p] 65 | px = int(math.floor(coords[n][p][0] + 0.5)) 66 | py = int(math.floor(coords[n][p][1] + 0.5)) 67 | if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: 68 | diff = np.array( 69 | [ 70 | hm[py][px + 1] - hm[py][px - 1], 71 | hm[py + 1][px] - hm[py - 1][px] 72 | ] 73 | ) 74 | coords[n][p] += np.sign(diff) * .25 75 | 76 | preds = coords.copy() 77 | 78 | # Transform back 79 | for i in range(coords.shape[0]): 80 | preds[i] = transform_preds( 81 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 82 | ) 83 | 84 | return preds, maxvals 85 | 86 | class SoftArgmax2D(nn.Module): 87 | def __init__(self, height=64, width=48, beta=100): 88 | super(SoftArgmax2D, self).__init__() 89 | self.softmax = nn.Softmax(dim=-1) 90 | self.beta = beta 91 | # Note that meshgrid in pytorch behaves differently with numpy. 
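        # (annotation, not part of the original file) torch.meshgrid defaults to 'ij'
        # (matrix) indexing while numpy.meshgrid defaults to 'xy' (Cartesian) indexing,
        # so the first grid returned below indexes rows (y) and the second indexes
        # columns (x). forward() then computes the soft-argmax as an expectation over
        # the softmax-normalised heatmap: with P = softmax(beta * H), px = sum(P * WX)
        # and py = sum(P * WY); a larger beta sharpens P towards the hard argmax.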
92 | self.WY, self.WX = torch.meshgrid(torch.arange(height, dtype=torch.float), 93 | torch.arange(width, dtype=torch.float)) 94 | 95 | def forward(self, x): 96 | b, c, h, w = x.shape 97 | device = x.device 98 | 99 | probs = self.softmax(x.view(b, c, -1) * self.beta) 100 | probs = probs.view(b, c, h, w) 101 | 102 | self.WY = self.WY.to(device) 103 | self.WX = self.WX.to(device) 104 | 105 | px = torch.sum(probs * self.WX, dim=(2, 3)) 106 | py = torch.sum(probs * self.WY, dim=(2, 3)) 107 | preds = torch.stack((px, py), dim=-1).cpu().numpy() 108 | 109 | idx = np.round(preds).astype(np.int32) 110 | maxvals = np.zeros(shape=(b, c, 1)) 111 | for bi in range(b): 112 | for ci in range(c): 113 | maxvals[bi, ci, 0] = x[bi, ci, idx[bi, ci, 1], idx[bi, ci, 0]] 114 | 115 | return preds, maxvals 116 | 117 | def get_final_preds_using_softargmax(config, batch_heatmaps, center, scale): 118 | soft_argmax = SoftArgmax2D(config.MODEL.HEATMAP_SIZE[1], config.MODEL.HEATMAP_SIZE[0], beta=160) 119 | coords, maxvals = soft_argmax(batch_heatmaps) 120 | 121 | heatmap_height = batch_heatmaps.shape[2] 122 | heatmap_width = batch_heatmaps.shape[3] 123 | 124 | batch_heatmaps = batch_heatmaps.cpu().numpy() 125 | 126 | # post-processing 127 | if config.TEST.POST_PROCESS: 128 | for n in range(coords.shape[0]): 129 | for p in range(coords.shape[1]): 130 | hm = batch_heatmaps[n][p] 131 | px = int(math.floor(coords[n][p][0] + 0.5)) 132 | py = int(math.floor(coords[n][p][1] + 0.5)) 133 | if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: 134 | diff = np.array( 135 | [ 136 | hm[py][px + 1] - hm[py][px - 1], 137 | hm[py + 1][px] - hm[py - 1][px] 138 | ] 139 | ) 140 | coords[n][p] += np.sign(diff) * .25 141 | 142 | preds = coords.copy() 143 | 144 | # Transform back 145 | for i in range(coords.shape[0]): 146 | preds[i] = transform_preds( 147 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 148 | ) 149 | 150 | return preds, maxvals -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class JointsMSELoss(nn.Module): 16 | def __init__(self, use_target_weight): 17 | super(JointsMSELoss, self).__init__() 18 | self.criterion = nn.MSELoss(reduction='mean') 19 | self.use_target_weight = use_target_weight 20 | 21 | def forward(self, output, target, target_weight): 22 | batch_size = output.size(0) 23 | num_joints = output.size(1) 24 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 25 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 26 | loss = 0 27 | 28 | for idx in range(num_joints): 29 | heatmap_pred = heatmaps_pred[idx].squeeze() 30 | heatmap_gt = heatmaps_gt[idx].squeeze() 31 | if self.use_target_weight: 32 | loss += 0.5 * self.criterion( 33 | heatmap_pred.mul(target_weight[:, idx]), 34 | heatmap_gt.mul(target_weight[:, idx]) 35 | ) 36 | else: 37 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 38 | 39 | return loss / num_joints 40 | 41 | 42 | class JointsOHKMMSELoss(nn.Module): 43 | def __init__(self, use_target_weight, topk=8): 44 | super(JointsOHKMMSELoss, self).__init__() 45 | self.criterion = nn.MSELoss(reduction='none') 46 | self.use_target_weight = use_target_weight 47 | self.topk = topk 48 | 49 | def ohkm(self, loss): 50 | ohkm_loss = 0. 51 | for i in range(loss.size()[0]): 52 | sub_loss = loss[i] 53 | topk_val, topk_idx = torch.topk( 54 | sub_loss, k=self.topk, dim=0, sorted=False 55 | ) 56 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 57 | ohkm_loss += torch.sum(tmp_loss) / self.topk 58 | ohkm_loss /= loss.size()[0] 59 | return ohkm_loss 60 | 61 | def forward(self, output, target, target_weight): 62 | batch_size = output.size(0) 63 | num_joints = output.size(1) 64 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 65 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 66 | 67 | loss = [] 68 | for idx in range(num_joints): 69 | heatmap_pred = heatmaps_pred[idx].squeeze() 70 | heatmap_gt = heatmaps_gt[idx].squeeze() 71 | if self.use_target_weight: 72 | loss.append(0.5 * self.criterion( 73 | heatmap_pred.mul(target_weight[:, idx]), 74 | heatmap_gt.mul(target_weight[:, idx]) 75 | )) 76 | else: 77 | loss.append( 78 | 0.5 * self.criterion(heatmap_pred, heatmap_gt) 79 | ) 80 | 81 | loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss] 82 | loss = torch.cat(loss, dim=1) 83 | 84 | return self.ohkm(loss) 85 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import copy 12 | import logging 13 | import random 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from torch.utils.data import Dataset 19 | 20 | from utils.transforms import get_affine_transform 21 | from utils.transforms import affine_transform 22 | from utils.transforms import fliplr_joints 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class JointsDataset(Dataset): 29 | def __init__(self, cfg, root, image_set, is_train, transform=None): 30 | self.num_joints = 0 31 | self.pixel_std = 200 32 | self.flip_pairs = [] 33 | self.parent_ids = [] 34 | 35 | self.is_train = is_train 36 | self.root = root 37 | self.image_set = image_set 38 | 39 | self.output_path = cfg.OUTPUT_DIR 40 | self.data_format = cfg.DATASET.DATA_FORMAT 41 | 42 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 43 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 44 | self.flip = cfg.DATASET.FLIP 45 | self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY 46 | self.prob_half_body = cfg.DATASET.PROB_HALF_BODY 47 | self.color_rgb = cfg.DATASET.COLOR_RGB 48 | 49 | self.target_type = cfg.MODEL.TARGET_TYPE 50 | self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) 51 | self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) 52 | self.sigma = cfg.MODEL.SIGMA 53 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 54 | self.joints_weight = 1 55 | 56 | self.transform = transform 57 | self.db = [] 58 | 59 | def _get_db(self): 60 | raise NotImplementedError 61 | 62 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 63 | raise NotImplementedError 64 | 65 | def half_body_transform(self, joints, joints_vis): 66 | upper_joints = [] 67 | lower_joints = [] 68 | for joint_id in range(self.num_joints): 69 | if joints_vis[joint_id][0] > 0: 70 | if joint_id in self.upper_body_ids: 71 | upper_joints.append(joints[joint_id]) 72 | else: 73 | lower_joints.append(joints[joint_id]) 74 | 75 | if np.random.randn() < 0.5 and len(upper_joints) > 2: 76 | selected_joints = upper_joints 77 | else: 78 | selected_joints = lower_joints \ 79 | if len(lower_joints) > 2 else upper_joints 80 | 81 | if len(selected_joints) < 2: 82 | return None, None 83 | 84 | selected_joints = np.array(selected_joints, dtype=np.float32) 85 | center = selected_joints.mean(axis=0)[:2] 86 | 87 | left_top = np.amin(selected_joints, axis=0) 88 | right_bottom = np.amax(selected_joints, axis=0) 89 | 90 | w = right_bottom[0] - left_top[0] 91 | h = right_bottom[1] - left_top[1] 92 | 93 | if w > self.aspect_ratio * h: 94 | h = w * 1.0 / self.aspect_ratio 95 | elif w < self.aspect_ratio * h: 96 | w = h * self.aspect_ratio 97 | 98 | scale = np.array( 99 | [ 100 | w * 1.0 / self.pixel_std, 101 | h * 1.0 / self.pixel_std 102 | ], 103 | dtype=np.float32 104 | ) 105 | 106 | scale = scale * 1.5 107 | 108 | return center, scale 109 | 110 | def __len__(self,): 111 | return len(self.db) 112 | 113 | def __getitem__(self, idx): 114 | db_rec = copy.deepcopy(self.db[idx]) 115 | 116 | image_file = db_rec['image'] 117 | filename = db_rec['filename'] if 'filename' in db_rec else '' 118 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 119 | 120 | if self.data_format == 'zip': 121 | from utils import zipreader 122 | data_numpy = zipreader.imread( 123 | image_file, 
cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 124 | ) 125 | else: 126 | data_numpy = cv2.imread( 127 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 128 | ) 129 | 130 | if self.color_rgb: 131 | data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) 132 | 133 | if data_numpy is None: 134 | logger.error('=> fail to read {}'.format(image_file)) 135 | raise ValueError('Fail to read {}'.format(image_file)) 136 | 137 | joints = db_rec['joints_3d'] 138 | joints_vis = db_rec['joints_3d_vis'] 139 | 140 | c = db_rec['center'] 141 | s = db_rec['scale'] 142 | score = db_rec['score'] if 'score' in db_rec else 1 143 | r = 0 144 | 145 | if self.is_train: 146 | if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body 147 | and np.random.rand() < self.prob_half_body): 148 | c_half_body, s_half_body = self.half_body_transform( 149 | joints, joints_vis 150 | ) 151 | 152 | if c_half_body is not None and s_half_body is not None: 153 | c, s = c_half_body, s_half_body 154 | 155 | sf = self.scale_factor 156 | rf = self.rotation_factor 157 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 158 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 159 | if random.random() <= 0.6 else 0 160 | 161 | if self.flip and random.random() <= 0.5: 162 | data_numpy = data_numpy[:, ::-1, :] 163 | joints, joints_vis = fliplr_joints( 164 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 165 | c[0] = data_numpy.shape[1] - c[0] - 1 166 | 167 | trans = get_affine_transform(c, s, r, self.image_size) 168 | input = cv2.warpAffine( 169 | data_numpy, 170 | trans, 171 | (int(self.image_size[0]), int(self.image_size[1])), 172 | flags=cv2.INTER_LINEAR) 173 | 174 | if self.transform: 175 | input = self.transform(input) 176 | 177 | for i in range(self.num_joints): 178 | if joints_vis[i, 0] > 0.0: 179 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 180 | 181 | target, target_weight = self.generate_target(joints, joints_vis) 182 | 183 | target = torch.from_numpy(target) 184 | target_weight = torch.from_numpy(target_weight) 185 | 186 | meta = { 187 | 'image': image_file, 188 | 'filename': filename, 189 | 'imgnum': imgnum, 190 | 'joints': joints, 191 | 'joints_vis': joints_vis, 192 | 'center': c, 193 | 'scale': s, 194 | 'rotation': r, 195 | 'score': score 196 | } 197 | 198 | return input, target, target_weight, meta 199 | 200 | def select_data(self, db): 201 | db_selected = [] 202 | for rec in db: 203 | num_vis = 0 204 | joints_x = 0.0 205 | joints_y = 0.0 206 | for joint, joint_vis in zip( 207 | rec['joints_3d'], rec['joints_3d_vis']): 208 | if joint_vis[0] <= 0: 209 | continue 210 | num_vis += 1 211 | 212 | joints_x += joint[0] 213 | joints_y += joint[1] 214 | if num_vis == 0: 215 | continue 216 | 217 | joints_x, joints_y = joints_x / num_vis, joints_y / num_vis 218 | 219 | area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) 220 | joints_center = np.array([joints_x, joints_y]) 221 | bbox_center = np.array(rec['center']) 222 | diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) 223 | ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) 224 | 225 | metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 226 | if ks > metric: 227 | db_selected.append(rec) 228 | 229 | logger.info('=> num db: {}'.format(len(db))) 230 | logger.info('=> num selected db: {}'.format(len(db_selected))) 231 | return db_selected 232 | 233 | def generate_target(self, joints, joints_vis): 234 | ''' 235 | :param joints: [num_joints, 3] 236 | :param joints_vis: [num_joints, 3] 237 | :return: target, target_weight(1: 
visible, 0: invisible) 238 | ''' 239 | target_weight = np.ones((self.num_joints, 1), dtype=np.float32) 240 | target_weight[:, 0] = joints_vis[:, 0] 241 | 242 | assert self.target_type == 'gaussian', \ 243 | 'Only support gaussian map now!' 244 | 245 | if self.target_type == 'gaussian': 246 | target = np.zeros((self.num_joints, 247 | self.heatmap_size[1], 248 | self.heatmap_size[0]), 249 | dtype=np.float32) 250 | 251 | tmp_size = self.sigma * 3 252 | 253 | for joint_id in range(self.num_joints): 254 | feat_stride = self.image_size / self.heatmap_size 255 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) 256 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) 257 | # Check that any part of the gaussian is in-bounds 258 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 259 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 260 | if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ 261 | or br[0] < 0 or br[1] < 0: 262 | # If not, just return the image as is 263 | target_weight[joint_id] = 0 264 | continue 265 | 266 | # # Generate gaussian 267 | size = 2 * tmp_size + 1 268 | x = np.arange(0, size, 1, np.float32) 269 | y = x[:, np.newaxis] 270 | x0 = y0 = size // 2 271 | # The gaussian is not normalized, we want the center value to equal 1 272 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) 273 | 274 | # Usable gaussian range 275 | g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 276 | g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 277 | # Image range 278 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 279 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 280 | 281 | v = target_weight[joint_id] 282 | if v > 0.5: 283 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ 284 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 285 | 286 | if self.use_different_joints_weight: 287 | target_weight = np.multiply(target_weight, self.joints_weight) 288 | 289 | return target, target_weight 290 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .mpii import MPIIDataset as mpii 12 | from .coco import COCODataset as coco 13 | 14 | -------------------------------------------------------------------------------- /lib/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
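generate_target above paints one unnormalized Gaussian per visible joint onto the heatmap grid. A standalone rendition of the same arithmetic for a single joint follows; the 192x256 input size, 48x64 heatmap size and sigma=2 mirror the 256x192 configs and are assumptions here, as is the joint position.

import numpy as np

image_size = np.array([192, 256])            # (w, h) of the network input
heatmap_size = np.array([48, 64])            # (w, h) of the output heatmap
sigma = 2
joint = np.array([100.0, 120.0])             # joint position in input-image pixels

feat_stride = image_size / heatmap_size      # 4.0 in both directions
mu_x = int(joint[0] / feat_stride[0] + 0.5)  # 25
mu_y = int(joint[1] / feat_stride[1] + 0.5)  # 30

tmp_size = sigma * 3
size = 2 * tmp_size + 1                      # 13x13 Gaussian patch
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))  # peak value 1.0

target = np.zeros((heatmap_size[1], heatmap_size[0]), dtype=np.float32)
ul = [mu_x - tmp_size, mu_y - tmp_size]
br = [mu_x + tmp_size + 1, mu_y + tmp_size + 1]
# clip the patch to the heatmap bounds, exactly as generate_target does
g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1]
img_x = max(0, ul[0]), min(br[0], heatmap_size[0])
img_y = max(0, ul[1]), min(br[1], heatmap_size[1])
target[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
print(target.max(), np.unravel_index(target.argmax(), target.shape))  # 1.0 at (30, 25)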
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import os 13 | import json_tricks as json 14 | from collections import OrderedDict 15 | 16 | import numpy as np 17 | from scipy.io import loadmat, savemat 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MPIIDataset(JointsDataset): 26 | def __init__(self, cfg, root, image_set, is_train, transform=None): 27 | super().__init__(cfg, root, image_set, is_train, transform) 28 | 29 | self.num_joints = 16 30 | self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] 31 | self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] 32 | 33 | self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15) 34 | self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6) 35 | 36 | self.db = self._get_db() 37 | 38 | if is_train and cfg.DATASET.SELECT_DATA: 39 | self.db = self.select_data(self.db) 40 | 41 | logger.info('=> load {} samples'.format(len(self.db))) 42 | 43 | def _get_db(self): 44 | # create train/val split 45 | file_name = os.path.join( 46 | self.root, 'annot', self.image_set+'.json' 47 | ) 48 | with open(file_name) as anno_file: 49 | anno = json.load(anno_file) 50 | 51 | gt_db = [] 52 | for a in anno: 53 | image_name = a['image'] 54 | 55 | c = np.array(a['center'], dtype=np.float) 56 | s = np.array([a['scale'], a['scale']], dtype=np.float) 57 | 58 | # Adjust center/scale slightly to avoid cropping limbs 59 | if c[0] != -1: 60 | c[1] = c[1] + 15 * s[1] 61 | s = s * 1.25 62 | 63 | # MPII uses matlab format, index is based 1, 64 | # we should first convert to 0-based index 65 | c = c - 1 66 | 67 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 68 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 69 | if self.image_set != 'test': 70 | joints = np.array(a['joints']) 71 | joints[:, 0:2] = joints[:, 0:2] - 1 72 | joints_vis = np.array(a['joints_vis']) 73 | assert len(joints) == self.num_joints, \ 74 | 'joint num diff: {} vs {}'.format(len(joints), 75 | self.num_joints) 76 | 77 | joints_3d[:, 0:2] = joints[:, 0:2] 78 | joints_3d_vis[:, 0] = joints_vis[:] 79 | joints_3d_vis[:, 1] = joints_vis[:] 80 | 81 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 82 | gt_db.append( 83 | { 84 | 'image': os.path.join(self.root, image_dir, image_name), 85 | 'center': c, 86 | 'scale': s, 87 | 'joints_3d': joints_3d, 88 | 'joints_3d_vis': joints_3d_vis, 89 | 'filename': '', 90 | 'imgnum': 0, 91 | } 92 | ) 93 | 94 | return gt_db 95 | 96 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 97 | # convert 0-based index to 1-based index 98 | preds = preds[:, :, 0:2] + 1.0 99 | 100 | if output_dir: 101 | pred_file = os.path.join(output_dir, 'pred.mat') 102 | savemat(pred_file, mdict={'preds': preds}) 103 | 104 | if 'test' in cfg.DATASET.TEST_SET: 105 | return {'Null': 0.0}, 0.0 106 | 107 | SC_BIAS = 0.6 108 | threshold = 0.5 109 | 110 | gt_file = os.path.join(cfg.DATASET.ROOT, 111 | 'annot', 112 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 113 | gt_dict = loadmat(gt_file) 114 | dataset_joints = gt_dict['dataset_joints'] 115 | jnt_missing = gt_dict['jnt_missing'] 116 | pos_gt_src = gt_dict['pos_gt_src'] 117 | headboxes_src = gt_dict['headboxes_src'] 118 | 119 | pos_pred_src = np.transpose(preds, 
[1, 2, 0]) 120 | 121 | head = np.where(dataset_joints == 'head')[1][0] 122 | lsho = np.where(dataset_joints == 'lsho')[1][0] 123 | lelb = np.where(dataset_joints == 'lelb')[1][0] 124 | lwri = np.where(dataset_joints == 'lwri')[1][0] 125 | lhip = np.where(dataset_joints == 'lhip')[1][0] 126 | lkne = np.where(dataset_joints == 'lkne')[1][0] 127 | lank = np.where(dataset_joints == 'lank')[1][0] 128 | 129 | rsho = np.where(dataset_joints == 'rsho')[1][0] 130 | relb = np.where(dataset_joints == 'relb')[1][0] 131 | rwri = np.where(dataset_joints == 'rwri')[1][0] 132 | rkne = np.where(dataset_joints == 'rkne')[1][0] 133 | rank = np.where(dataset_joints == 'rank')[1][0] 134 | rhip = np.where(dataset_joints == 'rhip')[1][0] 135 | 136 | jnt_visible = 1 - jnt_missing 137 | uv_error = pos_pred_src - pos_gt_src 138 | uv_err = np.linalg.norm(uv_error, axis=1) 139 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 140 | headsizes = np.linalg.norm(headsizes, axis=0) 141 | headsizes *= SC_BIAS 142 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 143 | scaled_uv_err = np.divide(uv_err, scale) 144 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 145 | jnt_count = np.sum(jnt_visible, axis=1) 146 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 147 | jnt_visible) 148 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 149 | 150 | # save 151 | rng = np.arange(0, 0.5+0.01, 0.01) 152 | pckAll = np.zeros((len(rng), 16)) 153 | 154 | for r in range(len(rng)): 155 | threshold = rng[r] 156 | less_than_threshold = np.multiply(scaled_uv_err <= threshold, 157 | jnt_visible) 158 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 159 | jnt_count) 160 | 161 | PCKh = np.ma.array(PCKh, mask=False) 162 | PCKh.mask[6:8] = True 163 | 164 | jnt_count = np.ma.array(jnt_count, mask=False) 165 | jnt_count.mask[6:8] = True 166 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 167 | 168 | name_value = [ 169 | ('Head', PCKh[head]), 170 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 171 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 172 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 173 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 174 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 175 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 176 | ('Mean', np.sum(PCKh * jnt_ratio)), 177 | ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 178 | ] 179 | name_value = OrderedDict(name_value) 180 | 181 | return name_value, name_value['Mean'] 182 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
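To make the evaluate() logic above concrete: MPII accuracy is PCKh@0.5, i.e. a predicted joint counts as correct when its pixel error is at most half of 0.6 times the ground-truth head-box diagonal (SC_BIAS=0.6, threshold=0.5 in the code above). The numbers below are invented purely to illustrate the normalization.

import numpy as np

SC_BIAS, threshold = 0.6, 0.5
headbox = np.array([[50.0, 40.0], [110.0, 120.0]])             # [x1, y1], [x2, y2]
headsize = SC_BIAS * np.linalg.norm(headbox[1] - headbox[0])   # 0.6 * 100 = 60
pixel_error = 25.0                                             # |prediction - ground truth|
print(pixel_error / headsize <= threshold)                     # True -> correct at PCKh@0.5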
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import models.pose_resnet 16 | import models.pose_hrnet 17 | import models.lpn 18 | -------------------------------------------------------------------------------- /lib/models/lpn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | from .lightweight_modules import LW_Bottleneck, LW_BasicBlock, MV2_BasicBlock 7 | 8 | BN_MOMENTUM = 0.1 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class LPN(nn.Module): 13 | 14 | def __init__(self, block, layers, cfg, **kwargs): 15 | super(LPN, self).__init__() 16 | extra = cfg.MODEL.EXTRA 17 | 18 | self.inplanes = 64 19 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 20 | self.attention = extra.get('ATTENTION') 21 | 22 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 23 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 24 | self.relu = nn.ReLU(inplace=True) 25 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 26 | 27 | self.layer1 = self._make_layer(block, 64, layers[0]) 28 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 29 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 30 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) 31 | 32 | # used for deconv layers 33 | self.deconv_layers = self._make_deconv_layer( 34 | extra.NUM_DECONV_LAYERS, 35 | extra.NUM_DECONV_FILTERS, 36 | extra.NUM_DECONV_KERNELS, 37 | ) 38 | 39 | self.final_layer = nn.Conv2d( 40 | in_channels=extra.NUM_DECONV_FILTERS[-1], 41 | out_channels=cfg.MODEL.NUM_JOINTS, 42 | kernel_size=extra.FINAL_CONV_KERNEL, 43 | stride=1, 44 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 45 | ) 46 | 47 | def _make_layer(self, block, planes, blocks, stride=1): 48 | downsample = None 49 | if stride != 1 or self.inplanes != planes * block.expansion: 50 | downsample = nn.Sequential( 51 | nn.Conv2d(self.inplanes, planes * block.expansion, 52 | kernel_size=1, stride=stride, bias=False), 53 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 54 | ) 55 | 56 | layers = [] 57 | layers.append(block(self.inplanes, planes, stride, downsample, self.attention)) 58 | self.inplanes = planes * block.expansion 59 | for i in range(1, blocks): 60 | layers.append(block(self.inplanes, planes, attention=self.attention)) 61 | 62 | return nn.Sequential(*layers) 63 | 64 | def _get_deconv_cfg(self, deconv_kernel, index): 65 | if deconv_kernel == 4: 66 | padding = 1 67 | output_padding = 0 68 | elif deconv_kernel == 3: 69 | padding = 1 70 | output_padding = 1 71 | elif deconv_kernel == 2: 72 | padding = 0 73 | output_padding = 0 74 | 75 | return deconv_kernel, padding, output_padding 76 | 77 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 78 | layers = [] 79 | for i in range(num_layers): 80 | kernel, padding, output_padding = \ 81 | self._get_deconv_cfg(num_kernels[i], i) 82 | 83 | planes = num_filters[i] 84 | layers.extend([ 85 | nn.ConvTranspose2d(in_channels=self.inplanes, out_channels=planes, kernel_size=kernel, 86 | stride=2, padding=padding, 
output_padding=output_padding, 87 | groups=math.gcd(self.inplanes, planes), bias=self.deconv_with_bias), 88 | nn.BatchNorm2d(planes, momentum=BN_MOMENTUM), 89 | nn.ReLU(inplace=True), 90 | nn.Conv2d(planes, planes, kernel_size=1, bias=False), 91 | nn.BatchNorm2d(planes, momentum=BN_MOMENTUM), 92 | nn.ReLU(inplace=True), 93 | ]) 94 | self.inplanes = planes 95 | 96 | return nn.Sequential(*layers) 97 | 98 | def forward(self, x): 99 | x = self.conv1(x) 100 | x = self.bn1(x) 101 | x = self.relu(x) 102 | x = self.maxpool(x) 103 | 104 | x = self.layer1(x) 105 | x = self.layer2(x) 106 | x = self.layer3(x) 107 | x = self.layer4(x) 108 | 109 | features = self.deconv_layers(x) 110 | x = self.final_layer(features) 111 | 112 | return x 113 | 114 | def init_weights(self, pretrained=''): 115 | if os.path.isfile(pretrained): 116 | logger.info('=> init deconv weights from normal distribution') 117 | for name, m in self.deconv_layers.named_modules(): 118 | if isinstance(m, nn.ConvTranspose2d): 119 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 120 | logger.info('=> init {}.bias as 0'.format(name)) 121 | nn.init.normal_(m.weight, std=0.001) 122 | if self.deconv_with_bias: 123 | nn.init.constant_(m.bias, 0) 124 | elif isinstance(m, nn.BatchNorm2d): 125 | logger.info('=> init {}.weight as 1'.format(name)) 126 | logger.info('=> init {}.bias as 0'.format(name)) 127 | nn.init.constant_(m.weight, 1) 128 | nn.init.constant_(m.bias, 0) 129 | logger.info('=> init final conv weights from normal distribution') 130 | for m in self.final_layer.modules(): 131 | if isinstance(m, nn.Conv2d): 132 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 133 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 134 | logger.info('=> init {}.bias as 0'.format(name)) 135 | nn.init.normal_(m.weight, std=0.001) 136 | nn.init.constant_(m.bias, 0) 137 | 138 | pretrained_state_dict = torch.load(pretrained) 139 | logger.info('=> loading pretrained model {}'.format(pretrained)) 140 | self.load_state_dict(pretrained_state_dict, strict=False) 141 | else: 142 | logger.info('=> init weights from normal distribution') 143 | for m in self.modules(): 144 | if isinstance(m, nn.Conv2d): 145 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 146 | nn.init.normal_(m.weight, std=0.001) 147 | # nn.init.constant_(m.bias, 0) 148 | elif isinstance(m, nn.BatchNorm2d): 149 | nn.init.constant_(m.weight, 1) 150 | nn.init.constant_(m.bias, 0) 151 | elif isinstance(m, nn.ConvTranspose2d): 152 | nn.init.normal_(m.weight, std=0.001) 153 | if self.deconv_with_bias: 154 | nn.init.constant_(m.bias, 0) 155 | 156 | 157 | resnet_spec = { 158 | '18m': (MV2_BasicBlock, [1, 1, 1, 1]), 159 | 18: (LW_BasicBlock, [2, 2, 2, 2]), 160 | 34: (LW_BasicBlock, [3, 4, 6, 3]), 161 | 50: (LW_Bottleneck, [3, 4, 6, 3]), 162 | 101: (LW_Bottleneck, [3, 4, 23, 3]), 163 | 100: (LW_Bottleneck, [3, 13, 30, 3]), 164 | 152: (LW_Bottleneck, [3, 8, 36, 3]) 165 | } 166 | 167 | 168 | def get_pose_net(cfg, is_train, **kwargs): 169 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 170 | 171 | block_class, layers = resnet_spec[num_layers] 172 | 173 | model = LPN(block_class, layers, cfg, **kwargs) 174 | 175 | if is_train and cfg.MODEL.INIT_WEIGHTS: 176 | model.init_weights(cfg.MODEL.PRETRAINED) 177 | 178 | return model 179 | -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | BN_MOMENTUM = 0.1 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d( 25 | in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False 27 | ) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 72 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 73 | bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 75 | momentum=BN_MOMENTUM) 76 | self.relu = nn.ReLU(inplace=True) 77 | self.downsample = downsample 78 | self.stride = stride 79 | 80 | def forward(self, x): 81 | residual = x 82 | 83 | out = self.conv1(x) 84 | out = self.bn1(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv2(out) 88 | out = self.bn2(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv3(out) 92 | out = self.bn3(out) 93 | 94 | if self.downsample is not None: 95 | residual = self.downsample(x) 96 | 97 | out += residual 98 | out = self.relu(out) 99 | 100 | return out 101 | 102 | 103 | class PoseResNet(nn.Module): 104 | 105 | def __init__(self, block, layers, cfg, **kwargs): 106 | self.inplanes = 64 107 | extra = cfg.MODEL.EXTRA 108 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 109 | 110 | super(PoseResNet, self).__init__() 111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 112 | bias=False) 113 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 114 | self.relu = nn.ReLU(inplace=True) 115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 116 | self.layer1 = self._make_layer(block, 64, layers[0]) 117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 119 | self.layer4 = 
self._make_layer(block, 512, layers[3], stride=2) 120 | 121 | # used for deconv layers 122 | self.deconv_layers = self._make_deconv_layer( 123 | extra.NUM_DECONV_LAYERS, 124 | extra.NUM_DECONV_FILTERS, 125 | extra.NUM_DECONV_KERNELS, 126 | ) 127 | 128 | self.final_layer = nn.Conv2d( 129 | in_channels=extra.NUM_DECONV_FILTERS[-1], 130 | out_channels=cfg.MODEL.NUM_JOINTS, 131 | kernel_size=extra.FINAL_CONV_KERNEL, 132 | stride=1, 133 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 134 | ) 135 | 136 | def _make_layer(self, block, planes, blocks, stride=1): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * block.expansion: 139 | downsample = nn.Sequential( 140 | nn.Conv2d(self.inplanes, planes * block.expansion, 141 | kernel_size=1, stride=stride, bias=False), 142 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 143 | ) 144 | 145 | layers = [] 146 | layers.append(block(self.inplanes, planes, stride, downsample)) 147 | self.inplanes = planes * block.expansion 148 | for i in range(1, blocks): 149 | layers.append(block(self.inplanes, planes)) 150 | 151 | return nn.Sequential(*layers) 152 | 153 | def _get_deconv_cfg(self, deconv_kernel, index): 154 | if deconv_kernel == 4: 155 | padding = 1 156 | output_padding = 0 157 | elif deconv_kernel == 3: 158 | padding = 1 159 | output_padding = 1 160 | elif deconv_kernel == 2: 161 | padding = 0 162 | output_padding = 0 163 | 164 | return deconv_kernel, padding, output_padding 165 | 166 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 167 | assert num_layers == len(num_filters), \ 168 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 169 | assert num_layers == len(num_kernels), \ 170 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 171 | 172 | layers = [] 173 | for i in range(num_layers): 174 | kernel, padding, output_padding = \ 175 | self._get_deconv_cfg(num_kernels[i], i) 176 | 177 | planes = num_filters[i] 178 | layers.append( 179 | nn.ConvTranspose2d( 180 | in_channels=self.inplanes, 181 | out_channels=planes, 182 | kernel_size=kernel, 183 | stride=2, 184 | padding=padding, 185 | output_padding=output_padding, 186 | bias=self.deconv_with_bias)) 187 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 188 | layers.append(nn.ReLU(inplace=True)) 189 | self.inplanes = planes 190 | 191 | return nn.Sequential(*layers) 192 | 193 | def forward(self, x): 194 | x = self.conv1(x) 195 | x = self.bn1(x) 196 | x = self.relu(x) 197 | x = self.maxpool(x) 198 | 199 | x = self.layer1(x) 200 | x = self.layer2(x) 201 | x = self.layer3(x) 202 | x = self.layer4(x) 203 | 204 | x = self.deconv_layers(x) 205 | x = self.final_layer(x) 206 | 207 | return x 208 | 209 | def init_weights(self, pretrained=''): 210 | if os.path.isfile(pretrained): 211 | logger.info('=> init deconv weights from normal distribution') 212 | for name, m in self.deconv_layers.named_modules(): 213 | if isinstance(m, nn.ConvTranspose2d): 214 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 215 | logger.info('=> init {}.bias as 0'.format(name)) 216 | nn.init.normal_(m.weight, std=0.001) 217 | if self.deconv_with_bias: 218 | nn.init.constant_(m.bias, 0) 219 | elif isinstance(m, nn.BatchNorm2d): 220 | logger.info('=> init {}.weight as 1'.format(name)) 221 | logger.info('=> init {}.bias as 0'.format(name)) 222 | nn.init.constant_(m.weight, 1) 223 | nn.init.constant_(m.bias, 0) 224 | logger.info('=> init final conv weights from normal distribution') 225 | for m in 
self.final_layer.modules(): 226 | if isinstance(m, nn.Conv2d): 227 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 228 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 229 | logger.info('=> init {}.bias as 0'.format(name)) 230 | nn.init.normal_(m.weight, std=0.001) 231 | nn.init.constant_(m.bias, 0) 232 | 233 | pretrained_state_dict = torch.load(pretrained) 234 | logger.info('=> loading pretrained model {}'.format(pretrained)) 235 | self.load_state_dict(pretrained_state_dict, strict=False) 236 | else: 237 | logger.info('=> init weights from normal distribution') 238 | for m in self.modules(): 239 | if isinstance(m, nn.Conv2d): 240 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 241 | nn.init.normal_(m.weight, std=0.001) 242 | # nn.init.constant_(m.bias, 0) 243 | elif isinstance(m, nn.BatchNorm2d): 244 | nn.init.constant_(m.weight, 1) 245 | nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.ConvTranspose2d): 247 | nn.init.normal_(m.weight, std=0.001) 248 | if self.deconv_with_bias: 249 | nn.init.constant_(m.bias, 0) 250 | 251 | 252 | resnet_spec = { 253 | 18: (BasicBlock, [2, 2, 2, 2]), 254 | 34: (BasicBlock, [3, 4, 6, 3]), 255 | 50: (Bottleneck, [3, 4, 6, 3]), 256 | 101: (Bottleneck, [3, 4, 23, 3]), 257 | 152: (Bottleneck, [3, 8, 36, 3]) 258 | } 259 | 260 | 261 | def get_pose_net(cfg, is_train, **kwargs): 262 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 263 | 264 | block_class, layers = resnet_spec[num_layers] 265 | 266 | model = PoseResNet(block_class, layers, cfg, **kwargs) 267 | 268 | if is_train and cfg.MODEL.INIT_WEIGHTS: 269 | model.init_weights(cfg.MODEL.PRETRAINED) 270 | 271 | return model 272 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cavalleria/humanpose.pytorch/08e0316b8f65e9fa45fefc8c9d0e28a6096a1d5f/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
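A hedged sketch of instantiating the PoseResNet defined above without the full experiment-config machinery. The yacs CfgNode stands in for the real config (lib/config/default.py is not reproduced in this excerpt), and only the keys that get_pose_net and PoseResNet actually read are filled in, with values mirroring the usual res50_256x192 setup.

import torch
from yacs.config import CfgNode as CN
import models.pose_resnet as pose_resnet    # assumes `lib` is on sys.path

cfg = CN()
cfg.MODEL = CN()
cfg.MODEL.NUM_JOINTS = 17                   # COCO keypoints
cfg.MODEL.INIT_WEIGHTS = False
cfg.MODEL.PRETRAINED = ''
cfg.MODEL.EXTRA = CN()
cfg.MODEL.EXTRA.NUM_LAYERS = 50
cfg.MODEL.EXTRA.DECONV_WITH_BIAS = False
cfg.MODEL.EXTRA.NUM_DECONV_LAYERS = 3
cfg.MODEL.EXTRA.NUM_DECONV_FILTERS = [256, 256, 256]
cfg.MODEL.EXTRA.NUM_DECONV_KERNELS = [4, 4, 4]
cfg.MODEL.EXTRA.FINAL_CONV_KERNEL = 1

model = pose_resnet.get_pose_net(cfg, is_train=False)
with torch.no_grad():
    heatmaps = model(torch.rand(1, 3, 256, 192))   # NCHW, H=256, W=192
print(heatmaps.shape)                              # torch.Size([1, 17, 64, 48])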
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 15 | return a if a >= b else b 16 | 17 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 18 | return a if a <= b else b 19 | 20 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 21 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 22 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 23 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 24 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 25 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 26 | 27 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 28 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 29 | 30 | cdef int ndets = dets.shape[0] 31 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 32 | np.zeros((ndets), dtype=np.int) 33 | 34 | # nominal indices 35 | cdef int _i, _j 36 | # sorted indices 37 | cdef int i, j 38 | # temp variables for box i's (the box currently under consideration) 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | # variables for computing overlap with box j (lower scoring box) 41 | cdef np.float32_t xx1, yy1, xx2, yy2 42 | cdef np.float32_t w, h 43 | cdef np.float32_t inter, ovr 44 | 45 | keep = [] 46 | for _i in range(ndets): 47 | i = order[_i] 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | ix1 = x1[i] 52 | iy1 = y1[i] 53 | ix2 = x2[i] 54 | iy2 = y2[i] 55 | iarea = areas[i] 56 | for _j in range(_i + 1, ndets): 57 | j = order[_j] 58 | if suppressed[j] == 1: 59 | continue 60 | xx1 = max(ix1, x1[j]) 61 | yy1 = max(iy1, y1[j]) 62 | xx2 = min(ix2, x2[j]) 63 | yy2 = min(iy2, y2[j]) 64 | w = max(0.0, xx2 - xx1 + 1) 65 | h = max(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (iarea + areas[j] - inter) 68 | if ovr >= thresh: 69 | suppressed[j] = 1 70 | 71 | return keep 72 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
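Both the Cython cpu_nms above and the pure-NumPy nms() that follows use the legacy detection convention in which a box's width is x2 - x1 + 1 (inclusive pixel coordinates). A two-box example of the resulting IoU, with invented coordinates:

import numpy as np

a = np.array([0.0, 0.0, 9.0, 9.0])        # 10 x 10 = 100 pixels
b = np.array([5.0, 5.0, 14.0, 14.0])      # 10 x 10 = 100 pixels
xx1, yy1 = max(a[0], b[0]), max(a[1], b[1])
xx2, yy2 = min(a[2], b[2]), min(a[3], b[3])
w = max(0.0, xx2 - xx1 + 1)               # 5
h = max(0.0, yy2 - yy1 + 1)               # 5
inter = w * h                             # 25
print(inter / (100.0 + 100.0 - inter))    # IoU = 25 / 175 ≈ 0.143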
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | assert sizeof(int) == sizeof(np.int32_t) 15 | 16 | cdef extern from "gpu_nms.hpp": 17 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 18 | 19 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 20 | np.int32_t device_id=0): 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int32_t, ndim=1] \ 29 | order = scores.argsort()[::-1].astype(np.int32) 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) 
/ 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 98 | """ 99 | greedily select boxes with high confidence and overlap with current maximum <= thresh 100 | rule out overlap >= thresh, overlap = oks 101 | :param kpts_db 102 | :param thresh: retain overlap < thresh 103 | :return: indexes to keep 104 | """ 105 | if len(kpts_db) == 0: 106 | return [] 107 | 108 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 109 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 110 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 111 | 112 | order = scores.argsort()[::-1] 113 | 114 | keep = [] 115 | while order.size > 0: 116 | i = order[0] 117 | keep.append(i) 118 | 119 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 120 | 121 | inds = np.where(oks_ovr <= thresh)[0] 122 | order = order[inds + 1] 123 | 124 | return keep 125 | 126 | 127 | def rescore(overlap, scores, thresh, type='gaussian'): 128 | assert overlap.shape[0] == scores.shape[0] 129 | if type == 'linear': 130 | inds = np.where(overlap >= thresh)[0] 131 | scores[inds] = scores[inds] * (1 - overlap[inds]) 132 | else: 133 | scores = scores * np.exp(- overlap**2 / thresh) 134 | 135 | return scores 136 | 137 | 138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 139 | """ 140 | greedily select boxes with high confidence and overlap with current maximum <= thresh 141 | rule out overlap >= thresh, overlap = oks 142 | :param kpts_db 143 | :param thresh: retain overlap < thresh 144 | :return: indexes to keep 145 | """ 146 | if len(kpts_db) == 0: 147 | return [] 148 | 149 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 150 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 151 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 152 | 153 | order = scores.argsort()[::-1] 154 | scores = scores[order] 155 | 156 | # max_dets = order.size 157 | max_dets = 20 158 | keep = np.zeros(max_dets, dtype=np.intp) 159 | keep_cnt = 0 160 | while order.size > 0 and keep_cnt < max_dets: 161 | i = order[0] 162 | 163 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 164 | 165 | order = order[1:] 166 | scores = rescore(oks_ovr, scores[1:], thresh) 167 | 168 | tmp = scores.argsort()[::-1] 169 | order = order[tmp] 170 | scores = scores[tmp] 171 | 172 | keep[keep_cnt] = i 173 | keep_cnt += 1 174 | 175 | keep = keep[:keep_cnt] 176 | 177 | return keep 178 | # kpts_db = kpts_db[:keep_cnt] 179 | 180 | # return kpts_db 181 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // 
Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(¤t_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 
87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose.gluon 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.items(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kind of like a weird functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can process .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _compile methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cavalleria/humanpose.pytorch/08e0316b8f65e9fa45fefc8c9d0e28a6096a1d5f/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
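The custom build_ext above is what compiles the two extensions that lib/nms/nms.py imports; a typical invocation (an assumption, since the bundled lib/Makefile is not reproduced in this excerpt) is python setup_linux.py build_ext --inplace run from inside lib/nms. Once both extensions are built, the wrappers can be exercised on dummy boxes:

import numpy as np
from nms.nms import cpu_nms_wrapper   # nms.py also imports gpu_nms, so both builds are needed

dets = np.array([
    [0, 0, 9, 9, 0.9],        # [x1, y1, x2, y2, score]
    [1, 1, 10, 10, 0.8],      # heavily overlaps the first box -> suppressed
    [50, 50, 60, 60, 0.7],    # far away -> kept
], dtype=np.float32)
nms_fn = cpu_nms_wrapper(0.5)
print(nms_fn(dets))           # expected [0, 2]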
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def flip_back(output_flipped, matched_parts): 16 | ''' 17 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 18 | ''' 19 | assert output_flipped.ndim == 4,\ 20 | 'output_flipped should be [batch_size, num_joints, height, width]' 21 | 22 | output_flipped = output_flipped[:, :, :, ::-1] 23 | 24 | for pair in matched_parts: 25 | tmp = output_flipped[:, pair[0], :, :].copy() 26 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 27 | output_flipped[:, pair[1], :, :] = tmp 28 | 29 | return output_flipped 30 | 31 | 32 | def fliplr_joints(joints, joints_vis, width, matched_parts): 33 | """ 34 | flip coords 35 | """ 36 | # Flip horizontal 37 | joints[:, 0] = width - joints[:, 0] - 1 38 | 39 | # Change left-right parts 40 | for pair in matched_parts: 41 | joints[pair[0], :], joints[pair[1], :] = \ 42 | joints[pair[1], :], joints[pair[0], :].copy() 43 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 44 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 45 | 46 | return joints*joints_vis, joints_vis 47 | 48 | 49 | def transform_preds(coords, center, scale, output_size): 50 | target_coords = np.zeros(coords.shape) 51 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 52 | for p in range(coords.shape[0]): 53 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 54 | return target_coords 55 | 56 | 57 | def get_affine_transform( 58 | center, scale, rot, output_size, 59 | shift=np.array([0, 0], dtype=np.float32), inv=0 60 | ): 61 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 62 | print(scale) 63 | scale = np.array([scale, scale]) 64 | 65 | scale_tmp = scale * 200.0 66 | src_w = scale_tmp[0] 67 | dst_w = output_size[0] 68 | dst_h = output_size[1] 69 | 70 | rot_rad = np.pi * rot / 180 71 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 72 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 73 | 74 | src = np.zeros((3, 2), dtype=np.float32) 75 | dst = np.zeros((3, 2), dtype=np.float32) 76 | src[0, :] = center + scale_tmp * shift 77 | src[1, :] = center + src_dir + scale_tmp * shift 78 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 79 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 80 | 81 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 82 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 83 | 84 | if inv: 85 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 86 | else: 87 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 88 | 89 | return trans 90 | 91 | 92 | def affine_transform(pt, t): 93 | new_pt = np.array([pt[0], pt[1], 1.]).T 94 | new_pt = np.dot(t, new_pt) 95 | return new_pt[:2] 96 | 97 | 98 | def get_3rd_point(a, b): 99 | direct = a - b 100 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 101 | 102 | 103 | def get_dir(src_point, rot_rad): 104 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 105 | 106 | src_result = [0, 0] 107 | src_result[0] = src_point[0] * cs - src_point[1] * sn 108 | src_result[1] = src_point[0] * sn + src_point[1] * cs 109 | 110 | return src_result 111 | 112 | 113 | def crop(img, center, scale, output_size, rot=0): 114 | trans = get_affine_transform(center, scale, rot, output_size) 115 | 116 | dst_img = 
cv2.warpAffine( 117 | img, trans, (int(output_size[0]), int(output_size[1])), 118 | flags=cv2.INTER_LINEAR 119 | ) 120 | 121 | return dst_img 122 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | 17 | import torch 18 | import torch.optim as optim 19 | import torch.nn as nn 20 | 21 | 22 | def setup_logger(final_output_dir, rank, phase): 23 | time_str = time.strftime('%Y-%m-%d-%H-%M') 24 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank) 25 | final_log_file = os.path.join(final_output_dir, log_file) 26 | head = '%(asctime)-15s %(message)s' 27 | # logging.basicConfig(format=head) 28 | logging.basicConfig(filename=str(final_log_file), 29 | format=head) 30 | logger = logging.getLogger() 31 | logger.setLevel(logging.INFO) 32 | console = logging.StreamHandler() 33 | logging.getLogger('').addHandler(console) 34 | 35 | return logger, time_str 36 | 37 | def create_logger(cfg, cfg_name, phase='train'): 38 | root_output_dir = Path(cfg.OUTPUT_DIR) 39 | # set up logger 40 | if not root_output_dir.exists(): 41 | print('=> creating {}'.format(root_output_dir)) 42 | root_output_dir.mkdir() 43 | 44 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 45 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 46 | dataset = dataset.replace(':', '_') 47 | model = cfg.MODEL.NAME 48 | cfg_name = os.path.basename(cfg_name).split('.')[0] 49 | 50 | final_output_dir = root_output_dir / dataset / model / cfg_name 51 | 52 | print('=> creating {}'.format(final_output_dir)) 53 | final_output_dir.mkdir(parents=True, exist_ok=True) 54 | 55 | time_str = time.strftime('%Y-%m-%d-%H-%M') 56 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 57 | final_log_file = final_output_dir / log_file 58 | head = '%(asctime)-15s %(message)s' 59 | logging.basicConfig(filename=str(final_log_file), 60 | format=head) 61 | logger = logging.getLogger() 62 | logger.setLevel(logging.INFO) 63 | console = logging.StreamHandler() 64 | logging.getLogger('').addHandler(console) 65 | 66 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 67 | (cfg_name + '_' + time_str) 68 | 69 | print('=> creating {}'.format(tensorboard_log_dir)) 70 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 71 | 72 | return logger, str(final_output_dir), str(tensorboard_log_dir) 73 | 74 | 75 | def get_optimizer(cfg, model): 76 | optimizer = None 77 | if cfg.TRAIN.OPTIMIZER == 'sgd': 78 | optimizer = optim.SGD( 79 | model.parameters(), 80 | lr=cfg.TRAIN.LR, 81 | momentum=cfg.TRAIN.MOMENTUM, 82 | weight_decay=cfg.TRAIN.WD, 83 | nesterov=cfg.TRAIN.NESTEROV 84 | ) 85 | elif cfg.TRAIN.OPTIMIZER == 'adam': 86 | optimizer = optim.Adam( 87 | model.parameters(), 88 | lr=cfg.TRAIN.LR 89 | ) 90 | 91 | return optimizer 92 | 93 | 94 | def save_checkpoint(states, is_best, output_dir, 95 | filename='checkpoint.pth'): 96 | torch.save(states, 
os.path.join(output_dir, filename)) 97 | if is_best and 'state_dict' in states: 98 | torch.save(states['best_state_dict'], 99 | os.path.join(output_dir, 'model_best.pth')) 100 | 101 | 102 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 103 | """ 104 | :param model: 105 | :param input_tensors: 106 | :param item_length: 107 | :return: 108 | """ 109 | 110 | summary = [] 111 | 112 | ModuleDetails = namedtuple( 113 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 114 | hooks = [] 115 | layer_instances = {} 116 | 117 | def add_hooks(module): 118 | 119 | def hook(module, input, output): 120 | class_name = str(module.__class__.__name__) 121 | 122 | instance_index = 1 123 | if class_name not in layer_instances: 124 | layer_instances[class_name] = instance_index 125 | else: 126 | instance_index = layer_instances[class_name] + 1 127 | layer_instances[class_name] = instance_index 128 | 129 | layer_name = class_name + "_" + str(instance_index) 130 | 131 | params = 0 132 | 133 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 134 | class_name.find("Linear") != -1: 135 | for param_ in module.parameters(): 136 | params += param_.view(-1).size(0) 137 | 138 | flops = "Not Available" 139 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 140 | flops = ( 141 | torch.prod( 142 | torch.LongTensor(list(module.weight.data.size()))) * 143 | torch.prod( 144 | torch.LongTensor(list(output.size())[2:]))).item() 145 | elif isinstance(module, nn.Linear): 146 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 147 | * input[0].size(1)).item() 148 | 149 | if isinstance(input[0], list): 150 | input = input[0] 151 | if isinstance(output, list): 152 | output = output[0] 153 | 154 | summary.append( 155 | ModuleDetails( 156 | name=layer_name, 157 | input_size=list(input[0].size()), 158 | output_size=list(output.size()), 159 | num_parameters=params, 160 | multiply_adds=flops) 161 | ) 162 | 163 | if not isinstance(module, nn.ModuleList) \ 164 | and not isinstance(module, nn.Sequential) \ 165 | and module != model: 166 | hooks.append(module.register_forward_hook(hook)) 167 | 168 | model.eval() 169 | model.apply(add_hooks) 170 | 171 | space_len = item_length 172 | 173 | model(*input_tensors) 174 | for hook in hooks: 175 | hook.remove() 176 | 177 | details = '' 178 | if verbose: 179 | details = "Model Summary" + \ 180 | os.linesep + \ 181 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 182 | ' ' * (space_len - len("Name")), 183 | ' ' * (space_len - len("Input Size")), 184 | ' ' * (space_len - len("Output Size")), 185 | ' ' * (space_len - len("Parameters")), 186 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 187 | + os.linesep + '-' * space_len * 5 + os.linesep 188 | 189 | params_sum = 0 190 | flops_sum = 0 191 | for layer in summary: 192 | params_sum += layer.num_parameters 193 | if layer.multiply_adds != "Not Available": 194 | flops_sum += layer.multiply_adds 195 | if verbose: 196 | details += "{}{}{}{}{}{}{}{}{}{}".format( 197 | layer.name, 198 | ' ' * (space_len - len(layer.name)), 199 | layer.input_size, 200 | ' ' * (space_len - len(str(layer.input_size))), 201 | layer.output_size, 202 | ' ' * (space_len - len(str(layer.output_size))), 203 | layer.num_parameters, 204 | ' ' * (space_len - len(str(layer.num_parameters))), 205 | layer.multiply_adds, 206 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 207 | + os.linesep + '-' * space_len * 5 + os.linesep 208 | 209 | 
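    # Totals appended below: parameters are counted only for Conv/BatchNorm/Linear modules,
    # and multiply-adds only for Conv and Linear layers ("Not Available" entries are skipped).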
details += os.linesep \ 210 | + "Total Parameters: {:,}".format(params_sum) \ 211 | + os.linesep + '-' * space_len * 5 + os.linesep 212 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 213 | + os.linesep + '-' * space_len * 5 + os.linesep 214 | details += "Number of Layers" + os.linesep 215 | for layer in layer_instances: 216 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 217 | 218 | return details 219 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | 17 | from core.inference import get_max_preds 18 | 19 | 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 21 | file_name, nrow=8, padding=2): 22 | ''' 23 | batch_image: [batch_size, channel, height, width] 24 | batch_joints: [batch_size, num_joints, 3], 25 | batch_joints_vis: [batch_size, num_joints, 1], 26 | } 27 | ''' 28 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 29 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 30 | ndarr = ndarr.copy() 31 | 32 | nmaps = batch_image.size(0) 33 | xmaps = min(nrow, nmaps) 34 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 35 | height = int(batch_image.size(2) + padding) 36 | width = int(batch_image.size(3) + padding) 37 | k = 0 38 | for y in range(ymaps): 39 | for x in range(xmaps): 40 | if k >= nmaps: 41 | break 42 | joints = batch_joints[k] 43 | joints_vis = batch_joints_vis[k] 44 | 45 | for joint, joint_vis in zip(joints, joints_vis): 46 | joint[0] = x * width + padding + joint[0] 47 | joint[1] = y * height + padding + joint[1] 48 | if joint_vis[0]: 49 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) 50 | k = k + 1 51 | cv2.imwrite(file_name, ndarr) 52 | 53 | 54 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, 55 | normalize=True): 56 | ''' 57 | batch_image: [batch_size, channel, height, width] 58 | batch_heatmaps: ['batch_size, num_joints, height, width] 59 | file_name: saved file name 60 | ''' 61 | if normalize: 62 | batch_image = batch_image.clone() 63 | min = float(batch_image.min()) 64 | max = float(batch_image.max()) 65 | 66 | batch_image.add_(-min).div_(max - min + 1e-5) 67 | 68 | batch_size = batch_heatmaps.size(0) 69 | num_joints = batch_heatmaps.size(1) 70 | heatmap_height = batch_heatmaps.size(2) 71 | heatmap_width = batch_heatmaps.size(3) 72 | 73 | grid_image = np.zeros((batch_size*heatmap_height, 74 | (num_joints+1)*heatmap_width, 75 | 3), 76 | dtype=np.uint8) 77 | 78 | preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) 79 | 80 | for i in range(batch_size): 81 | image = batch_image[i].mul(255)\ 82 | .clamp(0, 255)\ 83 | .byte()\ 84 | .permute(1, 2, 0)\ 85 | .cpu().numpy() 86 | heatmaps = batch_heatmaps[i].mul(255)\ 87 | .clamp(0, 255)\ 88 | .byte()\ 89 | .cpu().numpy() 90 | 91 | resized_image = cv2.resize(image, 92 | 
(int(heatmap_width), int(heatmap_height))) 93 | 94 | height_begin = heatmap_height * i 95 | height_end = heatmap_height * (i + 1) 96 | for j in range(num_joints): 97 | cv2.circle(resized_image, 98 | (int(preds[i][j][0]), int(preds[i][j][1])), 99 | 1, [0, 0, 255], 1) 100 | heatmap = heatmaps[j, :, :] 101 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 102 | masked_image = colored_heatmap*0.7 + resized_image*0.3 103 | cv2.circle(masked_image, 104 | (int(preds[i][j][0]), int(preds[i][j][1])), 105 | 1, [0, 0, 255], 1) 106 | 107 | width_begin = heatmap_width * (j+1) 108 | width_end = heatmap_width * (j+2) 109 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 110 | masked_image 111 | # grid_image[height_begin:height_end, width_begin:width_end, :] = \ 112 | # colored_heatmap*0.7 + resized_image*0.3 113 | 114 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 115 | 116 | cv2.imwrite(file_name, grid_image) 117 | 118 | 119 | def save_debug_images(config, input, meta, target, joints_pred, output, 120 | prefix): 121 | if not config.DEBUG.DEBUG: 122 | return 123 | 124 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 125 | save_batch_image_with_joints( 126 | input, meta['joints'], meta['joints_vis'], 127 | '{}_gt.jpg'.format(prefix) 128 | ) 129 | if config.DEBUG.SAVE_BATCH_IMAGES_PRED: 130 | save_batch_image_with_joints( 131 | input, joints_pred, meta['joints_vis'], 132 | '{}_pred.jpg'.format(prefix) 133 | ) 134 | if config.DEBUG.SAVE_HEATMAPS_GT: 135 | save_batch_heatmaps( 136 | input, target, '{}_hm_gt.jpg'.format(prefix) 137 | ) 138 | if config.DEBUG.SAVE_HEATMAPS_PRED: 139 | save_batch_heatmaps( 140 | input, output, '{}_hm_pred.jpg'.format(prefix) 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.index('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.index('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in xrange(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | shapely==1.6.4 2 | Cython 3 | pyyaml 4 | json_tricks 5 | yacs>=0.1.5 6 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | -------------------------------------------------------------------------------- /tools/test.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | # ------------------------------------------------------------------------------ 8 | # Updated by cavalleria (cavalleria@gmail.com) 9 | # ------------------------------------------------------------------------------ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import argparse 16 | import os 17 | import pprint 18 | 19 | import torch 20 | import torch.nn.parallel 21 | import torch.backends.cudnn as cudnn 22 | import torch.optim 23 | import torch.utils.data 24 | import torch.utils.data.distributed 25 | import torchvision.transforms as transforms 26 | 27 | import _init_paths 28 | from config import cfg 29 | from config import update_config 30 | from core.loss import JointsMSELoss 31 | from core.function import validate 32 | from utils.utils import create_logger 33 | 34 | import dataset 35 | import models 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser(description='Train keypoints network') 40 | # general 41 | parser.add_argument('--cfg', 42 | help='experiment configure file name', 43 | required=True, 44 | type=str) 45 | 46 | parser.add_argument('opts', 47 | help="Modify config options using the command-line", 48 | default=None, 49 | nargs=argparse.REMAINDER) 50 | 51 | parser.add_argument('--modelDir', 52 | help='model directory', 53 | type=str, 54 | default='') 55 | parser.add_argument('--logDir', 56 | help='log directory', 57 | type=str, 58 | default='') 59 | parser.add_argument('--dataDir', 60 | help='data directory', 61 | type=str, 62 | default='') 63 | parser.add_argument('--prevModelDir', 64 | help='prev Model directory', 65 | type=str, 66 | default='') 67 | 68 | args = parser.parse_args() 69 | return args 70 | 71 | 72 | def main(): 73 | args = parse_args() 74 | update_config(cfg, args) 75 | 76 | logger, final_output_dir, tb_log_dir = create_logger( 77 | cfg, args.cfg, 'valid') 78 | 79 | logger.info(pprint.pformat(args)) 80 | logger.info(cfg) 81 | 82 | # cudnn related setting 83 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 84 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 85 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 86 | 87 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 88 | cfg, is_train=False 89 | ) 90 | 91 | if cfg.TEST.MODEL_FILE: 92 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 93 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 94 | else: 95 | model_state_file = os.path.join( 96 | final_output_dir, 'final_state.pth' 97 | ) 98 | logger.info('=> loading model from {}'.format(model_state_file)) 99 | model.load_state_dict(torch.load(model_state_file)) 100 | 101 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 102 | 103 | # define loss function (criterion) and optimizer 104 | criterion = JointsMSELoss( 105 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 106 | ).cuda() 107 | 108 | # Data loading code 109 | normalize = transforms.Normalize( 110 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 111 | ) 112 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 113 | cfg, cfg.DATASET.ROOT, 
cfg.DATASET.TEST_SET, False, 114 | transforms.Compose([ 115 | transforms.ToTensor(), 116 | normalize, 117 | ]) 118 | ) 119 | valid_loader = torch.utils.data.DataLoader( 120 | valid_dataset, 121 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 122 | shuffle=False, 123 | num_workers=cfg.WORKERS, 124 | pin_memory=True 125 | ) 126 | 127 | # evaluate on validation set 128 | validate(cfg, valid_loader, valid_dataset, model, criterion, 129 | final_output_dir, tb_log_dir) 130 | 131 | 132 | if __name__ == '__main__': 133 | main() 134 | -------------------------------------------------------------------------------- /tools/train.ori.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import argparse 12 | import os 13 | import pprint 14 | import shutil 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | from tensorboardX import SummaryWriter 24 | 25 | import _init_paths 26 | from config import cfg 27 | from config import update_config 28 | from core.loss import JointsMSELoss 29 | from core.function import train 30 | from core.function import validate 31 | from utils.utils import get_optimizer 32 | from utils.utils import save_checkpoint 33 | from utils.utils import create_logger 34 | from utils.utils import get_model_summary 35 | 36 | import dataset 37 | import models 38 | 39 | 40 | def parse_args(): 41 | parser = argparse.ArgumentParser(description='Train keypoints network') 42 | # general 43 | parser.add_argument('--cfg', 44 | help='experiment configure file name', 45 | required=True, 46 | type=str) 47 | 48 | parser.add_argument('opts', 49 | help="Modify config options using the command-line", 50 | default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | # philly 54 | parser.add_argument('--modelDir', 55 | help='model directory', 56 | type=str, 57 | default='') 58 | parser.add_argument('--logDir', 59 | help='log directory', 60 | type=str, 61 | default='') 62 | parser.add_argument('--dataDir', 63 | help='data directory', 64 | type=str, 65 | default='') 66 | parser.add_argument('--prevModelDir', 67 | help='prev Model directory', 68 | type=str, 69 | default='') 70 | 71 | args = parser.parse_args() 72 | 73 | return args 74 | 75 | 76 | def main(): 77 | args = parse_args() 78 | update_config(cfg, args) 79 | 80 | logger, final_output_dir, tb_log_dir = create_logger( 81 | cfg, args.cfg, 'train') 82 | 83 | logger.info(pprint.pformat(args)) 84 | logger.info(cfg) 85 | 86 | # cudnn related setting 87 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 88 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 89 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 90 | 91 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 92 | cfg, is_train=True 93 | ) 94 | 95 | # copy model file 96 | this_dir = os.path.dirname(__file__) 97 | shutil.copy2( 98 | os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), 99 | final_output_dir) 100 | # 
logger.info(pprint.pformat(model)) 101 | 102 | writer_dict = { 103 | 'writer': SummaryWriter(log_dir=tb_log_dir), 104 | 'train_global_steps': 0, 105 | 'valid_global_steps': 0, 106 | } 107 | 108 | dump_input = torch.rand( 109 | (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) 110 | ) 111 | writer_dict['writer'].add_graph(model, (dump_input, )) 112 | 113 | logger.info(get_model_summary(model, dump_input)) 114 | 115 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 116 | 117 | # define loss function (criterion) and optimizer 118 | criterion = JointsMSELoss( 119 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 120 | ).cuda() 121 | 122 | # Data loading code 123 | normalize = transforms.Normalize( 124 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 125 | ) 126 | train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 127 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True, 128 | transforms.Compose([ 129 | transforms.ToTensor(), 130 | normalize, 131 | ]) 132 | ) 133 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 134 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 135 | transforms.Compose([ 136 | transforms.ToTensor(), 137 | normalize, 138 | ]) 139 | ) 140 | 141 | train_loader = torch.utils.data.DataLoader( 142 | train_dataset, 143 | batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 144 | shuffle=cfg.TRAIN.SHUFFLE, 145 | num_workers=cfg.WORKERS, 146 | pin_memory=cfg.PIN_MEMORY 147 | ) 148 | valid_loader = torch.utils.data.DataLoader( 149 | valid_dataset, 150 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 151 | shuffle=False, 152 | num_workers=cfg.WORKERS, 153 | pin_memory=cfg.PIN_MEMORY 154 | ) 155 | 156 | best_perf = 0.0 157 | best_model = False 158 | last_epoch = -1 159 | optimizer = get_optimizer(cfg, model) 160 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 161 | checkpoint_file = os.path.join( 162 | final_output_dir, 'checkpoint.pth' 163 | ) 164 | 165 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): 166 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 167 | checkpoint = torch.load(checkpoint_file) 168 | begin_epoch = checkpoint['epoch'] 169 | best_perf = checkpoint['perf'] 170 | last_epoch = checkpoint['epoch'] 171 | model.load_state_dict(checkpoint['state_dict']) 172 | 173 | optimizer.load_state_dict(checkpoint['optimizer']) 174 | logger.info("=> loaded checkpoint '{}' (epoch {})".format( 175 | checkpoint_file, checkpoint['epoch'])) 176 | 177 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 178 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 179 | last_epoch=last_epoch 180 | ) 181 | 182 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 183 | lr_scheduler.step() 184 | 185 | # train for one epoch 186 | train(args, cfg, train_loader, model, criterion, optimizer, epoch, 187 | final_output_dir, tb_log_dir, writer_dict) 188 | 189 | 190 | # evaluate on validation set 191 | perf_indicator = validate( 192 | cfg, valid_loader, valid_dataset, model, criterion, 193 | final_output_dir, tb_log_dir, writer_dict 194 | ) 195 | 196 | if perf_indicator >= best_perf: 197 | best_perf = perf_indicator 198 | best_model = True 199 | else: 200 | best_model = False 201 | 202 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 203 | save_checkpoint({ 204 | 'epoch': epoch + 1, 205 | 'model': cfg.MODEL.NAME, 206 | 'state_dict': model.state_dict(), 207 | 'best_state_dict': model.module.state_dict(), 208 | 'perf': perf_indicator, 209 | 'optimizer': optimizer.state_dict(), 210 | }, best_model, final_output_dir) 
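        # End of the per-epoch loop: 'best_state_dict' holds the unwrapped module weights,
        # which save_checkpoint writes out as model_best.pth whenever is_best is True.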
211 | 212 | final_model_state_file = os.path.join( 213 | final_output_dir, 'final_state.pth' 214 | ) 215 | logger.info('=> saving final model state to {}'.format( 216 | final_model_state_file) 217 | ) 218 | torch.save(model.module.state_dict(), final_model_state_file) 219 | writer_dict['writer'].close() 220 | 221 | 222 | if __name__ == '__main__': 223 | main() -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | # ------------------------------------------------------------------------------ 8 | # Updated by cavalleria (cavalleria@gmail.com) 9 | # ------------------------------------------------------------------------------ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import argparse 16 | import os 17 | import pprint 18 | import shutil 19 | import warnings 20 | import random 21 | import numpy as np 22 | 23 | import torch 24 | import torch.backends.cudnn as cudnn 25 | import torch.distributed as dist 26 | import torch.multiprocessing as mp 27 | import torch.nn as nn 28 | import torch.nn.parallel 29 | import torch.optim 30 | import torch.utils.data 31 | import torch.utils.data.distributed 32 | import torchvision.transforms as transforms 33 | from tensorboardX import SummaryWriter 34 | 35 | import _init_paths 36 | import dataset 37 | import models 38 | from tqdm import tqdm 39 | 40 | from config import cfg 41 | from config import update_config 42 | from core.loss import JointsMSELoss 43 | from core.function import train 44 | from core.function import validate 45 | 46 | from utils.utils import create_logger 47 | from utils.utils import get_optimizer 48 | from utils.utils import save_checkpoint 49 | from utils.utils import setup_logger 50 | from utils.utils import get_model_summary 51 | 52 | 53 | def parse_args(): 54 | parser = argparse.ArgumentParser(description='Train keypoints network') 55 | # general 56 | parser.add_argument('--cfg', 57 | help='experiment configure file name', 58 | required=True, 59 | type=str) 60 | parser.add_argument('opts', 61 | help="Modify config options using the command-line", 62 | default=None, 63 | nargs=argparse.REMAINDER) 64 | parser.add_argument('--seed', 65 | help='random seed', 66 | default=1337, 67 | type=int) 68 | 69 | parser.add_argument('--gpu', 70 | help='gpu id for multiprocessing training', 71 | type=str) 72 | parser.add_argument('--world-size', 73 | default=1, 74 | type=int, 75 | help='number of nodes for distributed training') 76 | parser.add_argument('--rank', 77 | default=0, 78 | type=int, 79 | help='node rank for distributed training') 80 | args = parser.parse_args() 81 | 82 | return args 83 | 84 | def set_seed(seed): 85 | random.seed(seed) 86 | np.random.seed(seed) 87 | torch.manual_seed(seed) 88 | torch.cuda.manual_seed(seed) 89 | torch.cuda.manual_seed_all(seed) 90 | 91 | def main(): 92 | args = parse_args() 93 | set_seed(int(args.seed)) 94 | update_config(cfg, args) 95 | 96 | cfg.defrost() 97 | cfg.RANK = args.rank 98 | cfg.freeze() 99 | 100 | logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train') 101 | 102 | 
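    # The remainder of main() sets the cuDNN flags and spawns one distributed worker per
    # visible GPU via mp.spawn; per-process setup and the training loop live in main_worker().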
logger.info(pprint.pformat(args)) 103 | logger.info(cfg) 104 | 105 | # cudnn related setting 106 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 107 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 108 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 109 | 110 | ngpus_per_node = torch.cuda.device_count() 111 | 112 | args.world_size = ngpus_per_node * args.world_size 113 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args, final_output_dir, tb_log_dir)) 114 | 115 | 116 | 117 | def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir): 118 | 119 | args.gpu = gpu 120 | args.rank = args.rank * ngpus_per_node + gpu 121 | print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(cfg.DIST_URL, args.world_size, args.rank)) 122 | dist.init_process_group(backend=cfg.DIST_BACKEND, init_method=cfg.DIST_URL, world_size=args.world_size, rank=args.rank) 123 | 124 | update_config(cfg, args) 125 | 126 | # setup logger 127 | logger, _ = setup_logger(final_output_dir, args.rank, 'train') 128 | 129 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True) 130 | logger.info(get_model_summary(model, torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE))) 131 | 132 | # copy model file 133 | if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0): 134 | this_dir = os.path.dirname(__file__) 135 | shutil.copy2(os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), final_output_dir) 136 | 137 | writer_dict = { 138 | 'writer': SummaryWriter(log_dir=tb_log_dir), 139 | 'train_global_steps': 0, 140 | 'valid_global_steps': 0, 141 | } 142 | 143 | if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0): 144 | dump_input = torch.rand((1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])) 145 | writer_dict['writer'].add_graph(model, (dump_input, )) 146 | # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE)) 147 | 148 | if cfg.MODEL.SYNC_BN: 149 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 150 | 151 | torch.cuda.set_device(args.gpu) 152 | model.cuda(args.gpu) 153 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) 154 | 155 | # define loss function (criterion) and optimizer 156 | criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda(args.gpu) 157 | 158 | # Data loading code 159 | train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 160 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True, 161 | transforms.Compose([ 162 | transforms.ToTensor(), 163 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 164 | ]) 165 | ) 166 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 167 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 168 | transforms.Compose([ 169 | transforms.ToTensor(), 170 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 171 | ]) 172 | ) 173 | 174 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 175 | train_loader = torch.utils.data.DataLoader( 176 | train_dataset, 177 | batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 178 | shuffle=(train_sampler is None), 179 | num_workers=cfg.WORKERS, 180 | pin_memory=cfg.PIN_MEMORY, 181 | sampler=train_sampler 182 | ) 183 | 184 | valid_loader = torch.utils.data.DataLoader( 185 | valid_dataset, 186 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 187 | shuffle=False, 188 | num_workers=cfg.WORKERS, 189 
| pin_memory=cfg.PIN_MEMORY 190 | ) 191 | logger.info(train_loader.dataset) 192 | 193 | best_perf = -1 194 | best_model = False 195 | last_epoch = -1 196 | optimizer = get_optimizer(cfg, model) 197 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 198 | checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth') 199 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): 200 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 201 | checkpoint = torch.load(checkpoint_file) 202 | begin_epoch = checkpoint['epoch'] 203 | best_perf = checkpoint['perf'] 204 | last_epoch = checkpoint['epoch'] 205 | model.load_state_dict(checkpoint['state_dict']) 206 | 207 | optimizer.load_state_dict(checkpoint['optimizer']) 208 | logger.info("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 209 | 210 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 211 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 212 | last_epoch=last_epoch) 213 | 214 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 215 | 216 | # train for one epoch 217 | train(cfg, train_loader, model, criterion, optimizer, epoch, 218 | final_output_dir, tb_log_dir, writer_dict) 219 | # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`. 220 | lr_scheduler.step() 221 | 222 | # evaluate on validation set 223 | perf_indicator = validate( 224 | args, cfg, valid_loader, valid_dataset, model, criterion, 225 | final_output_dir, tb_log_dir, writer_dict 226 | ) 227 | 228 | if perf_indicator >= best_perf: 229 | best_perf = perf_indicator 230 | best_model = True 231 | else: 232 | best_model = False 233 | 234 | if not cfg.MULTIPROCESSING_DISTRIBUTED or ( 235 | cfg.MULTIPROCESSING_DISTRIBUTED 236 | and args.rank == 0 237 | ): 238 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 239 | save_checkpoint({ 240 | 'epoch': epoch + 1, 241 | 'model': cfg.MODEL.NAME, 242 | 'state_dict': model.state_dict(), 243 | 'best_state_dict': model.module.state_dict(), 244 | 'perf': perf_indicator, 245 | 'optimizer': optimizer.state_dict(), 246 | }, best_model, final_output_dir) 247 | 248 | final_model_state_file = os.path.join( 249 | final_output_dir, 'final_state{}.pth.tar'.format(gpu) 250 | ) 251 | 252 | logger.info('saving final model state to {}'.format( 253 | final_model_state_file)) 254 | torch.save(model.module.state_dict(), final_model_state_file) 255 | writer_dict['writer'].close() 256 | 257 | 258 | if __name__ == '__main__': 259 | main() -------------------------------------------------------------------------------- /train_coco_w18_v1.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \ 3 | --cfg experiments/coco/hrnet/w18_small_v1_256x192_adam_lr1e-3.yaml 4 | -------------------------------------------------------------------------------- /train_coco_w18_v2.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \ 3 | --cfg experiments/coco/hrnet/w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml 4 | -------------------------------------------------------------------------------- /train_coco_w32.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \ 3 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml 4 | 
-------------------------------------------------------------------------------- /train_lpn.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \ 3 | --cfg experiments/coco/lpn/lpn50_256x192_gd256x2_gc.yaml 4 | -------------------------------------------------------------------------------- /train_mpii.sh: -------------------------------------------------------------------------------- 1 | 2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \ 3 | --cfg experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml 4 | --------------------------------------------------------------------------------
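The launch scripts above cover training only; the repository ships no evaluation script. A hypothetical invocation of tools/test.py for the COCO HRNet-W32 config is sketched below — the checkpoint path is an assumption (no weights are bundled with the repo), and the trailing key/value pair is the opts list that test.py forwards to update_config. Other configs under experiments/ can be substituted the same way.

CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/test.py \
    --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \
    TEST.MODEL_FILE models/pose_hrnet_w32_256x192.pth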