├── .gitignore
├── LICENSE
├── README.md
├── demo
    ├── .gitignore
    ├── Dockerfile
    ├── README.md
    ├── build-docker.sh
    ├── demo.sh
    ├── inference-config-w18_v1.yaml
    ├── inference-config-w18_v2.yaml
    ├── inference-config.yaml
    └── inference.py
├── experiments
    ├── coco
    │   ├── hrnet
    │   │   ├── w18_small_v1_256x192_adam_lr1e-3.yaml
    │   │   ├── w18_small_v2_256x192_adam_lr1e-3.yaml
    │   │   ├── w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml
    │   │   ├── w32_256x192_adam_lr1e-3.yaml
    │   │   ├── w32_384x288_adam_lr1e-3.yaml
    │   │   ├── w48_256x192_adam_lr1e-3.yaml
    │   │   └── w48_384x288_adam_lr1e-3.yaml
    │   ├── lpn
    │   │   ├── lpn100_256x192_gd256x2_gc.yaml
    │   │   ├── lpn101_256x192_gd256x2_gc.yaml
    │   │   ├── lpn152_256x192_gd256x2_gc.yaml
    │   │   ├── lpn18_256x192_gd256x2_gc.yaml
    │   │   ├── lpn18h_256x192_gd256x2_gc.yaml
    │   │   ├── lpn34_256x192_gd256x2_gc.yaml
    │   │   ├── lpn34h_256x192_gd256x2_gc.yaml
    │   │   └── lpn50_256x192_gd256x2_gc.yaml
    │   └── resnet
    │   │   ├── res101_256x192_d256x3_adam_lr1e-3.yaml
    │   │   ├── res101_384x288_d256x3_adam_lr1e-3.yaml
    │   │   ├── res152_256x192_d256x3_adam_lr1e-3.yaml
    │   │   ├── res152_384x288_d256x3_adam_lr1e-3.yaml
    │   │   ├── res50_256x192_d256x3_adam_lr1e-3.yaml
    │   │   └── res50_384x288_d256x3_adam_lr1e-3.yaml
    └── mpii
    │   ├── hrnet
    │       ├── w32_256x256_adam_lr1e-3.yaml
    │       └── w48_256x256_adam_lr1e-3.yaml
    │   └── resnet
    │       ├── res101_256x256_d256x3_adam_lr1e-3.yaml
    │       ├── res152_256x256_d256x3_adam_lr1e-3.yaml
    │       └── res50_256x256_d256x3_adam_lr1e-3.yaml
├── lib
    ├── Makefile
    ├── config
    │   ├── __init__.py
    │   ├── default.py
    │   └── models.py
    ├── core
    │   ├── evaluate.py
    │   ├── function.py
    │   ├── inference.py
    │   └── loss.py
    ├── dataset
    │   ├── JointsDataset.py
    │   ├── __init__.py
    │   ├── coco.py
    │   └── mpii.py
    ├── models
    │   ├── __init__.py
    │   ├── lightweight_modules.py
    │   ├── lpn.py
    │   ├── pose_hrnet.py
    │   └── pose_resnet.py
    ├── nms
    │   ├── __init__.py
    │   ├── cpu_nms.c
    │   ├── cpu_nms.pyx
    │   ├── gpu_nms.cpp
    │   ├── gpu_nms.cu
    │   ├── gpu_nms.hpp
    │   ├── gpu_nms.pyx
    │   ├── nms.py
    │   ├── nms_kernel.cu
    │   └── setup_linux.py
    └── utils
    │   ├── __init__.py
    │   ├── transforms.py
    │   ├── utils.py
    │   ├── vis.py
    │   └── zipreader.py
├── requirements.txt
├── tools
    ├── _init_paths.py
    ├── test.py
    ├── train.ori.py
    └── train.py
├── train_coco_w18_v1.sh
├── train_coco_w18_v2.sh
├── train_coco_w32.sh
├── train_lpn.sh
├── train_mpii.sh
└── visualization
    └── plot_coco.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | #lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # custom
132 | models/*
133 | .history
134 | demo/*.mp4
135 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 cavalleria
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # humanpose.pytorch
  2 | 
  3 | ## Introduction
  4 | 
  5 | This is an human pose estimation pytorch implementation derivated from [deep-high-resolution-net.pytorch](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch), aims to achieve lightweight real-time application.
  6 | 
  7 | ## Features
  8 | 
  9 | - [x] It support Distributed DataParallel training, much faster than origin repo.
 10 | - [x] support lightweight pose backbones.
 11 | - [x] support lightweight mobile hunman detector [yolov3mobile](https://github.com/cavalleria/yolov3mobile).
 12 | 
 13 | ## Main Results
 14 | 
 15 | ### Results on MPII val
 16 | 
 17 | | Arch               | Head | Shoulder | Elbow | Wrist |  Hip | Knee | Ankle | Mean | Mean@0.1 |
 18 | |--------------------|------|----------|-------|-------|------|------|-------|------|----------|
 19 | | **pose_hrnet_w32** | 97.067 | 95.686 | 90.21 | 85.644 | 89.077 | 85.795 | 82.711 | 89.927 | 37.931 |
 20 | | **pose_hrnet_w48** | 96.930 | 95.771 | 90.864 | 86.329 | 88.731 | 86.862 | 82.829 | 90.208 | 38.002 |
 21 | 
 22 | ### Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
 23 | 
 24 | | Arch               | Input size | #Params | FLOPs | Weight size | AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) |
 25 | |--------------------|------------|-------|-------|-------|-------|-------|--------|--------|--------|-------|-------|--------|--------|--------|
 26 | | **pose_hrnet_w18_v1** | 256x192 | 1.3M  | 0.68G | 5.3M | 0.572 | 0.863 | 0.644 | 0.545 | 0.614 | 0.612 | 0.876 | 0.687 | 0.579 | 0.661 |
 27 | | **pose_hrnet_w18_v2** | 256x192 | 3.7M  | 1.8G  | 15M | 0.710 | 0.916 | 0.784 | 0.685 | 0.753 | 0.740 | 0.922 | 0.806 | 0.710 | 0.786 |
 28 | | **pose_hrnet_w18_v2_softargmax** | 256x192 | 3.7M  | 1.8G  | 15M | 0.713 | 0.916 | 0.783 | 0.685 | 0.758 | 0.743 | 0.923 | 0.809 | 0.711 | 0.792 |
 29 | | **pose_hrnet_w32**    | 256x192 | 28.5M | 7.1G  | 110M | 0.765 | 0.936 | 0.838 | 0.740 | 0.810 | 0.794 | 0.945 | 0.858 | 0.763 | 0.842 |
 30 | | **lpn_18**            | 256x192 | 0.47M | 0.42G | 1.9M | 0.445 | 0.773 | 0.445 | 0.434 | 0.467 | 0.497 | 0.798 | 0.519 | 0.474 | 0.531 |
 31 | | **lpn_18h**           | 256x192 | 0.50M | 0.43G | 2.1M | 0.486 | 0.806 | 0.506 | 0.472 | 0.511 | 0.533 | 0.821 | 0.567 | 0.508 | 0.570 |
 32 | | **lpn_34**            | 256x192 | 0.59M | 0.43G | 2.5M | 0.493 | 0.808 | 0.522 | 0.478 | 0.515 | 0.538 | 0.825 | 0.577 | 0.514 | 0.573 |
 33 | | **lpn_34h**           | 256x192 | 0.66M | 0.46G | 2.7M | 0.536 | 0.830 | 0.579 | 0.520  | 0.564 | 0.579 | 0.849 | 0.630 | 0.552 | 0.618 |
 34 | | **lpn_50**            | 256x192 | 2.9M | 1.0G | 12M | 0.684 | 0.904 | 0.762 | 0.659 | 0.724 | 0.717 | 0.914 | 0.789 | 0.687 | 0.763 |
 35 | | **lpn_100**           | 256x192 | 6.7M | 1.8G | 27M | 0.721 | 0.915 | 0.805 | 0.699 | 0.764 | 0.754 | 0.929 | 0.825 | 0.725 | 0.799 |
 36 | 
 37 | 
 38 | ### Iterative training strategy
 39 | 
 40 | | lpn18h | AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) |
 41 | |--------|--------|-------|--------|-------|--------|-------|--------|-------|--------|-------|
 42 | | stage0 | 0.486 | 0.806 | 0.506 | 0.472 | 0.511 | 0.533 | 0.821 | 0.567 | 0.508 | 0.570 |
 43 | | stage1 | 0.496 | 0.807 | 0.521 | 0.483 | 0.521 | 0.541 | 0.822 | 0.577 | 0.517 | 0.577 |
 44 | | stage2 | 0.505 | 0.808 | 0.540 | 0.491 | 0.529 | 0.549 | 0.825 | 0.591 | 0.524 | 0.586 |
 45 | | stage3 | 0.510 | 0.819 | 0.542 | 0.497 | 0.536 | 0.555 | 0.832 | 0.598 | 0.530 | 0.591 |
 46 | | stage4 | 0.514 | 0.819 | 0.543 | 0.500 | 0.538 | 0.558 | 0.832 | 0.599 | 0.533 | 0.595 |
 47 | | stage5 | 0.517 | 0.819 | 0.553 | 0.500 | 0.544 | 0.559 | 0.834 | 0.602 | 0.533 | 0.598 |
 48 | | stage6 | 0.520 | 0.820 | 0.557 | 0.503 | 0.546 | 0.563 | 0.836 | 0.607 | 0.537 | 0.601 |
 49 | 
 50 | 
 51 | ## Environment
 52 | 
 53 | The code is developed using python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 8 NVIDIA V100 GPU cards. Other platforms or GPU cards are not fully tested.
 54 | 
 55 | ## Quick start
 56 | 
 57 | ### Installation
 58 | 
 59 | 1. Install pytorch >= v1.0.0 following [official instruction](https://pytorch.org/).
 60 | 2. Clone this repo, and we'll call the directory that you cloned as ${POSE_ROOT}.
 61 | 3. Install dependencies:
 62 | 
 63 |     ```
 64 |     pip install -r requirements.txt
 65 |     ```
 66 | 4. Make libs:
 67 | 
 68 |    ```
 69 |    cd ${POSE_ROOT}/lib
 70 |    make
 71 |    ```
 72 | 5. Install [COCOAPI](https://github.com/cocodataset/cocoapi):
 73 |    ```
 74 |    # COCOAPI=/path/to/clone/cocoapi
 75 |    git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
 76 |    cd $COCOAPI/PythonAPI
 77 |    # Install into global site-packages
 78 |    make install
 79 |    # Alternatively, if you do not have permissions or prefer
 80 |    # not to install the COCO API into global site-packages
 81 |    python3 setup.py install --user
 82 |    ```
 83 |    Note that instructions like # COCOAPI=/path/to/install/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly.
 84 | 6. Init output(training model output directory) and log(tensorboard log directory) directory:
 85 | 
 86 |    ```
 87 |    mkdir output 
 88 |    mkdir log
 89 |    ```
 90 | 
 91 |    Your directory tree should look like this:
 92 | 
 93 |    ```
 94 |    ${POSE_ROOT}
 95 |    ├── data
 96 |    ├── experiments
 97 |    ├── lib
 98 |    ├── log
 99 |    ├── models
100 |    ├── output
101 |    ├── tools 
102 |    ├── README.md
103 |    └── requirements.txt
104 |    ```
105 | 
106 | 7. Download pretrained models from our model zoo([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ))
107 |    ```
108 |    ${POSE_ROOT}
109 |     `-- models
110 |         `-- pytorch
111 |             |-- imagenet
112 |             |   |-- hrnet_w32-36af842e.pth
113 |             |   |-- hrnet_w48-8ef0771d.pth
114 |             |   |-- resnet50-19c8e357.pth
115 |             |-- pose_coco
116 |             |   |-- pose_hrnet_w32_256x192.pth
117 |             |   |-- pose_hrnet_w32_384x288.pth
118 |             |   |-- pose_hrnet_w48_256x192.pth
119 |             |   |-- pose_hrnet_w48_384x288.pth
120 |             |   |-- pose_resnet_50_256x192.pth
121 |             |   `-- pose_resnet_50_384x288.pth
122 |             `-- pose_mpii
123 |                 |-- pose_hrnet_w32_256x256.pth
124 |                 |-- pose_hrnet_w48_256x256.pth
125 |                 `-- pose_resnet_50_256x256.pth
126 | 
127 |    ```
128 |    
129 | ### Data preparation
130 | **For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). The original annotation files are in matlab format. We have converted them into json format, you also need to download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW00SqrairNetmeVu4) or [GoogleDrive](https://drive.google.com/drive/folders/1En_VqmStnsXMdldXA6qpqEyDQulnmS3a?usp=sharing).
131 | Extract them under {POSE_ROOT}/data, and make them look like this:
132 | ```
133 | ${POSE_ROOT}
134 | |-- data
135 | `-- |-- mpii
136 |     `-- |-- annot
137 |         |   |-- gt_valid.mat
138 |         |   |-- test.json
139 |         |   |-- train.json
140 |         |   |-- trainval.json
141 |         |   `-- valid.json
142 |         `-- images
143 |             |-- 000001163.jpg
144 |             |-- 000003072.jpg
145 | ```
146 | 
147 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. We also provide person detection result of COCO val2017 and test-dev2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing).
148 | Download and extract them under {POSE_ROOT}/data, and make them look like this:
149 | ```
150 | ${POSE_ROOT}
151 | |-- data
152 | `-- |-- coco
153 |     `-- |-- annotations
154 |         |   |-- person_keypoints_train2017.json
155 |         |   `-- person_keypoints_val2017.json
156 |         |-- person_detection_results
157 |         |   |-- COCO_val2017_detections_AP_H_56_person.json
158 |         |   |-- COCO_test-dev2017_detections_AP_H_609_person.json
159 |         `-- images
160 |             |-- train2017
161 |             |   |-- 000000000009.jpg
162 |             |   |-- 000000000025.jpg
163 |             |   |-- 000000000030.jpg
164 |             |   |-- ... 
165 |             `-- val2017
166 |                 |-- 000000000139.jpg
167 |                 |-- 000000000285.jpg
168 |                 |-- 000000000632.jpg
169 |                 |-- ... 
170 | ```
171 | 
172 | ### Training and Testing
173 | 
174 | #### Testing on MPII dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ))
175 |  
176 | 
177 | ```
178 | python tools/test.py \
179 |     --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml \
180 |     TEST.MODEL_FILE models/pytorch/pose_mpii/pose_hrnet_w32_256x256.pth
181 | ```
182 | 
183 | #### Training on MPII dataset
184 | 
185 | ```
186 | python tools/train.py \
187 |     --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml
188 | ```
189 | 
190 | #### Testing on COCO val2017 dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ))
191 |  
192 | 
193 | ```
194 | python tools/test.py \
195 |     --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \
196 |     TEST.MODEL_FILE models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth \
197 |     TEST.USE_GT_BBOX False
198 | ```
199 | 
200 | #### Training on COCO train2017 dataset
201 | 
202 | ```
203 | python tools/train.py \
204 |     --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \
205 | ```
206 | 
207 | ### Visualization
208 | 
209 | #### Visualizing predictions on COCO val
210 | 
211 | ```
212 | python visualization/plot_coco.py \
213 |     --prediction output/coco/w48_384x288_adam_lr1e-3/results/keypoints_val2017_results_0.json \
214 |     --save-path visualization/results
215 | 
216 | ```
217 | 
218 | ## Acknowledgement
219 | 
220 | * This repo is modified and adapted on these great repositories [deep-high-resolution-net.pytorch](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch)
221 | 
222 | ## Contact
223 | 
224 | ```
225 | cavallyb@gmail.com
226 | ```
227 | 
228 | 


--------------------------------------------------------------------------------
/demo/.gitignore:
--------------------------------------------------------------------------------
1 | output
2 | models
3 | videos
4 | 


--------------------------------------------------------------------------------
/demo/Dockerfile:
--------------------------------------------------------------------------------
  1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04
  2 | 
  3 | ENV OPENCV_VERSION="3.4.6"
  4 | 
  5 | # Basic toolchain
  6 | RUN apt-get update && apt-get install -y \
  7 |         apt-utils \
  8 |         build-essential \
  9 |         git \
 10 |         wget \
 11 |         unzip \
 12 |         yasm \
 13 |         pkg-config \
 14 |         libcurl4-openssl-dev \
 15 |         zlib1g-dev \
 16 |         htop \
 17 |         cmake \
 18 |         nano \
 19 |         python3-pip \
 20 |         python3-dev \
 21 |         python3-tk \
 22 |         libx264-dev \
 23 |     && cd /usr/local/bin \
 24 |     && ln -s /usr/bin/python3 python \
 25 |     && pip3 install --upgrade pip \
 26 |     && apt-get autoremove -y
 27 | 
 28 | # Getting OpenCV dependencies available with apt
 29 | RUN apt-get update && apt-get install -y \
 30 |         libeigen3-dev \
 31 |         libjpeg-dev \
 32 |         libpng-dev \
 33 |         libtiff-dev \
 34 |         libjasper-dev \
 35 |         libswscale-dev \
 36 |         libavcodec-dev \
 37 |         libavformat-dev && \
 38 |     apt-get autoremove -y
 39 | 
 40 | # Getting other dependencies
 41 | RUN apt-get update && apt-get install -y \
 42 |         cppcheck \
 43 |         graphviz \
 44 |         doxygen \
 45 |         p7zip-full \
 46 |         libdlib18 \
 47 |         libdlib-dev && \
 48 |     apt-get autoremove -y
 49 | 
 50 | 
 51 | # Install OpenCV + OpenCV contrib (takes forever)
 52 | RUN mkdir -p /tmp && \
 53 |     cd /tmp && \
 54 |     wget --no-check-certificate -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \
 55 |     wget --no-check-certificate -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \
 56 |     unzip opencv.zip && \
 57 |     unzip opencv_contrib.zip && \
 58 |     mkdir opencv-${OPENCV_VERSION}/build && \
 59 |     cd opencv-${OPENCV_VERSION}/build && \
 60 |     cmake -D CMAKE_BUILD_TYPE=RELEASE \
 61 |         -D CMAKE_INSTALL_PREFIX=/usr/local \
 62 |         -D WITH_CUDA=ON \
 63 |         -D CUDA_FAST_MATH=1 \
 64 |         -D WITH_CUBLAS=1 \
 65 |         -D WITH_FFMPEG=ON \
 66 |         -D WITH_OPENCL=ON \
 67 |         -D WITH_V4L=ON \
 68 |         -D WITH_OPENGL=ON \
 69 |         -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \
 70 |         .. && \
 71 |     make -j$(nproc) && \
 72 |     make install && \
 73 |     echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf && \
 74 |     ldconfig && \
 75 |     cd /tmp && \
 76 |     rm -rf opencv-${OPENCV_VERSION} opencv.zip opencv_contrib-${OPENCV_VERSION} opencv_contrib.zip && \
 77 |     cd /
 78 | 
 79 | # Compile and install ffmpeg from source
 80 | RUN git clone https://github.com/FFmpeg/FFmpeg /root/ffmpeg && \
 81 |     cd /root/ffmpeg && \
 82 |     ./configure --enable-gpl --enable-libx264 --enable-nonfree --disable-shared --extra-cflags=-I/usr/local/include && \
 83 |     make -j8 && make install -j8
 84 | 
 85 | # clone deep-high-resolution-net
 86 | ARG POSE_ROOT=/pose_root
 87 | RUN git clone https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git $POSE_ROOT
 88 | WORKDIR $POSE_ROOT
 89 | RUN mkdir output && mkdir log
 90 | 
 91 | RUN pip3 install -r requirements.txt && \
 92 |     pip3 install torch==1.1.0 \
 93 |     torchvision==0.3.0 \
 94 |     opencv-python \
 95 |     pillow==6.2.1
 96 | 
 97 | # build deep-high-resolution-net lib
 98 | WORKDIR $POSE_ROOT/lib
 99 | RUN make
100 | 
101 | # install COCO API
102 | ARG COCOAPI=/cocoapi
103 | RUN git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
104 | WORKDIR $COCOAPI/PythonAPI
105 | # Install into global site-packages
106 | RUN make install
107 | 
108 | # download fastrrnn pretrained model for person detection
109 | RUN python -c "import torchvision; model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True); model.eval()"
110 | 
111 | COPY inference.py $POSE_ROOT/tools
112 | COPY inference-config.yaml $POSE_ROOT/
113 | 


--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
 1 | # Inference hrnet
 2 | 
 3 | Inferencing the deep-high-resolution-net.pytoch without using Docker. 
 4 | 
 5 | ## Prep
 6 | 1. Download the researchers' pretrained pose estimator from [google drive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) to this directory under `models/`
 7 | 2. Put the video file you'd like to infer on in this directory under `videos`
 8 | 3. build the docker container in this directory with `./build-docker.sh` (this can take time because it involves compiling opencv)
 9 | 4. update the `inference-config.yaml` file to reflect the number of GPUs you have available
10 | 
11 | ## Running the Model
12 | ```
13 | python inference.py --cfg inference-config.yaml \
14 |     --videoFile ../../multi_people.mp4 \
15 |     --writeBoxFrames \
16 |     --outputDir output \
17 |     TEST.MODEL_FILE ../models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth 
18 | 
19 | ```
20 | 
21 | The above command will create a video under *output* directory and a lot of pose image under *output/pose* directory. 
22 | Even with usage of GPU (GTX1080 in my case), the person detection will take nearly **0.06 sec**, the person pose match will
23 |  take nearly **0.07 sec**. In total. inference time per frame will be **0.13 sec**, nearly 10fps. So if you prefer a real-time (fps >= 20) 
24 |  pose estimation then you should try other approach.
25 | 
26 | ## Result
27 | 
28 | Some output image is as:
29 | 
30 | ![1 person](inference_1.jpg)
31 | Fig: 1 person inference
32 | 
33 | ![3 person](inference_3.jpg)
34 | Fig: 3 person inference
35 | 
36 | ![3 person](inference_5.jpg)
37 | Fig: 3 person inference


--------------------------------------------------------------------------------
/demo/build-docker.sh:
--------------------------------------------------------------------------------
1 | docker build -t hrnet_demo_inference .
2 | 


--------------------------------------------------------------------------------
/demo/demo.sh:
--------------------------------------------------------------------------------
1 | python inference.py --cfg inference-config-w18_v1.yaml \
2 |     --videoFile ./posetest.mp4 \
3 |     --writeBoxFrames \
4 |     --outputDir output \
5 |     TEST.MODEL_FILE ../../output/coco/pose_hrnet/w18_small_v1_256x192_adam_lr1e-3/model_best.pth
6 | 


--------------------------------------------------------------------------------
/demo/inference-config-w18_v1.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,)
  8 | OUTPUT_DIR: 'output'
  9 | LOG_DIR: 'log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: 'data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 288
 33 |   - 384
 34 |   HEATMAP_SIZE:
 35 |   - 72
 36 |   - 96
 37 |   SIGMA: 3
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 2
 58 |       - 2
 59 |       NUM_CHANNELS:
 60 |       - 16
 61 |       - 32
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 1
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 2
 69 |       - 2
 70 |       - 2
 71 |       NUM_CHANNELS:
 72 |       - 16
 73 |       - 32
 74 |       - 64
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 1
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 2
 82 |       - 2
 83 |       - 2
 84 |       - 2
 85 |       NUM_CHANNELS:
 86 |       - 16
 87 |       - 32
 88 |       - 64
 89 |       - 128
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/demo/inference-config-w18_v2.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,)
  8 | OUTPUT_DIR: 'output'
  9 | LOG_DIR: 'log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: 'data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 288
 33 |   - 384
 34 |   HEATMAP_SIZE:
 35 |   - 72
 36 |   - 96
 37 |   SIGMA: 3
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 2
 58 |       - 2
 59 |       NUM_CHANNELS:
 60 |       - 18
 61 |       - 36
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 3
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 2
 69 |       - 2
 70 |       - 2
 71 |       NUM_CHANNELS:
 72 |       - 18
 73 |       - 36
 74 |       - 72
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 2
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 2
 82 |       - 2
 83 |       - 2
 84 |       - 2
 85 |       NUM_CHANNELS:
 86 |       - 18
 87 |       - 36
 88 |       - 72
 89 |       - 144
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/demo/inference-config.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,)
  8 | OUTPUT_DIR: 'output'
  9 | LOG_DIR: 'log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: 'data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 288
 33 |   - 384
 34 |   HEATMAP_SIZE:
 35 |   - 72
 36 |   - 96
 37 |   SIGMA: 3
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 32
 61 |       - 64
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 32
 73 |       - 64
 74 |       - 128
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 32
 87 |       - 64
 88 |       - 128
 89 |       - 256
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w18_small_v1_256x192_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: '../output'
  9 | LOG_DIR: '../log'
 10 | WORKERS: 4
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: '../data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w18_small_model_v1.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 192
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 48
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 2
 58 |       - 2
 59 |       NUM_CHANNELS:
 60 |       - 16
 61 |       - 32
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 1
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 2
 69 |       - 2
 70 |       - 2
 71 |       NUM_CHANNELS:
 72 |       - 16
 73 |       - 32
 74 |       - 64
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 1
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 2
 82 |       - 2
 83 |       - 2
 84 |       - 2
 85 |       NUM_CHANNELS:
 86 |       - 16
 87 |       - 32
 88 |       - 64
 89 |       - 128
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w18_small_v2_256x192_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: '../output'
  9 | LOG_DIR: '../log'
 10 | WORKERS: 4
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: '../data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w18_small_model_v2.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 192
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 48
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 2
 58 |       - 2
 59 |       NUM_CHANNELS:
 60 |       - 18
 61 |       - 36
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 3
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 2
 69 |       - 2
 70 |       - 2
 71 |       NUM_CHANNELS:
 72 |       - 18
 73 |       - 36
 74 |       - 72
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 2
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 2
 82 |       - 2
 83 |       - 2
 84 |       - 2
 85 |       NUM_CHANNELS:
 86 |       - 18
 87 |       - 36
 88 |       - 72
 89 |       - 144
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: '../output'
  9 | LOG_DIR: '../log'
 10 | WORKERS: 4
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: '../data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w18_small_model_v2.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 192
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 48
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 2
 58 |       - 2
 59 |       NUM_CHANNELS:
 60 |       - 18
 61 |       - 36
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 3
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 2
 69 |       - 2
 70 |       - 2
 71 |       NUM_CHANNELS:
 72 |       - 18
 73 |       - 36
 74 |       - 72
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 2
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 2
 82 |       - 2
 83 |       - 2
 84 |       - 2
 85 |       NUM_CHANNELS:
 86 |       - 18
 87 |       - 36
 88 |       - 72
 89 |       - 144
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 |   SOFT_ARGMAX: true
123 | DEBUG:
124 |   DEBUG: true
125 |   SAVE_BATCH_IMAGES_GT: true
126 |   SAVE_BATCH_IMAGES_PRED: true
127 |   SAVE_HEATMAPS_GT: true
128 |   SAVE_HEATMAPS_PRED: true
129 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: '../output'
  9 | LOG_DIR: '../log'
 10 | WORKERS: 4
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: '../data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 192
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 48
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 32
 61 |       - 64
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 32
 73 |       - 64
 74 |       - 128
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 32
 87 |       - 64
 88 |       - 128
 89 |       - 256
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: 'output'
  9 | LOG_DIR: 'log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: 'data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 288
 33 |   - 384
 34 |   HEATMAP_SIZE:
 35 |   - 72
 36 |   - 96
 37 |   SIGMA: 3
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 32
 61 |       - 64
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 32
 73 |       - 64
 74 |       - 128
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 32
 87 |       - 64
 88 |       - 128
 89 |       - 256
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: 'output'
  9 | LOG_DIR: 'log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: 'data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 192
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 48
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 48
 61 |       - 96
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 48
 73 |       - 96
 74 |       - 192
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 48
 87 |       - 96
 88 |       - 192
 89 |       - 384
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: 'output'
  9 | LOG_DIR: 'log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: 'coco'
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: 0.3
 20 |   ROOT: 'data/coco/'
 21 |   ROT_FACTOR: 45
 22 |   SCALE_FACTOR: 0.35
 23 |   TEST_SET: 'val2017'
 24 |   TRAIN_SET: 'train2017'
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 17
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 288
 33 |   - 384
 34 |   HEATMAP_SIZE:
 35 |   - 72
 36 |   - 96
 37 |   SIGMA: 3
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 48
 61 |       - 96
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 48
 73 |       - 96
 74 |       - 192
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 48
 87 |       - 96
 88 |       - 192
 89 |       - 384
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 24
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 24
111 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 |   BBOX_THRE: 1.0
113 |   IMAGE_THRE: 0.0
114 |   IN_VIS_THRE: 0.2
115 |   MODEL_FILE: ''
116 |   NMS_THRE: 1.0
117 |   OKS_THRE: 0.9
118 |   USE_GT_BBOX: true
119 |   FLIP_TEST: true
120 |   POST_PROCESS: true
121 |   SHIFT_HEATMAP: true
122 | DEBUG:
123 |   DEBUG: true
124 |   SAVE_BATCH_IMAGES_GT: true
125 |   SAVE_BATCH_IMAGES_PRED: true
126 |   SAVE_HEATMAPS_GT: true
127 |   SAVE_HEATMAPS_PRED: true
128 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn100_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 4
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: 0.3
20 |   ROOT: '../data/coco/'
21 |   ROT_FACTOR: 45
22 |   SCALE_FACTOR: 0.35
23 |   TEST_SET: 'val2017'
24 |   TRAIN_SET: 'train2017'
25 | MODEL:
26 |   NAME: 'lpn'
27 |   PRETRAINED: ''
28 |   IMAGE_SIZE:
29 |   - 192
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 48
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 17
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     ATTENTION: 'GC'
39 |     FINAL_CONV_KERNEL: 1
40 |     DECONV_WITH_BIAS: false
41 |     NUM_DECONV_LAYERS: 2
42 |     NUM_DECONV_FILTERS:
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     NUM_LAYERS: 100
49 | LOSS:
50 |   USE_TARGET_WEIGHT: true
51 | TRAIN:
52 |   BATCH_SIZE_PER_GPU: 32
53 |   SHUFFLE: true
54 |   BEGIN_EPOCH: 0
55 |   END_EPOCH: 150
56 |   OPTIMIZER: 'adam'
57 |   LR: 0.001
58 |   LR_FACTOR: 0.1
59 |   LR_STEP:
60 |   - 90
61 |   - 120
62 |   WD: 0.0001
63 |   GAMMA1: 0.99
64 |   GAMMA2: 0.0
65 |   MOMENTUM: 0.9
66 |   NESTEROV: false
67 | TEST:
68 |   BATCH_SIZE_PER_GPU: 32
69 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
70 |   BBOX_THRE: 1.0
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.2
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.9
76 |   USE_GT_BBOX: true
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   SOFT_ARGMAX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn101_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 10
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   ROOT: '../data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'lpn'
24 |   PRETRAINED: ''
25 |   IMAGE_SIZE:
26 |   - 192
27 |   - 256
28 |   HEATMAP_SIZE:
29 |   - 48
30 |   - 64
31 |   SIGMA: 2
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     ATTENTION: 'GC'
36 |     FINAL_CONV_KERNEL: 1
37 |     DECONV_WITH_BIAS: false
38 |     NUM_DECONV_LAYERS: 2
39 |     NUM_DECONV_FILTERS:
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     NUM_LAYERS: 101
46 | LOSS:
47 |   USE_TARGET_WEIGHT: true
48 | TRAIN:
49 |   BATCH_SIZE_PER_GPU: 32
50 |   SHUFFLE: true
51 |   BEGIN_EPOCH: 0
52 |   END_EPOCH: 150
53 |   OPTIMIZER: 'adam'
54 |   LR: 0.001
55 |   LR_FACTOR: 0.1
56 |   LR_STEP:
57 |   - 90
58 |   - 120
59 |   WD: 0.0001
60 |   GAMMA1: 0.99
61 |   GAMMA2: 0.0
62 |   MOMENTUM: 0.9
63 |   NESTEROV: false
64 | TEST:
65 |   BATCH_SIZE_PER_GPU: 32
66 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
67 |   BBOX_THRE: 1.0
68 |   IMAGE_THRE: 0.0
69 |   IN_VIS_THRE: 0.2
70 |   MODEL_FILE: ''
71 |   NMS_THRE: 1.0
72 |   OKS_THRE: 0.9
73 |   FLIP_TEST: true
74 |   POST_PROCESS: true
75 |   SHIFT_HEATMAP: true
76 |   SOFT_ARGMAX: true
77 | DEBUG:
78 |   DEBUG: true
79 |   SAVE_BATCH_IMAGES_GT: true
80 |   SAVE_BATCH_IMAGES_PRED: true
81 |   SAVE_HEATMAPS_GT: true
82 |   SAVE_HEATMAPS_PRED: true
83 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn152_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 10
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   ROOT: '../data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'lpn'
24 |   PRETRAINED: ''
25 |   IMAGE_SIZE:
26 |   - 192
27 |   - 256
28 |   HEATMAP_SIZE:
29 |   - 48
30 |   - 64
31 |   SIGMA: 2
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     ATTENTION: 'GC'
36 |     FINAL_CONV_KERNEL: 1
37 |     DECONV_WITH_BIAS: false
38 |     NUM_DECONV_LAYERS: 2
39 |     NUM_DECONV_FILTERS:
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     NUM_LAYERS: 152
46 | LOSS:
47 |   USE_TARGET_WEIGHT: true
48 | TRAIN:
49 |   BATCH_SIZE_PER_GPU: 32
50 |   SHUFFLE: true
51 |   BEGIN_EPOCH: 0
52 |   END_EPOCH: 150
53 |   OPTIMIZER: 'adam'
54 |   LR: 0.001
55 |   LR_FACTOR: 0.1
56 |   LR_STEP:
57 |   - 90
58 |   - 120
59 |   WD: 0.0001
60 |   GAMMA1: 0.99
61 |   GAMMA2: 0.0
62 |   MOMENTUM: 0.9
63 |   NESTEROV: false
64 | TEST:
65 |   BATCH_SIZE_PER_GPU: 32
66 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
67 |   BBOX_THRE: 1.0
68 |   IMAGE_THRE: 0.0
69 |   IN_VIS_THRE: 0.2
70 |   MODEL_FILE: ''
71 |   NMS_THRE: 1.0
72 |   OKS_THRE: 0.9
73 |   FLIP_TEST: true
74 |   POST_PROCESS: true
75 |   SHIFT_HEATMAP: true
76 |   SOFT_ARGMAX: true
77 | DEBUG:
78 |   DEBUG: false
79 |   SAVE_BATCH_IMAGES_GT: true
80 |   SAVE_BATCH_IMAGES_PRED: true
81 |   SAVE_HEATMAPS_GT: true
82 |   SAVE_HEATMAPS_PRED: true
83 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn18_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 4
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: 0.3
20 |   ROOT: '../data/coco/'
21 |   ROT_FACTOR: 45
22 |   SCALE_FACTOR: 0.35
23 |   TEST_SET: 'val2017'
24 |   TRAIN_SET: 'train2017'
25 | MODEL:
26 |   NAME: 'lpn'
27 |   PRETRAINED: ''
28 |   IMAGE_SIZE:
29 |   - 192
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 48
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 17
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     ATTENTION: 'GC'
39 |     FINAL_CONV_KERNEL: 1
40 |     DECONV_WITH_BIAS: false
41 |     NUM_DECONV_LAYERS: 2
42 |     NUM_DECONV_FILTERS:
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     NUM_LAYERS: 18
49 | LOSS:
50 |   USE_TARGET_WEIGHT: true
51 | TRAIN:
52 |   BATCH_SIZE_PER_GPU: 32
53 |   SHUFFLE: true
54 |   BEGIN_EPOCH: 0
55 |   END_EPOCH: 150
56 |   OPTIMIZER: 'adam'
57 |   LR: 0.001
58 |   LR_FACTOR: 0.1
59 |   LR_STEP:
60 |   - 90
61 |   - 120
62 |   WD: 0.0001
63 |   GAMMA1: 0.99
64 |   GAMMA2: 0.0
65 |   MOMENTUM: 0.9
66 |   NESTEROV: false
67 | TEST:
68 |   BATCH_SIZE_PER_GPU: 32
69 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
70 |   BBOX_THRE: 1.0
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.2
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.9
76 |   USE_GT_BBOX: true
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   SOFT_ARGMAX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn18h_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 4
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: 0.3
20 |   ROOT: '../data/coco/'
21 |   ROT_FACTOR: 45
22 |   SCALE_FACTOR: 0.35
23 |   TEST_SET: 'val2017'
24 |   TRAIN_SET: 'train2017'
25 | MODEL:
26 |   NAME: 'lpn'
27 |   PRETRAINED: ''
28 |   IMAGE_SIZE:
29 |   - 192
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 48
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 17
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     ATTENTION: 'GC'
39 |     FINAL_CONV_KERNEL: 1
40 |     DECONV_WITH_BIAS: false
41 |     NUM_DECONV_LAYERS: 2
42 |     NUM_DECONV_FILTERS:
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     NUM_LAYERS: 18
49 | LOSS:
50 |   USE_TARGET_WEIGHT: true
51 | TRAIN:
52 |   BATCH_SIZE_PER_GPU: 32
53 |   SHUFFLE: true
54 |   BEGIN_EPOCH: 0
55 |   END_EPOCH: 150
56 |   OPTIMIZER: 'adam'
57 |   LR: 0.001
58 |   LR_FACTOR: 0.1
59 |   LR_STEP:
60 |   - 90
61 |   - 120
62 |   WD: 0.0001
63 |   GAMMA1: 0.99
64 |   GAMMA2: 0.0
65 |   MOMENTUM: 0.9
66 |   NESTEROV: false
67 | TEST:
68 |   BATCH_SIZE_PER_GPU: 32
69 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
70 |   BBOX_THRE: 1.0
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.2
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.9
76 |   USE_GT_BBOX: true
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   SOFT_ARGMAX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn34_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 4
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: 0.3
20 |   ROOT: '../data/coco/'
21 |   ROT_FACTOR: 45
22 |   SCALE_FACTOR: 0.35
23 |   TEST_SET: 'val2017'
24 |   TRAIN_SET: 'train2017'
25 | MODEL:
26 |   NAME: 'lpn'
27 |   PRETRAINED: ''
28 |   IMAGE_SIZE:
29 |   - 192
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 48
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 17
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     ATTENTION: 'GC'
39 |     FINAL_CONV_KERNEL: 1
40 |     DECONV_WITH_BIAS: false
41 |     NUM_DECONV_LAYERS: 2
42 |     NUM_DECONV_FILTERS:
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     NUM_LAYERS: 34
49 | LOSS:
50 |   USE_TARGET_WEIGHT: true
51 | TRAIN:
52 |   BATCH_SIZE_PER_GPU: 32
53 |   SHUFFLE: true
54 |   BEGIN_EPOCH: 0
55 |   END_EPOCH: 150
56 |   OPTIMIZER: 'adam'
57 |   LR: 0.001
58 |   LR_FACTOR: 0.1
59 |   LR_STEP:
60 |   - 90
61 |   - 120
62 |   WD: 0.0001
63 |   GAMMA1: 0.99
64 |   GAMMA2: 0.0
65 |   MOMENTUM: 0.9
66 |   NESTEROV: false
67 | TEST:
68 |   BATCH_SIZE_PER_GPU: 32
69 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
70 |   BBOX_THRE: 1.0
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.2
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.9
76 |   USE_GT_BBOX: true
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   SOFT_ARGMAX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn34h_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 4
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: 0.3
20 |   ROOT: '../data/coco/'
21 |   ROT_FACTOR: 45
22 |   SCALE_FACTOR: 0.35
23 |   TEST_SET: 'val2017'
24 |   TRAIN_SET: 'train2017'
25 | MODEL:
26 |   NAME: 'lpn'
27 |   PRETRAINED: ''
28 |   IMAGE_SIZE:
29 |   - 192
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 48
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 17
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     ATTENTION: 'GC'
39 |     FINAL_CONV_KERNEL: 1
40 |     DECONV_WITH_BIAS: false
41 |     NUM_DECONV_LAYERS: 2
42 |     NUM_DECONV_FILTERS:
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     NUM_LAYERS: 34
49 | LOSS:
50 |   USE_TARGET_WEIGHT: true
51 | TRAIN:
52 |   BATCH_SIZE_PER_GPU: 32
53 |   SHUFFLE: true
54 |   BEGIN_EPOCH: 0
55 |   END_EPOCH: 150
56 |   OPTIMIZER: 'adam'
57 |   LR: 0.001
58 |   LR_FACTOR: 0.1
59 |   LR_STEP:
60 |   - 90
61 |   - 120
62 |   WD: 0.0001
63 |   GAMMA1: 0.99
64 |   GAMMA2: 0.0
65 |   MOMENTUM: 0.9
66 |   NESTEROV: false
67 | TEST:
68 |   BATCH_SIZE_PER_GPU: 32
69 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
70 |   BBOX_THRE: 1.0
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.2
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.9
76 |   USE_GT_BBOX: true
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   SOFT_ARGMAX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/coco/lpn/lpn50_256x192_gd256x2_gc.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: false
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: '../output'
 9 | LOG_DIR: '../log'
10 | WORKERS: 4
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: true
15 |   DATASET: 'coco'
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: 0.3
20 |   ROOT: '../data/coco/'
21 |   ROT_FACTOR: 45
22 |   SCALE_FACTOR: 0.35
23 |   TEST_SET: 'val2017'
24 |   TRAIN_SET: 'train2017'
25 | MODEL:
26 |   NAME: 'lpn'
27 |   PRETRAINED: ''
28 |   IMAGE_SIZE:
29 |   - 192
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 48
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 17
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     ATTENTION: 'GC'
39 |     FINAL_CONV_KERNEL: 1
40 |     DECONV_WITH_BIAS: false
41 |     NUM_DECONV_LAYERS: 2
42 |     NUM_DECONV_FILTERS:
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     NUM_LAYERS: 50
49 | LOSS:
50 |   USE_TARGET_WEIGHT: true
51 | TRAIN:
52 |   BATCH_SIZE_PER_GPU: 32
53 |   SHUFFLE: true
54 |   BEGIN_EPOCH: 0
55 |   END_EPOCH: 150
56 |   OPTIMIZER: 'adam'
57 |   LR: 0.001
58 |   LR_FACTOR: 0.1
59 |   LR_STEP:
60 |   - 90
61 |   - 120
62 |   WD: 0.0001
63 |   GAMMA1: 0.99
64 |   GAMMA2: 0.0
65 |   MOMENTUM: 0.9
66 |   NESTEROV: false
67 | TEST:
68 |   BATCH_SIZE_PER_GPU: 32
69 |   COCO_BBOX_FILE: '../data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
70 |   BBOX_THRE: 1.0
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.2
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.9
76 |   USE_GT_BBOX: true
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   SOFT_ARGMAX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: 'coco'
16 |   ROOT: 'data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'pose_resnet'
24 |   PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth'
25 |   IMAGE_SIZE:
26 |   - 192
27 |   - 256
28 |   HEATMAP_SIZE:
29 |   - 48
30 |   - 64
31 |   SIGMA: 2
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     FINAL_CONV_KERNEL: 1
36 |     DECONV_WITH_BIAS: false
37 |     NUM_DECONV_LAYERS: 3
38 |     NUM_DECONV_FILTERS:
39 |     - 256
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     - 4
46 |     NUM_LAYERS: 101
47 | LOSS:
48 |   USE_TARGET_WEIGHT: true
49 | TRAIN:
50 |   BATCH_SIZE_PER_GPU: 32
51 |   SHUFFLE: true
52 |   BEGIN_EPOCH: 0
53 |   END_EPOCH: 140
54 |   OPTIMIZER: 'adam'
55 |   LR: 0.001
56 |   LR_FACTOR: 0.1
57 |   LR_STEP:
58 |   - 90
59 |   - 120
60 |   WD: 0.0001
61 |   GAMMA1: 0.99
62 |   GAMMA2: 0.0
63 |   MOMENTUM: 0.9
64 |   NESTEROV: false
65 | TEST:
66 |   BATCH_SIZE_PER_GPU: 32
67 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
68 |   BBOX_THRE: 1.0
69 |   IMAGE_THRE: 0.0
70 |   IN_VIS_THRE: 0.2
71 |   MODEL_FILE: ''
72 |   NMS_THRE: 1.0
73 |   OKS_THRE: 0.9
74 |   FLIP_TEST: true
75 |   POST_PROCESS: true
76 |   SHIFT_HEATMAP: true
77 |   USE_GT_BBOX: true
78 | DEBUG:
79 |   DEBUG: true
80 |   SAVE_BATCH_IMAGES_GT: true
81 |   SAVE_BATCH_IMAGES_PRED: true
82 |   SAVE_HEATMAPS_GT: true
83 |   SAVE_HEATMAPS_PRED: true
84 | 


--------------------------------------------------------------------------------
/experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: 'coco'
16 |   ROOT: 'data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'pose_resnet'
24 |   PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth'
25 |   IMAGE_SIZE:
26 |   - 288
27 |   - 384
28 |   HEATMAP_SIZE:
29 |   - 72
30 |   - 96
31 |   SIGMA: 3
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     FINAL_CONV_KERNEL: 1
36 |     DECONV_WITH_BIAS: false
37 |     NUM_DECONV_LAYERS: 3
38 |     NUM_DECONV_FILTERS:
39 |     - 256
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     - 4
46 |     NUM_LAYERS: 101
47 | LOSS:
48 |   USE_TARGET_WEIGHT: true
49 | TRAIN:
50 |   BATCH_SIZE_PER_GPU: 32
51 |   SHUFFLE: true
52 |   BEGIN_EPOCH: 0
53 |   END_EPOCH: 140
54 |   OPTIMIZER: 'adam'
55 |   LR: 0.001
56 |   LR_FACTOR: 0.1
57 |   LR_STEP:
58 |   - 90
59 |   - 120
60 |   WD: 0.0001
61 |   GAMMA1: 0.99
62 |   GAMMA2: 0.0
63 |   MOMENTUM: 0.9
64 |   NESTEROV: false
65 | TEST:
66 |   BATCH_SIZE_PER_GPU: 32
67 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
68 |   BBOX_THRE: 1.0
69 |   IMAGE_THRE: 0.0
70 |   IN_VIS_THRE: 0.2
71 |   MODEL_FILE: ''
72 |   NMS_THRE: 1.0
73 |   OKS_THRE: 0.9
74 |   FLIP_TEST: true
75 |   POST_PROCESS: true
76 |   SHIFT_HEATMAP: true
77 |   USE_GT_BBOX: true
78 | DEBUG:
79 |   DEBUG: true
80 |   SAVE_BATCH_IMAGES_GT: true
81 |   SAVE_BATCH_IMAGES_PRED: true
82 |   SAVE_HEATMAPS_GT: true
83 |   SAVE_HEATMAPS_PRED: true
84 | 


--------------------------------------------------------------------------------
/experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: 'coco'
16 |   ROOT: 'data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'pose_resnet'
24 |   PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth'
25 |   IMAGE_SIZE:
26 |   - 192
27 |   - 256
28 |   HEATMAP_SIZE:
29 |   - 48
30 |   - 64
31 |   SIGMA: 2
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     FINAL_CONV_KERNEL: 1
36 |     DECONV_WITH_BIAS: false
37 |     NUM_DECONV_LAYERS: 3
38 |     NUM_DECONV_FILTERS:
39 |     - 256
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     - 4
46 |     NUM_LAYERS: 152
47 | LOSS:
48 |   USE_TARGET_WEIGHT: true
49 | TRAIN:
50 |   BATCH_SIZE_PER_GPU: 32
51 |   SHUFFLE: true
52 |   BEGIN_EPOCH: 0
53 |   END_EPOCH: 140
54 |   OPTIMIZER: 'adam'
55 |   LR: 0.001
56 |   LR_FACTOR: 0.1
57 |   LR_STEP:
58 |   - 90
59 |   - 120
60 |   WD: 0.0001
61 |   GAMMA1: 0.99
62 |   GAMMA2: 0.0
63 |   MOMENTUM: 0.9
64 |   NESTEROV: false
65 | TEST:
66 |   BATCH_SIZE_PER_GPU: 32
67 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
68 |   BBOX_THRE: 1.0
69 |   IMAGE_THRE: 0.0
70 |   IN_VIS_THRE: 0.2
71 |   MODEL_FILE: ''
72 |   NMS_THRE: 1.0
73 |   OKS_THRE: 0.9
74 |   FLIP_TEST: true
75 |   POST_PROCESS: true
76 |   SHIFT_HEATMAP: true
77 |   USE_GT_BBOX: true
78 | DEBUG:
79 |   DEBUG: true
80 |   SAVE_BATCH_IMAGES_GT: true
81 |   SAVE_BATCH_IMAGES_PRED: true
82 |   SAVE_HEATMAPS_GT: true
83 |   SAVE_HEATMAPS_PRED: true
84 | 


--------------------------------------------------------------------------------
/experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: 'coco'
16 |   ROOT: 'data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'pose_resnet'
24 |   PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth'
25 |   IMAGE_SIZE:
26 |   - 288
27 |   - 384
28 |   HEATMAP_SIZE:
29 |   - 72
30 |   - 96
31 |   SIGMA: 3
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     FINAL_CONV_KERNEL: 1
36 |     DECONV_WITH_BIAS: false
37 |     NUM_DECONV_LAYERS: 3
38 |     NUM_DECONV_FILTERS:
39 |     - 256
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     - 4
46 |     NUM_LAYERS: 152
47 | LOSS:
48 |   USE_TARGET_WEIGHT: true
49 | TRAIN:
50 |   BATCH_SIZE_PER_GPU: 32
51 |   SHUFFLE: true
52 |   BEGIN_EPOCH: 0
53 |   END_EPOCH: 140
54 |   OPTIMIZER: 'adam'
55 |   LR: 0.001
56 |   LR_FACTOR: 0.1
57 |   LR_STEP:
58 |   - 90
59 |   - 120
60 |   WD: 0.0001
61 |   GAMMA1: 0.99
62 |   GAMMA2: 0.0
63 |   MOMENTUM: 0.9
64 |   NESTEROV: false
65 | TEST:
66 |   BATCH_SIZE_PER_GPU: 32
67 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
68 |   BBOX_THRE: 1.0
69 |   IMAGE_THRE: 0.0
70 |   IN_VIS_THRE: 0.2
71 |   MODEL_FILE: ''
72 |   NMS_THRE: 1.0
73 |   OKS_THRE: 0.9
74 |   FLIP_TEST: true
75 |   POST_PROCESS: true
76 |   SHIFT_HEATMAP: true
77 |   USE_GT_BBOX: true
78 | DEBUG:
79 |   DEBUG: true
80 |   SAVE_BATCH_IMAGES_GT: true
81 |   SAVE_BATCH_IMAGES_PRED: true
82 |   SAVE_HEATMAPS_GT: true
83 |   SAVE_HEATMAPS_PRED: true
84 | 


--------------------------------------------------------------------------------
/experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: 'coco'
16 |   ROOT: 'data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'pose_resnet'
24 |   PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth'
25 |   IMAGE_SIZE:
26 |   - 192
27 |   - 256
28 |   HEATMAP_SIZE:
29 |   - 48
30 |   - 64
31 |   SIGMA: 2
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     FINAL_CONV_KERNEL: 1
36 |     DECONV_WITH_BIAS: false
37 |     NUM_DECONV_LAYERS: 3
38 |     NUM_DECONV_FILTERS:
39 |     - 256
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     - 4
46 |     NUM_LAYERS: 50
47 | LOSS:
48 |   USE_TARGET_WEIGHT: true
49 | TRAIN:
50 |   BATCH_SIZE_PER_GPU: 32
51 |   SHUFFLE: true
52 |   BEGIN_EPOCH: 0
53 |   END_EPOCH: 140
54 |   OPTIMIZER: 'adam'
55 |   LR: 0.001
56 |   LR_FACTOR: 0.1
57 |   LR_STEP:
58 |   - 90
59 |   - 120
60 |   WD: 0.0001
61 |   GAMMA1: 0.99
62 |   GAMMA2: 0.0
63 |   MOMENTUM: 0.9
64 |   NESTEROV: false
65 | TEST:
66 |   BATCH_SIZE_PER_GPU: 32
67 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
68 |   BBOX_THRE: 1.0
69 |   IMAGE_THRE: 0.0
70 |   IN_VIS_THRE: 0.2
71 |   MODEL_FILE: ''
72 |   NMS_THRE: 1.0
73 |   OKS_THRE: 0.9
74 |   FLIP_TEST: true
75 |   POST_PROCESS: true
76 |   SHIFT_HEATMAP: true
77 |   USE_GT_BBOX: true
78 | DEBUG:
79 |   DEBUG: true
80 |   SAVE_BATCH_IMAGES_GT: true
81 |   SAVE_BATCH_IMAGES_PRED: true
82 |   SAVE_HEATMAPS_GT: true
83 |   SAVE_HEATMAPS_PRED: true
84 | 


--------------------------------------------------------------------------------
/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: 'coco'
16 |   ROOT: 'data/coco/'
17 |   TEST_SET: 'val2017'
18 |   TRAIN_SET: 'train2017'
19 |   FLIP: true
20 |   ROT_FACTOR: 40
21 |   SCALE_FACTOR: 0.3
22 | MODEL:
23 |   NAME: 'pose_resnet'
24 |   PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth'
25 |   IMAGE_SIZE:
26 |   - 288
27 |   - 384
28 |   HEATMAP_SIZE:
29 |   - 72
30 |   - 96
31 |   SIGMA: 3
32 |   NUM_JOINTS: 17
33 |   TARGET_TYPE: 'gaussian'
34 |   EXTRA:
35 |     FINAL_CONV_KERNEL: 1
36 |     DECONV_WITH_BIAS: false
37 |     NUM_DECONV_LAYERS: 3
38 |     NUM_DECONV_FILTERS:
39 |     - 256
40 |     - 256
41 |     - 256
42 |     NUM_DECONV_KERNELS:
43 |     - 4
44 |     - 4
45 |     - 4
46 |     NUM_LAYERS: 50
47 | LOSS:
48 |   USE_TARGET_WEIGHT: true
49 | TRAIN:
50 |   BATCH_SIZE_PER_GPU: 32
51 |   SHUFFLE: true
52 |   BEGIN_EPOCH: 0
53 |   END_EPOCH: 140
54 |   OPTIMIZER: 'adam'
55 |   LR: 0.001
56 |   LR_FACTOR: 0.1
57 |   LR_STEP:
58 |   - 90
59 |   - 120
60 |   WD: 0.0001
61 |   GAMMA1: 0.99
62 |   GAMMA2: 0.0
63 |   MOMENTUM: 0.9
64 |   NESTEROV: false
65 | TEST:
66 |   BATCH_SIZE_PER_GPU: 32
67 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
68 |   BBOX_THRE: 1.0
69 |   IMAGE_THRE: 0.0
70 |   IN_VIS_THRE: 0.2
71 |   MODEL_FILE: ''
72 |   NMS_THRE: 1.0
73 |   OKS_THRE: 0.9
74 |   FLIP_TEST: true
75 |   POST_PROCESS: true
76 |   SHIFT_HEATMAP: true
77 |   USE_GT_BBOX: true
78 | DEBUG:
79 |   DEBUG: true
80 |   SAVE_BATCH_IMAGES_GT: true
81 |   SAVE_BATCH_IMAGES_PRED: true
82 |   SAVE_HEATMAPS_GT: true
83 |   SAVE_HEATMAPS_PRED: true
84 | 


--------------------------------------------------------------------------------
/experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: '../output'
  9 | LOG_DIR: '../log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: mpii
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: -1.0
 20 |   ROOT: '../data/mpii/'
 21 |   ROT_FACTOR: 30
 22 |   SCALE_FACTOR: 0.25
 23 |   TEST_SET: valid
 24 |   TRAIN_SET: train
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 16
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 256
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 64
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 32
 61 |       - 64
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 32
 73 |       - 64
 74 |       - 128
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 32
 87 |       - 64
 88 |       - 128
 89 |       - 256
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   MODEL_FILE: ''
112 |   FLIP_TEST: true
113 |   POST_PROCESS: true
114 |   SHIFT_HEATMAP: true
115 | DEBUG:
116 |   DEBUG: true
117 |   SAVE_BATCH_IMAGES_GT: true
118 |   SAVE_BATCH_IMAGES_PRED: true
119 |   SAVE_HEATMAPS_GT: true
120 |   SAVE_HEATMAPS_PRED: true
121 | 


--------------------------------------------------------------------------------
/experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
  1 | AUTO_RESUME: true
  2 | CUDNN:
  3 |   BENCHMARK: true
  4 |   DETERMINISTIC: false
  5 |   ENABLED: true
  6 | DATA_DIR: ''
  7 | GPUS: (0,1,2,3)
  8 | OUTPUT_DIR: '../output'
  9 | LOG_DIR: '../log'
 10 | WORKERS: 24
 11 | PRINT_FREQ: 100
 12 | 
 13 | DATASET:
 14 |   COLOR_RGB: true
 15 |   DATASET: mpii
 16 |   DATA_FORMAT: jpg
 17 |   FLIP: true
 18 |   NUM_JOINTS_HALF_BODY: 8
 19 |   PROB_HALF_BODY: -1.0
 20 |   ROOT: '../data/mpii/'
 21 |   ROT_FACTOR: 30
 22 |   SCALE_FACTOR: 0.25
 23 |   TEST_SET: valid
 24 |   TRAIN_SET: train
 25 | MODEL:
 26 |   INIT_WEIGHTS: true
 27 |   NAME: pose_hrnet
 28 |   NUM_JOINTS: 16
 29 |   PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth'
 30 |   TARGET_TYPE: gaussian
 31 |   IMAGE_SIZE:
 32 |   - 256
 33 |   - 256
 34 |   HEATMAP_SIZE:
 35 |   - 64
 36 |   - 64
 37 |   SIGMA: 2
 38 |   EXTRA:
 39 |     PRETRAINED_LAYERS:
 40 |     - 'conv1'
 41 |     - 'bn1'
 42 |     - 'conv2'
 43 |     - 'bn2'
 44 |     - 'layer1'
 45 |     - 'transition1'
 46 |     - 'stage2'
 47 |     - 'transition2'
 48 |     - 'stage3'
 49 |     - 'transition3'
 50 |     - 'stage4'
 51 |     FINAL_CONV_KERNEL: 1
 52 |     STAGE2:
 53 |       NUM_MODULES: 1
 54 |       NUM_BRANCHES: 2
 55 |       BLOCK: BASIC
 56 |       NUM_BLOCKS:
 57 |       - 4
 58 |       - 4
 59 |       NUM_CHANNELS:
 60 |       - 48
 61 |       - 96
 62 |       FUSE_METHOD: SUM
 63 |     STAGE3:
 64 |       NUM_MODULES: 4
 65 |       NUM_BRANCHES: 3
 66 |       BLOCK: BASIC
 67 |       NUM_BLOCKS:
 68 |       - 4
 69 |       - 4
 70 |       - 4
 71 |       NUM_CHANNELS:
 72 |       - 48
 73 |       - 96
 74 |       - 192
 75 |       FUSE_METHOD: SUM
 76 |     STAGE4:
 77 |       NUM_MODULES: 3
 78 |       NUM_BRANCHES: 4
 79 |       BLOCK: BASIC
 80 |       NUM_BLOCKS:
 81 |       - 4
 82 |       - 4
 83 |       - 4
 84 |       - 4
 85 |       NUM_CHANNELS:
 86 |       - 48
 87 |       - 96
 88 |       - 192
 89 |       - 384
 90 |       FUSE_METHOD: SUM
 91 | LOSS:
 92 |   USE_TARGET_WEIGHT: true
 93 | TRAIN:
 94 |   BATCH_SIZE_PER_GPU: 32
 95 |   SHUFFLE: true
 96 |   BEGIN_EPOCH: 0
 97 |   END_EPOCH: 210
 98 |   OPTIMIZER: adam
 99 |   LR: 0.001
100 |   LR_FACTOR: 0.1
101 |   LR_STEP:
102 |   - 170
103 |   - 200
104 |   WD: 0.0001
105 |   GAMMA1: 0.99
106 |   GAMMA2: 0.0
107 |   MOMENTUM: 0.9
108 |   NESTEROV: false
109 | TEST:
110 |   BATCH_SIZE_PER_GPU: 32
111 |   MODEL_FILE: ''
112 |   FLIP_TEST: true
113 |   POST_PROCESS: true
114 |   SHIFT_HEATMAP: true
115 | DEBUG:
116 |   DEBUG: true
117 |   SAVE_BATCH_IMAGES_GT: true
118 |   SAVE_BATCH_IMAGES_PRED: true
119 |   SAVE_HEATMAPS_GT: true
120 |   SAVE_HEATMAPS_PRED: true
121 | 


--------------------------------------------------------------------------------
/experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: mpii
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: -1.0
20 |   ROOT: 'data/mpii/'
21 |   ROT_FACTOR: 30
22 |   SCALE_FACTOR: 0.25
23 |   TEST_SET: valid
24 |   TRAIN_SET: train
25 | MODEL:
26 |   NAME: 'pose_resnet'
27 |   PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth'
28 |   IMAGE_SIZE:
29 |   - 256
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 64
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 16
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     FINAL_CONV_KERNEL: 1
39 |     DECONV_WITH_BIAS: false
40 |     NUM_DECONV_LAYERS: 3
41 |     NUM_DECONV_FILTERS:
42 |     - 256
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     - 4
49 |     NUM_LAYERS: 101
50 | LOSS:
51 |   USE_TARGET_WEIGHT: true
52 | TRAIN:
53 |   BATCH_SIZE_PER_GPU: 32
54 |   SHUFFLE: true
55 |   BEGIN_EPOCH: 0
56 |   END_EPOCH: 140
57 |   OPTIMIZER: 'adam'
58 |   LR: 0.001
59 |   LR_FACTOR: 0.1
60 |   LR_STEP:
61 |   - 90
62 |   - 120
63 |   WD: 0.0001
64 |   GAMMA1: 0.99
65 |   GAMMA2: 0.0
66 |   MOMENTUM: 0.9
67 |   NESTEROV: false
68 | TEST:
69 |   BATCH_SIZE_PER_GPU: 32
70 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
71 |   BBOX_THRE: 1.0
72 |   IMAGE_THRE: 0.0
73 |   IN_VIS_THRE: 0.2
74 |   MODEL_FILE: ''
75 |   NMS_THRE: 1.0
76 |   OKS_THRE: 0.9
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   USE_GT_BBOX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: mpii
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: -1.0
20 |   ROOT: 'data/mpii/'
21 |   ROT_FACTOR: 30
22 |   SCALE_FACTOR: 0.25
23 |   TEST_SET: valid
24 |   TRAIN_SET: train
25 | MODEL:
26 |   NAME: 'pose_resnet'
27 |   PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth'
28 |   IMAGE_SIZE:
29 |   - 256
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 64
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 16
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     FINAL_CONV_KERNEL: 1
39 |     DECONV_WITH_BIAS: false
40 |     NUM_DECONV_LAYERS: 3
41 |     NUM_DECONV_FILTERS:
42 |     - 256
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     - 4
49 |     NUM_LAYERS: 152
50 | LOSS:
51 |   USE_TARGET_WEIGHT: true
52 | TRAIN:
53 |   BATCH_SIZE_PER_GPU: 32
54 |   SHUFFLE: true
55 |   BEGIN_EPOCH: 0
56 |   END_EPOCH: 140
57 |   OPTIMIZER: 'adam'
58 |   LR: 0.001
59 |   LR_FACTOR: 0.1
60 |   LR_STEP:
61 |   - 90
62 |   - 120
63 |   WD: 0.0001
64 |   GAMMA1: 0.99
65 |   GAMMA2: 0.0
66 |   MOMENTUM: 0.9
67 |   NESTEROV: false
68 | TEST:
69 |   BATCH_SIZE_PER_GPU: 32
70 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
71 |   BBOX_THRE: 1.0
72 |   IMAGE_THRE: 0.0
73 |   IN_VIS_THRE: 0.2
74 |   MODEL_FILE: ''
75 |   NMS_THRE: 1.0
76 |   OKS_THRE: 0.9
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   USE_GT_BBOX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
 1 | AUTO_RESUME: true
 2 | CUDNN:
 3 |   BENCHMARK: true
 4 |   DETERMINISTIC: false
 5 |   ENABLED: true
 6 | DATA_DIR: ''
 7 | GPUS: (0,1,2,3)
 8 | OUTPUT_DIR: 'output'
 9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 | 
13 | DATASET:
14 |   COLOR_RGB: false
15 |   DATASET: mpii
16 |   DATA_FORMAT: jpg
17 |   FLIP: true
18 |   NUM_JOINTS_HALF_BODY: 8
19 |   PROB_HALF_BODY: -1.0
20 |   ROOT: 'data/mpii/'
21 |   ROT_FACTOR: 30
22 |   SCALE_FACTOR: 0.25
23 |   TEST_SET: valid
24 |   TRAIN_SET: train
25 | MODEL:
26 |   NAME: 'pose_resnet'
27 |   PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth'
28 |   IMAGE_SIZE:
29 |   - 256
30 |   - 256
31 |   HEATMAP_SIZE:
32 |   - 64
33 |   - 64
34 |   SIGMA: 2
35 |   NUM_JOINTS: 16
36 |   TARGET_TYPE: 'gaussian'
37 |   EXTRA:
38 |     FINAL_CONV_KERNEL: 1
39 |     DECONV_WITH_BIAS: false
40 |     NUM_DECONV_LAYERS: 3
41 |     NUM_DECONV_FILTERS:
42 |     - 256
43 |     - 256
44 |     - 256
45 |     NUM_DECONV_KERNELS:
46 |     - 4
47 |     - 4
48 |     - 4
49 |     NUM_LAYERS: 50
50 | LOSS:
51 |   USE_TARGET_WEIGHT: true
52 | TRAIN:
53 |   BATCH_SIZE_PER_GPU: 32
54 |   SHUFFLE: true
55 |   BEGIN_EPOCH: 0
56 |   END_EPOCH: 140
57 |   OPTIMIZER: 'adam'
58 |   LR: 0.001
59 |   LR_FACTOR: 0.1
60 |   LR_STEP:
61 |   - 90
62 |   - 120
63 |   WD: 0.0001
64 |   GAMMA1: 0.99
65 |   GAMMA2: 0.0
66 |   MOMENTUM: 0.9
67 |   NESTEROV: false
68 | TEST:
69 |   BATCH_SIZE_PER_GPU: 32
70 |   COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
71 |   BBOX_THRE: 1.0
72 |   IMAGE_THRE: 0.0
73 |   IN_VIS_THRE: 0.2
74 |   MODEL_FILE: ''
75 |   NMS_THRE: 1.0
76 |   OKS_THRE: 0.9
77 |   FLIP_TEST: true
78 |   POST_PROCESS: true
79 |   SHIFT_HEATMAP: true
80 |   USE_GT_BBOX: true
81 | DEBUG:
82 |   DEBUG: true
83 |   SAVE_BATCH_IMAGES_GT: true
84 |   SAVE_BATCH_IMAGES_PRED: true
85 |   SAVE_HEATMAPS_GT: true
86 |   SAVE_HEATMAPS_PRED: true
87 | 


--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	cd nms; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../
3 | clean:
4 | 	cd nms; rm *.so; cd ../../
5 | 


--------------------------------------------------------------------------------
/lib/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from .default import _C as cfg
 8 | from .default import update_config
 9 | from .models import MODEL_EXTRAS
10 | 


--------------------------------------------------------------------------------
/lib/config/default.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | 
  7 | from yacs.config import CfgNode as CN
  8 | 
  9 | 
 10 | _C = CN()
 11 | 
 12 | _C.OUTPUT_DIR = ''
 13 | _C.LOG_DIR = ''
 14 | _C.DATA_DIR = ''
 15 | _C.GPUS = (0,1,2,3)
 16 | _C.WORKERS = 4
 17 | _C.PRINT_FREQ = 20
 18 | _C.AUTO_RESUME = False
 19 | _C.PIN_MEMORY = True
 20 | _C.VERBOSE = True
 21 | _C.RANK = 0
 22 | _C.DIST_BACKEND = 'nccl'
 23 | _C.DIST_URL = 'tcp://localhost:23456'
 24 | _C.MULTIPROCESSING_DISTRIBUTED = True
 25 | 
 26 | # Cudnn related params
 27 | _C.CUDNN = CN()
 28 | _C.CUDNN.BENCHMARK = True
 29 | _C.CUDNN.DETERMINISTIC = False
 30 | _C.CUDNN.ENABLED = True
 31 | 
 32 | # common params for NETWORK
 33 | _C.MODEL = CN()
 34 | _C.MODEL.NAME = 'pose_hrnet'
 35 | _C.MODEL.INIT_WEIGHTS = True
 36 | _C.MODEL.PRETRAINED = ''
 37 | _C.MODEL.NUM_JOINTS = 17
 38 | _C.MODEL.TAG_PER_JOINT = True
 39 | _C.MODEL.TARGET_TYPE = 'gaussian'
 40 | _C.MODEL.IMAGE_SIZE = [256, 256]  # width * height, ex: 192 * 256
 41 | _C.MODEL.HEATMAP_SIZE = [64, 64]  # width * height, ex: 24 * 32
 42 | _C.MODEL.SIGMA = 2
 43 | _C.MODEL.EXTRA = CN(new_allowed=True)
 44 | _C.MODEL.SYNC_BN = False
 45 | 
 46 | _C.LOSS = CN()
 47 | _C.LOSS.USE_OHKM = False
 48 | _C.LOSS.TOPK = 8
 49 | _C.LOSS.USE_TARGET_WEIGHT = True
 50 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False
 51 | 
 52 | # DATASET related params
 53 | _C.DATASET = CN()
 54 | _C.DATASET.ROOT = ''
 55 | _C.DATASET.DATASET = 'mpii'
 56 | _C.DATASET.TRAIN_SET = 'train'
 57 | _C.DATASET.TEST_SET = 'valid'
 58 | _C.DATASET.DATA_FORMAT = 'jpg'
 59 | _C.DATASET.HYBRID_JOINTS_TYPE = ''
 60 | _C.DATASET.SELECT_DATA = False
 61 | 
 62 | # training data augmentation
 63 | _C.DATASET.FLIP = True
 64 | _C.DATASET.SCALE_FACTOR = 0.25
 65 | _C.DATASET.ROT_FACTOR = 30
 66 | _C.DATASET.PROB_HALF_BODY = 0.0
 67 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8
 68 | _C.DATASET.COLOR_RGB = False
 69 | 
 70 | # train
 71 | _C.TRAIN = CN()
 72 | 
 73 | _C.TRAIN.LR_FACTOR = 0.1
 74 | _C.TRAIN.LR_STEP = [90, 110]
 75 | _C.TRAIN.LR = 0.001
 76 | 
 77 | _C.TRAIN.OPTIMIZER = 'adam'
 78 | _C.TRAIN.MOMENTUM = 0.9
 79 | _C.TRAIN.WD = 0.0001
 80 | _C.TRAIN.NESTEROV = False
 81 | _C.TRAIN.GAMMA1 = 0.99
 82 | _C.TRAIN.GAMMA2 = 0.0
 83 | 
 84 | _C.TRAIN.BEGIN_EPOCH = 0
 85 | _C.TRAIN.END_EPOCH = 140
 86 | 
 87 | _C.TRAIN.RESUME = False
 88 | _C.TRAIN.CHECKPOINT = ''
 89 | 
 90 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32
 91 | _C.TRAIN.SHUFFLE = True
 92 | 
 93 | # testing
 94 | _C.TEST = CN()
 95 | 
 96 | # size of images for each device
 97 | _C.TEST.BATCH_SIZE_PER_GPU = 32
 98 | # Test Model Epoch
 99 | _C.TEST.FLIP_TEST = False
100 | _C.TEST.POST_PROCESS = False
101 | _C.TEST.SHIFT_HEATMAP = False
102 | 
103 | _C.TEST.USE_GT_BBOX = False
104 | 
105 | # nms
106 | _C.TEST.IMAGE_THRE = 0.1
107 | _C.TEST.NMS_THRE = 0.6
108 | _C.TEST.SOFT_NMS = False
109 | _C.TEST.OKS_THRE = 0.5
110 | _C.TEST.IN_VIS_THRE = 0.0
111 | _C.TEST.COCO_BBOX_FILE = ''
112 | _C.TEST.BBOX_THRE = 1.0
113 | _C.TEST.MODEL_FILE = ''
114 | 
115 | # soft_argmax
116 | _C.TEST.SOFT_ARGMAX = False
117 | _C.TEST.BIAS = 0.0
118 | 
119 | # debug
120 | _C.DEBUG = CN()
121 | _C.DEBUG.DEBUG = False
122 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False
123 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False
124 | _C.DEBUG.SAVE_HEATMAPS_GT = False
125 | _C.DEBUG.SAVE_HEATMAPS_PRED = False
126 | 
127 | 
128 | def update_config(cfg, args):
129 |     cfg.defrost()
130 |     cfg.merge_from_file(args.cfg)
131 |     cfg.merge_from_list(args.opts)
132 | 
133 |     cfg.DATASET.ROOT = os.path.join(
134 |         cfg.DATA_DIR, cfg.DATASET.ROOT
135 |     )
136 | 
137 |     cfg.MODEL.PRETRAINED = os.path.join(
138 |         cfg.DATA_DIR, cfg.MODEL.PRETRAINED
139 |     )
140 | 
141 |     if cfg.TEST.MODEL_FILE:
142 |         cfg.TEST.MODEL_FILE = os.path.join(
143 |             cfg.DATA_DIR, cfg.TEST.MODEL_FILE
144 |         )
145 | 
146 |     cfg.freeze()
147 | 
148 | 
149 | if __name__ == '__main__':
150 |     import sys
151 |     with open(sys.argv[1], 'w') as f:
152 |         print(_C, file=f)
153 | 
154 | 


--------------------------------------------------------------------------------
/lib/config/models.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | from yacs.config import CfgNode as CN
12 | 
13 | 
14 | # pose_resnet related params
15 | POSE_RESNET = CN()
16 | POSE_RESNET.NUM_LAYERS = 50
17 | POSE_RESNET.DECONV_WITH_BIAS = False
18 | POSE_RESNET.NUM_DECONV_LAYERS = 3
19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256]
20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4]
21 | POSE_RESNET.FINAL_CONV_KERNEL = 1
22 | POSE_RESNET.PRETRAINED_LAYERS = ['*']
23 | 
24 | # pose_multi_resoluton_net related params
25 | POSE_HIGH_RESOLUTION_NET = CN()
26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*']
27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64
28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1
29 | 
30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN()
31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1
32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2
33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4]
34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64]
35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC'
36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM'
37 | 
38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN()
39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1
40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3
41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4]
42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128]
43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC'
44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM'
45 | 
46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN()
47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1
48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4
49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC'
52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM'
53 | 
54 | 
55 | MODEL_EXTRAS = {
56 |     'pose_resnet': POSE_RESNET,
57 |     'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET,
58 | }
59 | 


--------------------------------------------------------------------------------
/lib/core/evaluate.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | import numpy as np
12 | 
13 | from core.inference import get_max_preds
14 | 
15 | 
16 | def calc_dists(preds, target, normalize):
17 |     preds = preds.astype(np.float32)
18 |     target = target.astype(np.float32)
19 |     dists = np.zeros((preds.shape[1], preds.shape[0]))
20 |     for n in range(preds.shape[0]):
21 |         for c in range(preds.shape[1]):
22 |             if target[n, c, 0] > 1 and target[n, c, 1] > 1:
23 |                 normed_preds = preds[n, c, :] / normalize[n]
24 |                 normed_targets = target[n, c, :] / normalize[n]
25 |                 dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
26 |             else:
27 |                 dists[c, n] = -1
28 |     return dists
29 | 
30 | 
31 | def dist_acc(dists, thr=0.5):
32 |     ''' Return percentage below threshold while ignoring values with a -1 '''
33 |     dist_cal = np.not_equal(dists, -1)
34 |     num_dist_cal = dist_cal.sum()
35 |     if num_dist_cal > 0:
36 |         return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
37 |     else:
38 |         return -1
39 | 
40 | 
41 | def accuracy(output, target, hm_type='gaussian', thr=0.5):
42 |     '''
43 |     Calculate accuracy according to PCK,
44 |     but uses ground truth heatmap rather than x,y locations
45 |     First value to be returned is average accuracy across 'idxs',
46 |     followed by individual accuracies
47 |     '''
48 |     idx = list(range(output.shape[1]))
49 |     norm = 1.0
50 |     if hm_type == 'gaussian':
51 |         pred, _ = get_max_preds(output)
52 |         target, _ = get_max_preds(target)
53 |         h = output.shape[2]
54 |         w = output.shape[3]
55 |         norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10
56 |     dists = calc_dists(pred, target, norm)
57 | 
58 |     acc = np.zeros((len(idx) + 1))
59 |     avg_acc = 0
60 |     cnt = 0
61 | 
62 |     for i in range(len(idx)):
63 |         acc[i + 1] = dist_acc(dists[idx[i]])
64 |         if acc[i + 1] >= 0:
65 |             avg_acc = avg_acc + acc[i + 1]
66 |             cnt += 1
67 | 
68 |     avg_acc = avg_acc / cnt if cnt != 0 else 0
69 |     if cnt != 0:
70 |         acc[0] = avg_acc
71 |     return acc, avg_acc, cnt, pred
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/lib/core/function.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | # ------------------------------------------------------------------------------
  8 | # Updated by cavalleria (cavalleria@gmail.com)
  9 | # ------------------------------------------------------------------------------
 10 | 
 11 | from __future__ import absolute_import
 12 | from __future__ import division
 13 | from __future__ import print_function
 14 |  
 15 | import time
 16 | import logging
 17 | import os
 18 | 
 19 | import numpy as np
 20 | import torch
 21 | 
 22 | from core.evaluate import accuracy
 23 | from core.inference import get_final_preds, get_final_preds_using_softargmax
 24 | from utils.transforms import flip_back
 25 | from utils.vis import save_debug_images
 26 | from tqdm import tqdm
 27 | 
 28 | logger = logging.getLogger(__name__)
 29 | 
 30 | 
 31 | def train(config, train_loader, model, criterion, optimizer, epoch,
 32 |           output_dir, tb_log_dir, writer_dict):
 33 |     batch_time = AverageMeter()
 34 |     data_time = AverageMeter()
 35 |     losses = AverageMeter()
 36 |     acc = AverageMeter()
 37 | 
 38 |     # switch to train mode
 39 |     model.train()
 40 | 
 41 |     end = time.time()
 42 |     i = 0
 43 |     for (input, target, target_weight, meta) in tqdm(iter(train_loader)):
 44 |         # measure data loading time
 45 |         data_time.update(time.time() - end)
 46 |         # compute output
 47 |         outputs = model(input)
 48 |         target = target.cuda(non_blocking=True)
 49 |         target_weight = target_weight.cuda(non_blocking=True)
 50 | 
 51 |         if isinstance(outputs, list):
 52 |             loss = criterion(outputs[0], target, target_weight)
 53 |             for output in outputs[1:]:
 54 |                 loss += criterion(output, target, target_weight)
 55 |         else:
 56 |             output = outputs
 57 |             loss = criterion(output, target, target_weight)
 58 |         # loss = criterion(output, target, target_weight)
 59 | 
 60 |         # compute gradient and do update step
 61 |         optimizer.zero_grad()
 62 |         loss.backward()
 63 |         optimizer.step()
 64 | 
 65 |         # measure accuracy and record loss
 66 |         losses.update(loss.item(), input.size(0))
 67 | 
 68 |         _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
 69 |                                          target.detach().cpu().numpy())
 70 |         acc.update(avg_acc, cnt)
 71 | 
 72 |         # measure elapsed time
 73 |         batch_time.update(time.time() - end)
 74 |         end = time.time()
 75 | 
 76 |         if i % config.PRINT_FREQ == 0:
 77 |             msg = 'Epoch: [{0}][{1}/{2}]\t' \
 78 |                   'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
 79 |                   'Speed {speed:.1f} samples/s\t' \
 80 |                   'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
 81 |                   'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
 82 |                   'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
 83 |                       epoch, i, len(train_loader), batch_time=batch_time,
 84 |                       speed=input.size(0)/batch_time.val,
 85 |                       data_time=data_time, loss=losses, acc=acc)
 86 |             logger.info(msg)
 87 | 
 88 |             writer = writer_dict['writer']
 89 |             global_steps = writer_dict['train_global_steps']
 90 |             writer.add_scalar('train_loss', losses.val, global_steps)
 91 |             writer.add_scalar('train_acc', acc.val, global_steps)
 92 |             writer_dict['train_global_steps'] = global_steps + 1
 93 | 
 94 |             prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
 95 |             save_debug_images(config, input, meta, target, pred*4, output, prefix)
 96 |         i += 1
 97 | 
 98 | def validate(args, config, val_loader, val_dataset, model, criterion, output_dir,
 99 |              tb_log_dir, writer_dict=None):
100 |     batch_time = AverageMeter()
101 |     losses = AverageMeter()
102 |     acc = AverageMeter()
103 | 
104 |     # switch to evaluate mode
105 |     model.eval()
106 | 
107 |     num_samples = len(val_dataset)
108 |     all_preds = np.zeros(
109 |         (num_samples, config.MODEL.NUM_JOINTS, 3),
110 |         dtype=np.float32
111 |     )
112 |     all_boxes = np.zeros((num_samples, 6))
113 |     image_path = []
114 |     filenames = []
115 |     imgnums = []
116 |     idx = 0
117 |     with torch.no_grad():
118 |         end = time.time()
119 |         for i, (input, target, target_weight, meta) in enumerate(val_loader):
120 |             # compute output
121 |             outputs = model(input)
122 |             if isinstance(outputs, list):
123 |                 output = outputs[-1]
124 |             else:
125 |                 output = outputs
126 | 
127 |             if config.TEST.FLIP_TEST:
128 |                 input_flipped = input.flip(3)
129 |                 outputs_flipped = model(input_flipped)
130 | 
131 |                 if isinstance(outputs_flipped, list):
132 |                     output_flipped = outputs_flipped[-1]
133 |                 else:
134 |                     output_flipped = outputs_flipped
135 | 
136 |                 output_flipped = flip_back(output_flipped.cpu().numpy(),
137 |                                            val_dataset.flip_pairs)
138 |                 output_flipped = torch.from_numpy(output_flipped.copy()).cuda()
139 | 
140 | 
141 |                 # feature is not aligned, shift flipped heatmap for higher accuracy
142 |                 if config.TEST.SHIFT_HEATMAP:
143 |                     output_flipped[:, :, :, 1:] = \
144 |                         output_flipped.clone()[:, :, :, 0:-1]
145 | 
146 |                 output = (output + output_flipped) * 0.5
147 | 
148 |             target = target.cuda(non_blocking=True)
149 |             target_weight = target_weight.cuda(non_blocking=True)
150 | 
151 |             loss = criterion(output, target, target_weight)
152 | 
153 |             num_images = input.size(0)
154 |             # measure accuracy and record loss
155 |             losses.update(loss.item(), num_images)
156 |             _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
157 |                                              target.cpu().numpy())
158 | 
159 |             acc.update(avg_acc, cnt)
160 | 
161 |             # measure elapsed time
162 |             batch_time.update(time.time() - end)
163 |             end = time.time()
164 | 
165 |             c = meta['center'].numpy()
166 |             s = meta['scale'].numpy()
167 |             score = meta['score'].numpy()
168 | 
169 |             #preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s)
170 |             
171 |             if config.TEST.SOFT_ARGMAX:
172 |                 preds, maxvals = get_final_preds_using_softargmax(config, output.clone(), c, s)
173 |             else:
174 |                 preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s)
175 | 
176 |             preds = preds - config.TEST.BIAS
177 | 
178 | 
179 |             all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
180 |             all_preds[idx:idx + num_images, :, 2:3] = maxvals
181 |             # double check this all_boxes parts
182 |             all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
183 |             all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
184 |             all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1)
185 |             all_boxes[idx:idx + num_images, 5] = score
186 |             image_path.extend(meta['image'])
187 | 
188 |             idx += num_images
189 | 
190 |             if i % config.PRINT_FREQ == 0:
191 |                 msg = 'Test: [{0}/{1}]\t' \
192 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
193 |                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
194 |                       'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
195 |                           i, len(val_loader), batch_time=batch_time,
196 |                           loss=losses, acc=acc)
197 |                 logger.info(msg)
198 | 
199 |                 prefix = '{}_{}'.format(
200 |                     os.path.join(output_dir, 'val'), i
201 |                 )
202 |                 save_debug_images(config, input, meta, target, pred*4, output, prefix)
203 |         
204 |         name_values, perf_indicator = val_dataset.evaluate(
205 |             args, config, all_preds, output_dir, all_boxes, image_path, filenames, imgnums)
206 | 
207 |         
208 |         model_name = config.MODEL.NAME
209 |         if isinstance(name_values, list):
210 |             for name_value in name_values:
211 |                 _print_name_value(name_value, model_name)
212 |         else:
213 |             _print_name_value(name_values, model_name)
214 | 
215 |         if writer_dict:
216 |             writer = writer_dict['writer']
217 |             global_steps = writer_dict['valid_global_steps']
218 |             writer.add_scalar(
219 |                 'valid_loss',
220 |                 losses.avg,
221 |                 global_steps
222 |             )
223 |             writer.add_scalar(
224 |                 'valid_acc',
225 |                 acc.avg,
226 |                 global_steps
227 |             )
228 |             if isinstance(name_values, list):
229 |                 for name_value in name_values:
230 |                     writer.add_scalars(
231 |                         'valid',
232 |                         dict(name_value),
233 |                         global_steps
234 |                     )
235 |             else:
236 |                 writer.add_scalars(
237 |                     'valid',
238 |                     dict(name_values),
239 |                     global_steps
240 |                 )
241 |             writer_dict['valid_global_steps'] = global_steps + 1
242 | 
243 |     return perf_indicator
244 | 
245 | 
246 | # markdown format output
247 | def _print_name_value(name_value, full_arch_name):
248 |     names = name_value.keys()
249 |     values = name_value.values()
250 |     num_values = len(name_value)
251 |     logger.info(
252 |         '| Arch ' +
253 |         ' '.join(['| {}'.format(name) for name in names]) +
254 |         ' |'
255 |     )
256 |     logger.info('|---' * (num_values+1) + '|')
257 | 
258 |     if len(full_arch_name) > 15:
259 |         full_arch_name = full_arch_name[:8] + '...'
260 |     logger.info(
261 |         '| ' + full_arch_name + ' ' +
262 |         ' '.join(['| {:.3f}'.format(value) for value in values]) +
263 |          ' |'
264 |     )
265 | 
266 | 
267 | class AverageMeter(object):
268 |     """Computes and stores the average and current value"""
269 |     def __init__(self):
270 |         self.reset()
271 | 
272 |     def reset(self):
273 |         self.val = 0
274 |         self.avg = 0
275 |         self.sum = 0
276 |         self.count = 0
277 | 
278 |     def update(self, val, n=1):
279 |         self.val = val
280 |         self.sum += val * n
281 |         self.count += n
282 |         self.avg = self.sum / self.count if self.count != 0 else 0
283 | 


--------------------------------------------------------------------------------
/lib/core/inference.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | # ------------------------------------------------------------------------------
  8 | # Updated by cavalleria (cavalleria@gmail.com)
  9 | # ------------------------------------------------------------------------------
 10 | 
 11 | from __future__ import absolute_import
 12 | from __future__ import division
 13 | from __future__ import print_function
 14 | 
 15 | import math
 16 | 
 17 | import numpy as np
 18 | import torch
 19 | import torch.nn as nn
 20 | from utils.transforms import transform_preds
 21 | 
 22 | 
 23 | def get_max_preds(batch_heatmaps):
 24 |     '''
 25 |     get predictions from score maps
 26 |     heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
 27 |     '''
 28 |     assert isinstance(batch_heatmaps, np.ndarray), \
 29 |         'batch_heatmaps should be numpy.ndarray'
 30 |     assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'
 31 | 
 32 |     batch_size = batch_heatmaps.shape[0]
 33 |     num_joints = batch_heatmaps.shape[1]
 34 |     width = batch_heatmaps.shape[3]
 35 |     heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
 36 |     idx = np.argmax(heatmaps_reshaped, 2)
 37 |     maxvals = np.amax(heatmaps_reshaped, 2)
 38 | 
 39 |     maxvals = maxvals.reshape((batch_size, num_joints, 1))
 40 |     idx = idx.reshape((batch_size, num_joints, 1))
 41 | 
 42 |     preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
 43 | 
 44 |     preds[:, :, 0] = (preds[:, :, 0]) % width
 45 |     preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
 46 | 
 47 |     pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
 48 |     pred_mask = pred_mask.astype(np.float32)
 49 | 
 50 |     preds *= pred_mask
 51 |     return preds, maxvals
 52 | 
 53 | 
 54 | def get_final_preds(config, batch_heatmaps, center, scale):
 55 |     coords, maxvals = get_max_preds(batch_heatmaps)
 56 | 
 57 |     heatmap_height = batch_heatmaps.shape[2]
 58 |     heatmap_width = batch_heatmaps.shape[3]
 59 | 
 60 |     # post-processing
 61 |     if config.TEST.POST_PROCESS:
 62 |         for n in range(coords.shape[0]):
 63 |             for p in range(coords.shape[1]):
 64 |                 hm = batch_heatmaps[n][p]
 65 |                 px = int(math.floor(coords[n][p][0] + 0.5))
 66 |                 py = int(math.floor(coords[n][p][1] + 0.5))
 67 |                 if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
 68 |                     diff = np.array(
 69 |                         [
 70 |                             hm[py][px + 1] - hm[py][px - 1],
 71 |                             hm[py + 1][px] - hm[py - 1][px]
 72 |                         ]
 73 |                     )
 74 |                     coords[n][p] += np.sign(diff) * .25
 75 | 
 76 |     preds = coords.copy()
 77 | 
 78 |     # Transform back
 79 |     for i in range(coords.shape[0]):
 80 |         preds[i] = transform_preds(
 81 |             coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
 82 |         )
 83 | 
 84 |     return preds, maxvals
 85 | 
 86 | class SoftArgmax2D(nn.Module):
 87 |     def __init__(self, height=64, width=48, beta=100):
 88 |         super(SoftArgmax2D, self).__init__()
 89 |         self.softmax = nn.Softmax(dim=-1)
 90 |         self.beta = beta
 91 |         # Note that meshgrid in pytorch behaves differently with numpy.
 92 |         self.WY, self.WX = torch.meshgrid(torch.arange(height, dtype=torch.float),
 93 |                                           torch.arange(width, dtype=torch.float))
 94 | 
 95 |     def forward(self, x):
 96 |         b, c, h, w = x.shape
 97 |         device = x.device
 98 | 
 99 |         probs = self.softmax(x.view(b, c, -1) * self.beta)
100 |         probs = probs.view(b, c, h, w)
101 | 
102 |         self.WY = self.WY.to(device)
103 |         self.WX = self.WX.to(device)
104 | 
105 |         px = torch.sum(probs * self.WX, dim=(2, 3))
106 |         py = torch.sum(probs * self.WY, dim=(2, 3))
107 |         preds = torch.stack((px, py), dim=-1).cpu().numpy()
108 | 
109 |         idx = np.round(preds).astype(np.int32)
110 |         maxvals = np.zeros(shape=(b, c, 1))
111 |         for bi in range(b):
112 |             for ci in range(c):
113 |                 maxvals[bi, ci, 0] = x[bi, ci, idx[bi, ci, 1], idx[bi, ci, 0]]
114 | 
115 |         return preds, maxvals
116 | 
117 | def get_final_preds_using_softargmax(config, batch_heatmaps, center, scale):
118 |     soft_argmax = SoftArgmax2D(config.MODEL.HEATMAP_SIZE[1], config.MODEL.HEATMAP_SIZE[0], beta=160)
119 |     coords, maxvals = soft_argmax(batch_heatmaps)
120 | 
121 |     heatmap_height = batch_heatmaps.shape[2]
122 |     heatmap_width = batch_heatmaps.shape[3]
123 | 
124 |     batch_heatmaps = batch_heatmaps.cpu().numpy()
125 | 
126 |     # post-processing
127 |     if config.TEST.POST_PROCESS:
128 |         for n in range(coords.shape[0]):
129 |             for p in range(coords.shape[1]):
130 |                 hm = batch_heatmaps[n][p]
131 |                 px = int(math.floor(coords[n][p][0] + 0.5))
132 |                 py = int(math.floor(coords[n][p][1] + 0.5))
133 |                 if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
134 |                     diff = np.array(
135 |                         [
136 |                             hm[py][px + 1] - hm[py][px - 1],
137 |                             hm[py + 1][px] - hm[py - 1][px]
138 |                         ]
139 |                     )
140 |                     coords[n][p] += np.sign(diff) * .25
141 | 
142 |     preds = coords.copy()
143 | 
144 |     # Transform back
145 |     for i in range(coords.shape[0]):
146 |         preds[i] = transform_preds(
147 |             coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
148 |         )
149 | 
150 |     return preds, maxvals


--------------------------------------------------------------------------------
/lib/core/loss.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | import torch
12 | import torch.nn as nn
13 | 
14 | 
15 | class JointsMSELoss(nn.Module):
16 |     def __init__(self, use_target_weight):
17 |         super(JointsMSELoss, self).__init__()
18 |         self.criterion = nn.MSELoss(reduction='mean')
19 |         self.use_target_weight = use_target_weight
20 | 
21 |     def forward(self, output, target, target_weight):
22 |         batch_size = output.size(0)
23 |         num_joints = output.size(1)
24 |         heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
25 |         heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
26 |         loss = 0
27 | 
28 |         for idx in range(num_joints):
29 |             heatmap_pred = heatmaps_pred[idx].squeeze()
30 |             heatmap_gt = heatmaps_gt[idx].squeeze()
31 |             if self.use_target_weight:
32 |                 loss += 0.5 * self.criterion(
33 |                     heatmap_pred.mul(target_weight[:, idx]),
34 |                     heatmap_gt.mul(target_weight[:, idx])
35 |                 )
36 |             else:
37 |                 loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
38 | 
39 |         return loss / num_joints
40 | 
41 | 
42 | class JointsOHKMMSELoss(nn.Module):
43 |     def __init__(self, use_target_weight, topk=8):
44 |         super(JointsOHKMMSELoss, self).__init__()
45 |         self.criterion = nn.MSELoss(reduction='none')
46 |         self.use_target_weight = use_target_weight
47 |         self.topk = topk
48 | 
49 |     def ohkm(self, loss):
50 |         ohkm_loss = 0.
51 |         for i in range(loss.size()[0]):
52 |             sub_loss = loss[i]
53 |             topk_val, topk_idx = torch.topk(
54 |                 sub_loss, k=self.topk, dim=0, sorted=False
55 |             )
56 |             tmp_loss = torch.gather(sub_loss, 0, topk_idx)
57 |             ohkm_loss += torch.sum(tmp_loss) / self.topk
58 |         ohkm_loss /= loss.size()[0]
59 |         return ohkm_loss
60 | 
61 |     def forward(self, output, target, target_weight):
62 |         batch_size = output.size(0)
63 |         num_joints = output.size(1)
64 |         heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
65 |         heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
66 | 
67 |         loss = []
68 |         for idx in range(num_joints):
69 |             heatmap_pred = heatmaps_pred[idx].squeeze()
70 |             heatmap_gt = heatmaps_gt[idx].squeeze()
71 |             if self.use_target_weight:
72 |                 loss.append(0.5 * self.criterion(
73 |                     heatmap_pred.mul(target_weight[:, idx]),
74 |                     heatmap_gt.mul(target_weight[:, idx])
75 |                 ))
76 |             else:
77 |                 loss.append(
78 |                     0.5 * self.criterion(heatmap_pred, heatmap_gt)
79 |                 )
80 | 
81 |         loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss]
82 |         loss = torch.cat(loss, dim=1)
83 | 
84 |         return self.ohkm(loss)
85 | 


--------------------------------------------------------------------------------
/lib/dataset/JointsDataset.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import copy
 12 | import logging
 13 | import random
 14 | 
 15 | import cv2
 16 | import numpy as np
 17 | import torch
 18 | from torch.utils.data import Dataset
 19 | 
 20 | from utils.transforms import get_affine_transform
 21 | from utils.transforms import affine_transform
 22 | from utils.transforms import fliplr_joints
 23 | 
 24 | 
 25 | logger = logging.getLogger(__name__)
 26 | 
 27 | 
 28 | class JointsDataset(Dataset):
 29 |     def __init__(self, cfg, root, image_set, is_train, transform=None):
 30 |         self.num_joints = 0
 31 |         self.pixel_std = 200
 32 |         self.flip_pairs = []
 33 |         self.parent_ids = []
 34 | 
 35 |         self.is_train = is_train
 36 |         self.root = root
 37 |         self.image_set = image_set
 38 | 
 39 |         self.output_path = cfg.OUTPUT_DIR
 40 |         self.data_format = cfg.DATASET.DATA_FORMAT
 41 | 
 42 |         self.scale_factor = cfg.DATASET.SCALE_FACTOR
 43 |         self.rotation_factor = cfg.DATASET.ROT_FACTOR
 44 |         self.flip = cfg.DATASET.FLIP
 45 |         self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY
 46 |         self.prob_half_body = cfg.DATASET.PROB_HALF_BODY
 47 |         self.color_rgb = cfg.DATASET.COLOR_RGB
 48 | 
 49 |         self.target_type = cfg.MODEL.TARGET_TYPE
 50 |         self.image_size = np.array(cfg.MODEL.IMAGE_SIZE)
 51 |         self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE)
 52 |         self.sigma = cfg.MODEL.SIGMA
 53 |         self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT
 54 |         self.joints_weight = 1
 55 | 
 56 |         self.transform = transform
 57 |         self.db = []
 58 | 
 59 |     def _get_db(self):
 60 |         raise NotImplementedError
 61 | 
 62 |     def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
 63 |         raise NotImplementedError
 64 | 
 65 |     def half_body_transform(self, joints, joints_vis):
 66 |         upper_joints = []
 67 |         lower_joints = []
 68 |         for joint_id in range(self.num_joints):
 69 |             if joints_vis[joint_id][0] > 0:
 70 |                 if joint_id in self.upper_body_ids:
 71 |                     upper_joints.append(joints[joint_id])
 72 |                 else:
 73 |                     lower_joints.append(joints[joint_id])
 74 | 
 75 |         if np.random.randn() < 0.5 and len(upper_joints) > 2:
 76 |             selected_joints = upper_joints
 77 |         else:
 78 |             selected_joints = lower_joints \
 79 |                 if len(lower_joints) > 2 else upper_joints
 80 | 
 81 |         if len(selected_joints) < 2:
 82 |             return None, None
 83 | 
 84 |         selected_joints = np.array(selected_joints, dtype=np.float32)
 85 |         center = selected_joints.mean(axis=0)[:2]
 86 | 
 87 |         left_top = np.amin(selected_joints, axis=0)
 88 |         right_bottom = np.amax(selected_joints, axis=0)
 89 | 
 90 |         w = right_bottom[0] - left_top[0]
 91 |         h = right_bottom[1] - left_top[1]
 92 | 
 93 |         if w > self.aspect_ratio * h:
 94 |             h = w * 1.0 / self.aspect_ratio
 95 |         elif w < self.aspect_ratio * h:
 96 |             w = h * self.aspect_ratio
 97 | 
 98 |         scale = np.array(
 99 |             [
100 |                 w * 1.0 / self.pixel_std,
101 |                 h * 1.0 / self.pixel_std
102 |             ],
103 |             dtype=np.float32
104 |         )
105 | 
106 |         scale = scale * 1.5
107 | 
108 |         return center, scale
109 | 
110 |     def __len__(self,):
111 |         return len(self.db)
112 | 
113 |     def __getitem__(self, idx):
114 |         db_rec = copy.deepcopy(self.db[idx])
115 | 
116 |         image_file = db_rec['image']
117 |         filename = db_rec['filename'] if 'filename' in db_rec else ''
118 |         imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''
119 | 
120 |         if self.data_format == 'zip':
121 |             from utils import zipreader
122 |             data_numpy = zipreader.imread(
123 |                 image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
124 |             )
125 |         else:
126 |             data_numpy = cv2.imread(
127 |                 image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
128 |             )
129 | 
130 |         if self.color_rgb:
131 |             data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
132 | 
133 |         if data_numpy is None:
134 |             logger.error('=> fail to read {}'.format(image_file))
135 |             raise ValueError('Fail to read {}'.format(image_file))
136 | 
137 |         joints = db_rec['joints_3d']
138 |         joints_vis = db_rec['joints_3d_vis']
139 | 
140 |         c = db_rec['center']
141 |         s = db_rec['scale']
142 |         score = db_rec['score'] if 'score' in db_rec else 1
143 |         r = 0
144 | 
145 |         if self.is_train:
146 |             if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
147 |                 and np.random.rand() < self.prob_half_body):
148 |                 c_half_body, s_half_body = self.half_body_transform(
149 |                     joints, joints_vis
150 |                 )
151 | 
152 |                 if c_half_body is not None and s_half_body is not None:
153 |                     c, s = c_half_body, s_half_body
154 | 
155 |             sf = self.scale_factor
156 |             rf = self.rotation_factor
157 |             s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
158 |             r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
159 |                 if random.random() <= 0.6 else 0
160 | 
161 |             if self.flip and random.random() <= 0.5:
162 |                 data_numpy = data_numpy[:, ::-1, :]
163 |                 joints, joints_vis = fliplr_joints(
164 |                     joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
165 |                 c[0] = data_numpy.shape[1] - c[0] - 1
166 | 
167 |         trans = get_affine_transform(c, s, r, self.image_size)
168 |         input = cv2.warpAffine(
169 |             data_numpy,
170 |             trans,
171 |             (int(self.image_size[0]), int(self.image_size[1])),
172 |             flags=cv2.INTER_LINEAR)
173 | 
174 |         if self.transform:
175 |             input = self.transform(input)
176 | 
177 |         for i in range(self.num_joints):
178 |             if joints_vis[i, 0] > 0.0:
179 |                 joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
180 | 
181 |         target, target_weight = self.generate_target(joints, joints_vis)
182 | 
183 |         target = torch.from_numpy(target)
184 |         target_weight = torch.from_numpy(target_weight)
185 | 
186 |         meta = {
187 |             'image': image_file,
188 |             'filename': filename,
189 |             'imgnum': imgnum,
190 |             'joints': joints,
191 |             'joints_vis': joints_vis,
192 |             'center': c,
193 |             'scale': s,
194 |             'rotation': r,
195 |             'score': score
196 |         }
197 | 
198 |         return input, target, target_weight, meta
199 | 
200 |     def select_data(self, db):
201 |         db_selected = []
202 |         for rec in db:
203 |             num_vis = 0
204 |             joints_x = 0.0
205 |             joints_y = 0.0
206 |             for joint, joint_vis in zip(
207 |                     rec['joints_3d'], rec['joints_3d_vis']):
208 |                 if joint_vis[0] <= 0:
209 |                     continue
210 |                 num_vis += 1
211 | 
212 |                 joints_x += joint[0]
213 |                 joints_y += joint[1]
214 |             if num_vis == 0:
215 |                 continue
216 | 
217 |             joints_x, joints_y = joints_x / num_vis, joints_y / num_vis
218 | 
219 |             area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2)
220 |             joints_center = np.array([joints_x, joints_y])
221 |             bbox_center = np.array(rec['center'])
222 |             diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2)
223 |             ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area))
224 | 
225 |             metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16
226 |             if ks > metric:
227 |                 db_selected.append(rec)
228 | 
229 |         logger.info('=> num db: {}'.format(len(db)))
230 |         logger.info('=> num selected db: {}'.format(len(db_selected)))
231 |         return db_selected
232 | 
233 |     def generate_target(self, joints, joints_vis):
234 |         '''
235 |         :param joints:  [num_joints, 3]
236 |         :param joints_vis: [num_joints, 3]
237 |         :return: target, target_weight(1: visible, 0: invisible)
238 |         '''
239 |         target_weight = np.ones((self.num_joints, 1), dtype=np.float32)
240 |         target_weight[:, 0] = joints_vis[:, 0]
241 | 
242 |         assert self.target_type == 'gaussian', \
243 |             'Only support gaussian map now!'
244 | 
245 |         if self.target_type == 'gaussian':
246 |             target = np.zeros((self.num_joints,
247 |                                self.heatmap_size[1],
248 |                                self.heatmap_size[0]),
249 |                               dtype=np.float32)
250 | 
251 |             tmp_size = self.sigma * 3
252 | 
253 |             for joint_id in range(self.num_joints):
254 |                 feat_stride = self.image_size / self.heatmap_size
255 |                 mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
256 |                 mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
257 |                 # Check that any part of the gaussian is in-bounds
258 |                 ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
259 |                 br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
260 |                 if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \
261 |                         or br[0] < 0 or br[1] < 0:
262 |                     # If not, just return the image as is
263 |                     target_weight[joint_id] = 0
264 |                     continue
265 | 
266 |                 # # Generate gaussian
267 |                 size = 2 * tmp_size + 1
268 |                 x = np.arange(0, size, 1, np.float32)
269 |                 y = x[:, np.newaxis]
270 |                 x0 = y0 = size // 2
271 |                 # The gaussian is not normalized, we want the center value to equal 1
272 |                 g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2))
273 | 
274 |                 # Usable gaussian range
275 |                 g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0]
276 |                 g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1]
277 |                 # Image range
278 |                 img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0])
279 |                 img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1])
280 | 
281 |                 v = target_weight[joint_id]
282 |                 if v > 0.5:
283 |                     target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
284 |                         g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
285 | 
286 |         if self.use_different_joints_weight:
287 |             target_weight = np.multiply(target_weight, self.joints_weight)
288 | 
289 |         return target, target_weight
290 | 


--------------------------------------------------------------------------------
/lib/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | from .mpii import MPIIDataset as mpii
12 | from .coco import COCODataset as coco
13 | 
14 | 


--------------------------------------------------------------------------------
/lib/dataset/mpii.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import logging
 12 | import os
 13 | import json_tricks as json
 14 | from collections import OrderedDict
 15 | 
 16 | import numpy as np
 17 | from scipy.io import loadmat, savemat
 18 | 
 19 | from dataset.JointsDataset import JointsDataset
 20 | 
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | class MPIIDataset(JointsDataset):
 26 |     def __init__(self, cfg, root, image_set, is_train, transform=None):
 27 |         super().__init__(cfg, root, image_set, is_train, transform)
 28 | 
 29 |         self.num_joints = 16
 30 |         self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
 31 |         self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14]
 32 | 
 33 |         self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15)
 34 |         self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6)
 35 | 
 36 |         self.db = self._get_db()
 37 | 
 38 |         if is_train and cfg.DATASET.SELECT_DATA:
 39 |             self.db = self.select_data(self.db)
 40 | 
 41 |         logger.info('=> load {} samples'.format(len(self.db)))
 42 | 
 43 |     def _get_db(self):
 44 |         # create train/val split
 45 |         file_name = os.path.join(
 46 |             self.root, 'annot', self.image_set+'.json'
 47 |         )
 48 |         with open(file_name) as anno_file:
 49 |             anno = json.load(anno_file)
 50 | 
 51 |         gt_db = []
 52 |         for a in anno:
 53 |             image_name = a['image']
 54 | 
 55 |             c = np.array(a['center'], dtype=np.float)
 56 |             s = np.array([a['scale'], a['scale']], dtype=np.float)
 57 | 
 58 |             # Adjust center/scale slightly to avoid cropping limbs
 59 |             if c[0] != -1:
 60 |                 c[1] = c[1] + 15 * s[1]
 61 |                 s = s * 1.25
 62 | 
 63 |             # MPII uses matlab format, index is based 1,
 64 |             # we should first convert to 0-based index
 65 |             c = c - 1
 66 | 
 67 |             joints_3d = np.zeros((self.num_joints, 3), dtype=np.float)
 68 |             joints_3d_vis = np.zeros((self.num_joints,  3), dtype=np.float)
 69 |             if self.image_set != 'test':
 70 |                 joints = np.array(a['joints'])
 71 |                 joints[:, 0:2] = joints[:, 0:2] - 1
 72 |                 joints_vis = np.array(a['joints_vis'])
 73 |                 assert len(joints) == self.num_joints, \
 74 |                     'joint num diff: {} vs {}'.format(len(joints),
 75 |                                                       self.num_joints)
 76 | 
 77 |                 joints_3d[:, 0:2] = joints[:, 0:2]
 78 |                 joints_3d_vis[:, 0] = joints_vis[:]
 79 |                 joints_3d_vis[:, 1] = joints_vis[:]
 80 | 
 81 |             image_dir = 'images.zip@' if self.data_format == 'zip' else 'images'
 82 |             gt_db.append(
 83 |                 {
 84 |                     'image': os.path.join(self.root, image_dir, image_name),
 85 |                     'center': c,
 86 |                     'scale': s,
 87 |                     'joints_3d': joints_3d,
 88 |                     'joints_3d_vis': joints_3d_vis,
 89 |                     'filename': '',
 90 |                     'imgnum': 0,
 91 |                 }
 92 |             )
 93 | 
 94 |         return gt_db
 95 | 
 96 |     def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
 97 |         # convert 0-based index to 1-based index
 98 |         preds = preds[:, :, 0:2] + 1.0
 99 | 
100 |         if output_dir:
101 |             pred_file = os.path.join(output_dir, 'pred.mat')
102 |             savemat(pred_file, mdict={'preds': preds})
103 | 
104 |         if 'test' in cfg.DATASET.TEST_SET:
105 |             return {'Null': 0.0}, 0.0
106 | 
107 |         SC_BIAS = 0.6
108 |         threshold = 0.5
109 | 
110 |         gt_file = os.path.join(cfg.DATASET.ROOT,
111 |                                'annot',
112 |                                'gt_{}.mat'.format(cfg.DATASET.TEST_SET))
113 |         gt_dict = loadmat(gt_file)
114 |         dataset_joints = gt_dict['dataset_joints']
115 |         jnt_missing = gt_dict['jnt_missing']
116 |         pos_gt_src = gt_dict['pos_gt_src']
117 |         headboxes_src = gt_dict['headboxes_src']
118 | 
119 |         pos_pred_src = np.transpose(preds, [1, 2, 0])
120 | 
121 |         head = np.where(dataset_joints == 'head')[1][0]
122 |         lsho = np.where(dataset_joints == 'lsho')[1][0]
123 |         lelb = np.where(dataset_joints == 'lelb')[1][0]
124 |         lwri = np.where(dataset_joints == 'lwri')[1][0]
125 |         lhip = np.where(dataset_joints == 'lhip')[1][0]
126 |         lkne = np.where(dataset_joints == 'lkne')[1][0]
127 |         lank = np.where(dataset_joints == 'lank')[1][0]
128 | 
129 |         rsho = np.where(dataset_joints == 'rsho')[1][0]
130 |         relb = np.where(dataset_joints == 'relb')[1][0]
131 |         rwri = np.where(dataset_joints == 'rwri')[1][0]
132 |         rkne = np.where(dataset_joints == 'rkne')[1][0]
133 |         rank = np.where(dataset_joints == 'rank')[1][0]
134 |         rhip = np.where(dataset_joints == 'rhip')[1][0]
135 | 
136 |         jnt_visible = 1 - jnt_missing
137 |         uv_error = pos_pred_src - pos_gt_src
138 |         uv_err = np.linalg.norm(uv_error, axis=1)
139 |         headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
140 |         headsizes = np.linalg.norm(headsizes, axis=0)
141 |         headsizes *= SC_BIAS
142 |         scale = np.multiply(headsizes, np.ones((len(uv_err), 1)))
143 |         scaled_uv_err = np.divide(uv_err, scale)
144 |         scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible)
145 |         jnt_count = np.sum(jnt_visible, axis=1)
146 |         less_than_threshold = np.multiply((scaled_uv_err <= threshold),
147 |                                           jnt_visible)
148 |         PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count)
149 | 
150 |         # save
151 |         rng = np.arange(0, 0.5+0.01, 0.01)
152 |         pckAll = np.zeros((len(rng), 16))
153 | 
154 |         for r in range(len(rng)):
155 |             threshold = rng[r]
156 |             less_than_threshold = np.multiply(scaled_uv_err <= threshold,
157 |                                               jnt_visible)
158 |             pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1),
159 |                                      jnt_count)
160 | 
161 |         PCKh = np.ma.array(PCKh, mask=False)
162 |         PCKh.mask[6:8] = True
163 | 
164 |         jnt_count = np.ma.array(jnt_count, mask=False)
165 |         jnt_count.mask[6:8] = True
166 |         jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
167 | 
168 |         name_value = [
169 |             ('Head', PCKh[head]),
170 |             ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
171 |             ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
172 |             ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
173 |             ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
174 |             ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
175 |             ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
176 |             ('Mean', np.sum(PCKh * jnt_ratio)),
177 |             ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio))
178 |         ]
179 |         name_value = OrderedDict(name_value)
180 | 
181 |         return name_value, name_value['Mean']
182 | 


--------------------------------------------------------------------------------
/lib/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | from __future__ import absolute_import
12 | from __future__ import division
13 | from __future__ import print_function
14 | 
15 | import models.pose_resnet
16 | import models.pose_hrnet
17 | import models.lpn
18 | 


--------------------------------------------------------------------------------
/lib/models/lpn.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | import math
  4 | import torch
  5 | import torch.nn as nn
  6 | from .lightweight_modules import LW_Bottleneck, LW_BasicBlock, MV2_BasicBlock
  7 | 
  8 | BN_MOMENTUM = 0.1
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | 
 12 | class LPN(nn.Module):
 13 | 
 14 |     def __init__(self, block, layers, cfg, **kwargs):
 15 |         super(LPN, self).__init__()
 16 |         extra = cfg.MODEL.EXTRA
 17 | 
 18 |         self.inplanes = 64
 19 |         self.deconv_with_bias = extra.DECONV_WITH_BIAS
 20 |         self.attention = extra.get('ATTENTION')
 21 | 
 22 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
 23 |         self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
 24 |         self.relu = nn.ReLU(inplace=True)
 25 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 26 | 
 27 |         self.layer1 = self._make_layer(block, 64, layers[0])
 28 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
 29 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
 30 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=1)
 31 | 
 32 |         # used for deconv layers
 33 |         self.deconv_layers = self._make_deconv_layer(
 34 |             extra.NUM_DECONV_LAYERS,
 35 |             extra.NUM_DECONV_FILTERS,
 36 |             extra.NUM_DECONV_KERNELS,
 37 |         )
 38 | 
 39 |         self.final_layer = nn.Conv2d(
 40 |             in_channels=extra.NUM_DECONV_FILTERS[-1],
 41 |             out_channels=cfg.MODEL.NUM_JOINTS,
 42 |             kernel_size=extra.FINAL_CONV_KERNEL,
 43 |             stride=1,
 44 |             padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0
 45 |         )
 46 | 
 47 |     def _make_layer(self, block, planes, blocks, stride=1):
 48 |         downsample = None
 49 |         if stride != 1 or self.inplanes != planes * block.expansion:
 50 |             downsample = nn.Sequential(
 51 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
 52 |                           kernel_size=1, stride=stride, bias=False),
 53 |                 nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
 54 |             )
 55 | 
 56 |         layers = []
 57 |         layers.append(block(self.inplanes, planes, stride, downsample, self.attention))
 58 |         self.inplanes = planes * block.expansion
 59 |         for i in range(1, blocks):
 60 |             layers.append(block(self.inplanes, planes, attention=self.attention))
 61 | 
 62 |         return nn.Sequential(*layers)
 63 | 
 64 |     def _get_deconv_cfg(self, deconv_kernel, index):
 65 |         if deconv_kernel == 4:
 66 |             padding = 1
 67 |             output_padding = 0
 68 |         elif deconv_kernel == 3:
 69 |             padding = 1
 70 |             output_padding = 1
 71 |         elif deconv_kernel == 2:
 72 |             padding = 0
 73 |             output_padding = 0
 74 | 
 75 |         return deconv_kernel, padding, output_padding
 76 | 
 77 |     def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
 78 |         layers = []
 79 |         for i in range(num_layers):
 80 |             kernel, padding, output_padding = \
 81 |                 self._get_deconv_cfg(num_kernels[i], i)
 82 | 
 83 |             planes = num_filters[i]
 84 |             layers.extend([
 85 |                 nn.ConvTranspose2d(in_channels=self.inplanes, out_channels=planes, kernel_size=kernel,
 86 |                                    stride=2, padding=padding, output_padding=output_padding,
 87 |                                    groups=math.gcd(self.inplanes, planes), bias=self.deconv_with_bias),
 88 |                 nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
 89 |                 nn.ReLU(inplace=True),
 90 |                 nn.Conv2d(planes, planes, kernel_size=1, bias=False),
 91 |                 nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
 92 |                 nn.ReLU(inplace=True),
 93 |             ])
 94 |             self.inplanes = planes
 95 | 
 96 |         return nn.Sequential(*layers)
 97 | 
 98 |     def forward(self, x):
 99 |         x = self.conv1(x)
100 |         x = self.bn1(x)
101 |         x = self.relu(x)
102 |         x = self.maxpool(x)
103 | 
104 |         x = self.layer1(x)
105 |         x = self.layer2(x)
106 |         x = self.layer3(x)
107 |         x = self.layer4(x)
108 | 
109 |         features = self.deconv_layers(x)
110 |         x = self.final_layer(features)
111 | 
112 |         return x
113 | 
114 |     def init_weights(self, pretrained=''):
115 |         if os.path.isfile(pretrained):
116 |             logger.info('=> init deconv weights from normal distribution')
117 |             for name, m in self.deconv_layers.named_modules():
118 |                 if isinstance(m, nn.ConvTranspose2d):
119 |                     logger.info('=> init {}.weight as normal(0, 0.001)'.format(name))
120 |                     logger.info('=> init {}.bias as 0'.format(name))
121 |                     nn.init.normal_(m.weight, std=0.001)
122 |                     if self.deconv_with_bias:
123 |                         nn.init.constant_(m.bias, 0)
124 |                 elif isinstance(m, nn.BatchNorm2d):
125 |                     logger.info('=> init {}.weight as 1'.format(name))
126 |                     logger.info('=> init {}.bias as 0'.format(name))
127 |                     nn.init.constant_(m.weight, 1)
128 |                     nn.init.constant_(m.bias, 0)
129 |             logger.info('=> init final conv weights from normal distribution')
130 |             for m in self.final_layer.modules():
131 |                 if isinstance(m, nn.Conv2d):
132 |                     # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
133 |                     logger.info('=> init {}.weight as normal(0, 0.001)'.format(name))
134 |                     logger.info('=> init {}.bias as 0'.format(name))
135 |                     nn.init.normal_(m.weight, std=0.001)
136 |                     nn.init.constant_(m.bias, 0)
137 | 
138 |             pretrained_state_dict = torch.load(pretrained)
139 |             logger.info('=> loading pretrained model {}'.format(pretrained))
140 |             self.load_state_dict(pretrained_state_dict, strict=False)
141 |         else:
142 |             logger.info('=> init weights from normal distribution')
143 |             for m in self.modules():
144 |                 if isinstance(m, nn.Conv2d):
145 |                     # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
146 |                     nn.init.normal_(m.weight, std=0.001)
147 |                     # nn.init.constant_(m.bias, 0)
148 |                 elif isinstance(m, nn.BatchNorm2d):
149 |                     nn.init.constant_(m.weight, 1)
150 |                     nn.init.constant_(m.bias, 0)
151 |                 elif isinstance(m, nn.ConvTranspose2d):
152 |                     nn.init.normal_(m.weight, std=0.001)
153 |                     if self.deconv_with_bias:
154 |                         nn.init.constant_(m.bias, 0)
155 | 
156 | 
157 | resnet_spec = {
158 |     '18m': (MV2_BasicBlock, [1, 1, 1, 1]),
159 |     18: (LW_BasicBlock, [2, 2, 2, 2]),
160 |     34: (LW_BasicBlock, [3, 4, 6, 3]),
161 |     50: (LW_Bottleneck, [3, 4, 6, 3]),
162 |     101: (LW_Bottleneck, [3, 4, 23, 3]),
163 |     100: (LW_Bottleneck, [3, 13, 30, 3]),
164 |     152: (LW_Bottleneck, [3, 8, 36, 3])
165 | }
166 | 
167 | 
168 | def get_pose_net(cfg, is_train, **kwargs):
169 |     num_layers = cfg.MODEL.EXTRA.NUM_LAYERS
170 | 
171 |     block_class, layers = resnet_spec[num_layers]
172 | 
173 |     model = LPN(block_class, layers, cfg, **kwargs)
174 | 
175 |     if is_train and cfg.MODEL.INIT_WEIGHTS:
176 |         model.init_weights(cfg.MODEL.PRETRAINED)
177 | 
178 |     return model
179 | 


--------------------------------------------------------------------------------
/lib/models/pose_resnet.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import os
 12 | import logging
 13 | 
 14 | import torch
 15 | import torch.nn as nn
 16 | 
 17 | 
 18 | BN_MOMENTUM = 0.1
 19 | logger = logging.getLogger(__name__)
 20 | 
 21 | 
 22 | def conv3x3(in_planes, out_planes, stride=1):
 23 |     """3x3 convolution with padding"""
 24 |     return nn.Conv2d(
 25 |         in_planes, out_planes, kernel_size=3, stride=stride,
 26 |         padding=1, bias=False
 27 |     )
 28 | 
 29 | 
 30 | class BasicBlock(nn.Module):
 31 |     expansion = 1
 32 | 
 33 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 34 |         super(BasicBlock, self).__init__()
 35 |         self.conv1 = conv3x3(inplanes, planes, stride)
 36 |         self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
 37 |         self.relu = nn.ReLU(inplace=True)
 38 |         self.conv2 = conv3x3(planes, planes)
 39 |         self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
 40 |         self.downsample = downsample
 41 |         self.stride = stride
 42 | 
 43 |     def forward(self, x):
 44 |         residual = x
 45 | 
 46 |         out = self.conv1(x)
 47 |         out = self.bn1(out)
 48 |         out = self.relu(out)
 49 | 
 50 |         out = self.conv2(out)
 51 |         out = self.bn2(out)
 52 | 
 53 |         if self.downsample is not None:
 54 |             residual = self.downsample(x)
 55 | 
 56 |         out += residual
 57 |         out = self.relu(out)
 58 | 
 59 |         return out
 60 | 
 61 | 
 62 | class Bottleneck(nn.Module):
 63 |     expansion = 4
 64 | 
 65 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 66 |         super(Bottleneck, self).__init__()
 67 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
 68 |         self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
 69 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
 70 |                                padding=1, bias=False)
 71 |         self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
 72 |         self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
 73 |                                bias=False)
 74 |         self.bn3 = nn.BatchNorm2d(planes * self.expansion,
 75 |                                   momentum=BN_MOMENTUM)
 76 |         self.relu = nn.ReLU(inplace=True)
 77 |         self.downsample = downsample
 78 |         self.stride = stride
 79 | 
 80 |     def forward(self, x):
 81 |         residual = x
 82 | 
 83 |         out = self.conv1(x)
 84 |         out = self.bn1(out)
 85 |         out = self.relu(out)
 86 | 
 87 |         out = self.conv2(out)
 88 |         out = self.bn2(out)
 89 |         out = self.relu(out)
 90 | 
 91 |         out = self.conv3(out)
 92 |         out = self.bn3(out)
 93 | 
 94 |         if self.downsample is not None:
 95 |             residual = self.downsample(x)
 96 | 
 97 |         out += residual
 98 |         out = self.relu(out)
 99 | 
100 |         return out
101 | 
102 | 
103 | class PoseResNet(nn.Module):
104 | 
105 |     def __init__(self, block, layers, cfg, **kwargs):
106 |         self.inplanes = 64
107 |         extra = cfg.MODEL.EXTRA
108 |         self.deconv_with_bias = extra.DECONV_WITH_BIAS
109 | 
110 |         super(PoseResNet, self).__init__()
111 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
112 |                                bias=False)
113 |         self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
114 |         self.relu = nn.ReLU(inplace=True)
115 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
116 |         self.layer1 = self._make_layer(block, 64, layers[0])
117 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
118 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
119 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
120 | 
121 |         # used for deconv layers
122 |         self.deconv_layers = self._make_deconv_layer(
123 |             extra.NUM_DECONV_LAYERS,
124 |             extra.NUM_DECONV_FILTERS,
125 |             extra.NUM_DECONV_KERNELS,
126 |         )
127 | 
128 |         self.final_layer = nn.Conv2d(
129 |             in_channels=extra.NUM_DECONV_FILTERS[-1],
130 |             out_channels=cfg.MODEL.NUM_JOINTS,
131 |             kernel_size=extra.FINAL_CONV_KERNEL,
132 |             stride=1,
133 |             padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0
134 |         )
135 | 
136 |     def _make_layer(self, block, planes, blocks, stride=1):
137 |         downsample = None
138 |         if stride != 1 or self.inplanes != planes * block.expansion:
139 |             downsample = nn.Sequential(
140 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
141 |                           kernel_size=1, stride=stride, bias=False),
142 |                 nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
143 |             )
144 | 
145 |         layers = []
146 |         layers.append(block(self.inplanes, planes, stride, downsample))
147 |         self.inplanes = planes * block.expansion
148 |         for i in range(1, blocks):
149 |             layers.append(block(self.inplanes, planes))
150 | 
151 |         return nn.Sequential(*layers)
152 | 
153 |     def _get_deconv_cfg(self, deconv_kernel, index):
154 |         if deconv_kernel == 4:
155 |             padding = 1
156 |             output_padding = 0
157 |         elif deconv_kernel == 3:
158 |             padding = 1
159 |             output_padding = 1
160 |         elif deconv_kernel == 2:
161 |             padding = 0
162 |             output_padding = 0
163 | 
164 |         return deconv_kernel, padding, output_padding
165 | 
166 |     def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
167 |         assert num_layers == len(num_filters), \
168 |             'ERROR: num_deconv_layers is different len(num_deconv_filters)'
169 |         assert num_layers == len(num_kernels), \
170 |             'ERROR: num_deconv_layers is different len(num_deconv_filters)'
171 | 
172 |         layers = []
173 |         for i in range(num_layers):
174 |             kernel, padding, output_padding = \
175 |                 self._get_deconv_cfg(num_kernels[i], i)
176 | 
177 |             planes = num_filters[i]
178 |             layers.append(
179 |                 nn.ConvTranspose2d(
180 |                     in_channels=self.inplanes,
181 |                     out_channels=planes,
182 |                     kernel_size=kernel,
183 |                     stride=2,
184 |                     padding=padding,
185 |                     output_padding=output_padding,
186 |                     bias=self.deconv_with_bias))
187 |             layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
188 |             layers.append(nn.ReLU(inplace=True))
189 |             self.inplanes = planes
190 | 
191 |         return nn.Sequential(*layers)
192 | 
193 |     def forward(self, x):
194 |         x = self.conv1(x)
195 |         x = self.bn1(x)
196 |         x = self.relu(x)
197 |         x = self.maxpool(x)
198 | 
199 |         x = self.layer1(x)
200 |         x = self.layer2(x)
201 |         x = self.layer3(x)
202 |         x = self.layer4(x)
203 | 
204 |         x = self.deconv_layers(x)
205 |         x = self.final_layer(x)
206 | 
207 |         return x
208 | 
209 |     def init_weights(self, pretrained=''):
210 |         if os.path.isfile(pretrained):
211 |             logger.info('=> init deconv weights from normal distribution')
212 |             for name, m in self.deconv_layers.named_modules():
213 |                 if isinstance(m, nn.ConvTranspose2d):
214 |                     logger.info('=> init {}.weight as normal(0, 0.001)'.format(name))
215 |                     logger.info('=> init {}.bias as 0'.format(name))
216 |                     nn.init.normal_(m.weight, std=0.001)
217 |                     if self.deconv_with_bias:
218 |                         nn.init.constant_(m.bias, 0)
219 |                 elif isinstance(m, nn.BatchNorm2d):
220 |                     logger.info('=> init {}.weight as 1'.format(name))
221 |                     logger.info('=> init {}.bias as 0'.format(name))
222 |                     nn.init.constant_(m.weight, 1)
223 |                     nn.init.constant_(m.bias, 0)
224 |             logger.info('=> init final conv weights from normal distribution')
225 |             for m in self.final_layer.modules():
226 |                 if isinstance(m, nn.Conv2d):
227 |                     # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
228 |                     logger.info('=> init {}.weight as normal(0, 0.001)'.format(name))
229 |                     logger.info('=> init {}.bias as 0'.format(name))
230 |                     nn.init.normal_(m.weight, std=0.001)
231 |                     nn.init.constant_(m.bias, 0)
232 | 
233 |             pretrained_state_dict = torch.load(pretrained)
234 |             logger.info('=> loading pretrained model {}'.format(pretrained))
235 |             self.load_state_dict(pretrained_state_dict, strict=False)
236 |         else:
237 |             logger.info('=> init weights from normal distribution')
238 |             for m in self.modules():
239 |                 if isinstance(m, nn.Conv2d):
240 |                     # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
241 |                     nn.init.normal_(m.weight, std=0.001)
242 |                     # nn.init.constant_(m.bias, 0)
243 |                 elif isinstance(m, nn.BatchNorm2d):
244 |                     nn.init.constant_(m.weight, 1)
245 |                     nn.init.constant_(m.bias, 0)
246 |                 elif isinstance(m, nn.ConvTranspose2d):
247 |                     nn.init.normal_(m.weight, std=0.001)
248 |                     if self.deconv_with_bias:
249 |                         nn.init.constant_(m.bias, 0)
250 | 
251 | 
252 | resnet_spec = {
253 |     18: (BasicBlock, [2, 2, 2, 2]),
254 |     34: (BasicBlock, [3, 4, 6, 3]),
255 |     50: (Bottleneck, [3, 4, 6, 3]),
256 |     101: (Bottleneck, [3, 4, 23, 3]),
257 |     152: (Bottleneck, [3, 8, 36, 3])
258 | }
259 | 
260 | 
261 | def get_pose_net(cfg, is_train, **kwargs):
262 |     num_layers = cfg.MODEL.EXTRA.NUM_LAYERS
263 | 
264 |     block_class, layers = resnet_spec[num_layers]
265 | 
266 |     model = PoseResNet(block_class, layers, cfg, **kwargs)
267 | 
268 |     if is_train and cfg.MODEL.INIT_WEIGHTS:
269 |         model.init_weights(cfg.MODEL.PRETRAINED)
270 | 
271 |     return model
272 | 


--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cavalleria/humanpose.pytorch/08e0316b8f65e9fa45fefc8c9d0e28a6096a1d5f/lib/nms/__init__.py


--------------------------------------------------------------------------------
/lib/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | import numpy as np
12 | cimport numpy as np
13 | 
14 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
15 |     return a if a >= b else b
16 | 
17 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
18 |     return a if a <= b else b
19 | 
20 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
21 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
22 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
23 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
24 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
25 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
26 | 
27 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
28 |     cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i')
29 | 
30 |     cdef int ndets = dets.shape[0]
31 |     cdef np.ndarray[np.int_t, ndim=1] suppressed = \
32 |             np.zeros((ndets), dtype=np.int)
33 | 
34 |     # nominal indices
35 |     cdef int _i, _j
36 |     # sorted indices
37 |     cdef int i, j
38 |     # temp variables for box i's (the box currently under consideration)
39 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
40 |     # variables for computing overlap with box j (lower scoring box)
41 |     cdef np.float32_t xx1, yy1, xx2, yy2
42 |     cdef np.float32_t w, h
43 |     cdef np.float32_t inter, ovr
44 | 
45 |     keep = []
46 |     for _i in range(ndets):
47 |         i = order[_i]
48 |         if suppressed[i] == 1:
49 |             continue
50 |         keep.append(i)
51 |         ix1 = x1[i]
52 |         iy1 = y1[i]
53 |         ix2 = x2[i]
54 |         iy2 = y2[i]
55 |         iarea = areas[i]
56 |         for _j in range(_i + 1, ndets):
57 |             j = order[_j]
58 |             if suppressed[j] == 1:
59 |                 continue
60 |             xx1 = max(ix1, x1[j])
61 |             yy1 = max(iy1, y1[j])
62 |             xx2 = min(ix2, x2[j])
63 |             yy2 = min(iy2, y2[j])
64 |             w = max(0.0, xx2 - xx1 + 1)
65 |             h = max(0.0, yy2 - yy1 + 1)
66 |             inter = w * h
67 |             ovr = inter / (iarea + areas[j] - inter)
68 |             if ovr >= thresh:
69 |                 suppressed[j] = 1
70 | 
71 |     return keep
72 | 


--------------------------------------------------------------------------------
/lib/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 |           int boxes_dim, float nms_overlap_thresh, int device_id);
3 | 


--------------------------------------------------------------------------------
/lib/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | import numpy as np
12 | cimport numpy as np
13 | 
14 | assert sizeof(int) == sizeof(np.int32_t)
15 | 
16 | cdef extern from "gpu_nms.hpp":
17 |     void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
18 | 
19 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
20 |             np.int32_t device_id=0):
21 |     cdef int boxes_num = dets.shape[0]
22 |     cdef int boxes_dim = dets.shape[1]
23 |     cdef int num_out
24 |     cdef np.ndarray[np.int32_t, ndim=1] \
25 |         keep = np.zeros(boxes_num, dtype=np.int32)
26 |     cdef np.ndarray[np.float32_t, ndim=1] \
27 |         scores = dets[:, 4]
28 |     cdef np.ndarray[np.int32_t, ndim=1] \
29 |         order = scores.argsort()[::-1].astype(np.int32)
30 |     cdef np.ndarray[np.float32_t, ndim=2] \
31 |         sorted_dets = dets[order, :]
32 |     _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
33 |     keep = keep[:num_out]
34 |     return list(order[keep])
35 | 


--------------------------------------------------------------------------------
/lib/nms/nms.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import numpy as np
 12 | 
 13 | from .cpu_nms import cpu_nms
 14 | from .gpu_nms import gpu_nms
 15 | 
 16 | 
 17 | def py_nms_wrapper(thresh):
 18 |     def _nms(dets):
 19 |         return nms(dets, thresh)
 20 |     return _nms
 21 | 
 22 | 
 23 | def cpu_nms_wrapper(thresh):
 24 |     def _nms(dets):
 25 |         return cpu_nms(dets, thresh)
 26 |     return _nms
 27 | 
 28 | 
 29 | def gpu_nms_wrapper(thresh, device_id):
 30 |     def _nms(dets):
 31 |         return gpu_nms(dets, thresh, device_id)
 32 |     return _nms
 33 | 
 34 | 
 35 | def nms(dets, thresh):
 36 |     """
 37 |     greedily select boxes with high confidence and overlap with current maximum <= thresh
 38 |     rule out overlap >= thresh
 39 |     :param dets: [[x1, y1, x2, y2 score]]
 40 |     :param thresh: retain overlap < thresh
 41 |     :return: indexes to keep
 42 |     """
 43 |     if dets.shape[0] == 0:
 44 |         return []
 45 | 
 46 |     x1 = dets[:, 0]
 47 |     y1 = dets[:, 1]
 48 |     x2 = dets[:, 2]
 49 |     y2 = dets[:, 3]
 50 |     scores = dets[:, 4]
 51 | 
 52 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 53 |     order = scores.argsort()[::-1]
 54 | 
 55 |     keep = []
 56 |     while order.size > 0:
 57 |         i = order[0]
 58 |         keep.append(i)
 59 |         xx1 = np.maximum(x1[i], x1[order[1:]])
 60 |         yy1 = np.maximum(y1[i], y1[order[1:]])
 61 |         xx2 = np.minimum(x2[i], x2[order[1:]])
 62 |         yy2 = np.minimum(y2[i], y2[order[1:]])
 63 | 
 64 |         w = np.maximum(0.0, xx2 - xx1 + 1)
 65 |         h = np.maximum(0.0, yy2 - yy1 + 1)
 66 |         inter = w * h
 67 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
 68 | 
 69 |         inds = np.where(ovr <= thresh)[0]
 70 |         order = order[inds + 1]
 71 | 
 72 |     return keep
 73 | 
 74 | 
 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
 76 |     if not isinstance(sigmas, np.ndarray):
 77 |         sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
 78 |     vars = (sigmas * 2) ** 2
 79 |     xg = g[0::3]
 80 |     yg = g[1::3]
 81 |     vg = g[2::3]
 82 |     ious = np.zeros((d.shape[0]))
 83 |     for n_d in range(0, d.shape[0]):
 84 |         xd = d[n_d, 0::3]
 85 |         yd = d[n_d, 1::3]
 86 |         vd = d[n_d, 2::3]
 87 |         dx = xd - xg
 88 |         dy = yd - yg
 89 |         e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
 90 |         if in_vis_thre is not None:
 91 |             ind = list(vg > in_vis_thre) and list(vd > in_vis_thre)
 92 |             e = e[ind]
 93 |         ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
 94 |     return ious
 95 | 
 96 | 
 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
 98 |     """
 99 |     greedily select boxes with high confidence and overlap with current maximum <= thresh
100 |     rule out overlap >= thresh, overlap = oks
101 |     :param kpts_db
102 |     :param thresh: retain overlap < thresh
103 |     :return: indexes to keep
104 |     """
105 |     if len(kpts_db) == 0:
106 |         return []
107 | 
108 |     scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
109 |     kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
110 |     areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
111 | 
112 |     order = scores.argsort()[::-1]
113 | 
114 |     keep = []
115 |     while order.size > 0:
116 |         i = order[0]
117 |         keep.append(i)
118 | 
119 |         oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre)
120 | 
121 |         inds = np.where(oks_ovr <= thresh)[0]
122 |         order = order[inds + 1]
123 | 
124 |     return keep
125 | 
126 | 
127 | def rescore(overlap, scores, thresh, type='gaussian'):
128 |     assert overlap.shape[0] == scores.shape[0]
129 |     if type == 'linear':
130 |         inds = np.where(overlap >= thresh)[0]
131 |         scores[inds] = scores[inds] * (1 - overlap[inds])
132 |     else:
133 |         scores = scores * np.exp(- overlap**2 / thresh)
134 | 
135 |     return scores
136 | 
137 | 
138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
139 |     """
140 |     greedily select boxes with high confidence and overlap with current maximum <= thresh
141 |     rule out overlap >= thresh, overlap = oks
142 |     :param kpts_db
143 |     :param thresh: retain overlap < thresh
144 |     :return: indexes to keep
145 |     """
146 |     if len(kpts_db) == 0:
147 |         return []
148 | 
149 |     scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
150 |     kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
151 |     areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
152 | 
153 |     order = scores.argsort()[::-1]
154 |     scores = scores[order]
155 | 
156 |     # max_dets = order.size
157 |     max_dets = 20
158 |     keep = np.zeros(max_dets, dtype=np.intp)
159 |     keep_cnt = 0
160 |     while order.size > 0 and keep_cnt < max_dets:
161 |         i = order[0]
162 | 
163 |         oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre)
164 | 
165 |         order = order[1:]
166 |         scores = rescore(oks_ovr, scores[1:], thresh)
167 | 
168 |         tmp = scores.argsort()[::-1]
169 |         order = order[tmp]
170 |         scores = scores[tmp]
171 | 
172 |         keep[keep_cnt] = i
173 |         keep_cnt += 1
174 | 
175 |     keep = keep[:keep_cnt]
176 | 
177 |     return keep
178 |     # kpts_db = kpts_db[:keep_cnt]
179 | 
180 |     # return kpts_db
181 | 


--------------------------------------------------------------------------------
/lib/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Copyright (c) Microsoft
  3 | // Licensed under The MIT License
  4 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
  5 | // ------------------------------------------------------------------
  6 | 
  7 | #include "gpu_nms.hpp"
  8 | #include <vector>
  9 | #include <iostream>
 10 | 
 11 | #define CUDA_CHECK(condition) \
 12 |   /* Code block avoids redefinition of cudaError_t error */ \
 13 |   do { \
 14 |     cudaError_t error = condition; \
 15 |     if (error != cudaSuccess) { \
 16 |       std::cout << cudaGetErrorString(error) << std::endl; \
 17 |     } \
 18 |   } while (0)
 19 | 
 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 22 | 
 23 | __device__ inline float devIoU(float const * const a, float const * const b) {
 24 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 25 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 26 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 27 |   float interS = width * height;
 28 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 29 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 30 |   return interS / (Sa + Sb - interS);
 31 | }
 32 | 
 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 34 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 35 |   const int row_start = blockIdx.y;
 36 |   const int col_start = blockIdx.x;
 37 | 
 38 |   // if (row_start > col_start) return;
 39 | 
 40 |   const int row_size =
 41 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 42 |   const int col_size =
 43 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 44 | 
 45 |   __shared__ float block_boxes[threadsPerBlock * 5];
 46 |   if (threadIdx.x < col_size) {
 47 |     block_boxes[threadIdx.x * 5 + 0] =
 48 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 49 |     block_boxes[threadIdx.x * 5 + 1] =
 50 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 51 |     block_boxes[threadIdx.x * 5 + 2] =
 52 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 53 |     block_boxes[threadIdx.x * 5 + 3] =
 54 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 55 |     block_boxes[threadIdx.x * 5 + 4] =
 56 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 57 |   }
 58 |   __syncthreads();
 59 | 
 60 |   if (threadIdx.x < row_size) {
 61 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 62 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 63 |     int i = 0;
 64 |     unsigned long long t = 0;
 65 |     int start = 0;
 66 |     if (row_start == col_start) {
 67 |       start = threadIdx.x + 1;
 68 |     }
 69 |     for (i = start; i < col_size; i++) {
 70 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 71 |         t |= 1ULL << i;
 72 |       }
 73 |     }
 74 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 75 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 76 |   }
 77 | }
 78 | 
 79 | void _set_device(int device_id) {
 80 |   int current_device;
 81 |   CUDA_CHECK(cudaGetDevice(&current_device));
 82 |   if (current_device == device_id) {
 83 |     return;
 84 |   }
 85 |   // The call to cudaSetDevice must come before any calls to Get, which
 86 |   // may perform initialization using the GPU.
 87 |   CUDA_CHECK(cudaSetDevice(device_id));
 88 | }
 89 | 
 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 91 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 92 |   _set_device(device_id);
 93 | 
 94 |   float* boxes_dev = NULL;
 95 |   unsigned long long* mask_dev = NULL;
 96 | 
 97 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 98 | 
 99 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
100 |                         boxes_num * boxes_dim * sizeof(float)));
101 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
102 |                         boxes_host,
103 |                         boxes_num * boxes_dim * sizeof(float),
104 |                         cudaMemcpyHostToDevice));
105 | 
106 |   CUDA_CHECK(cudaMalloc(&mask_dev,
107 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
108 | 
109 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
110 |               DIVUP(boxes_num, threadsPerBlock));
111 |   dim3 threads(threadsPerBlock);
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 |                                   nms_overlap_thresh,
114 |                                   boxes_dev,
115 |                                   mask_dev);
116 | 
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 |                         mask_dev,
120 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
121 |                         cudaMemcpyDeviceToHost));
122 | 
123 |   std::vector<unsigned long long> remv(col_blocks);
124 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 | 
126 |   int num_to_keep = 0;
127 |   for (int i = 0; i < boxes_num; i++) {
128 |     int nblock = i / threadsPerBlock;
129 |     int inblock = i % threadsPerBlock;
130 | 
131 |     if (!(remv[nblock] & (1ULL << inblock))) {
132 |       keep_out[num_to_keep++] = i;
133 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
134 |       for (int j = nblock; j < col_blocks; j++) {
135 |         remv[j] |= p[j];
136 |       }
137 |     }
138 |   }
139 |   *num_out = num_to_keep;
140 | 
141 |   CUDA_CHECK(cudaFree(boxes_dev));
142 |   CUDA_CHECK(cudaFree(mask_dev));
143 | }
144 | 


--------------------------------------------------------------------------------
/lib/nms/setup_linux.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Pose.gluon
  3 | # Copyright (c) 2018-present Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
  6 | # --------------------------------------------------------
  7 | 
  8 | import os
  9 | from os.path import join as pjoin
 10 | from setuptools import setup
 11 | from distutils.extension import Extension
 12 | from Cython.Distutils import build_ext
 13 | import numpy as np
 14 | 
 15 | 
 16 | def find_in_path(name, path):
 17 |     "Find a file in a search path"
 18 |     # Adapted fom
 19 |     # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
 20 |     for dir in path.split(os.pathsep):
 21 |         binpath = pjoin(dir, name)
 22 |         if os.path.exists(binpath):
 23 |             return os.path.abspath(binpath)
 24 |     return None
 25 | 
 26 | 
 27 | def locate_cuda():
 28 |     """Locate the CUDA environment on the system
 29 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
 30 |     and values giving the absolute path to each directory.
 31 |     Starts by looking for the CUDAHOME env variable. If not found, everything
 32 |     is based on finding 'nvcc' in the PATH.
 33 |     """
 34 | 
 35 |     # first check if the CUDAHOME env variable is in use
 36 |     if 'CUDAHOME' in os.environ:
 37 |         home = os.environ['CUDAHOME']
 38 |         nvcc = pjoin(home, 'bin', 'nvcc')
 39 |     else:
 40 |         # otherwise, search the PATH for NVCC
 41 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
 42 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
 43 |         if nvcc is None:
 44 |             raise EnvironmentError('The nvcc binary could not be '
 45 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
 46 |         home = os.path.dirname(os.path.dirname(nvcc))
 47 | 
 48 |     cudaconfig = {'home':home, 'nvcc':nvcc,
 49 |                   'include': pjoin(home, 'include'),
 50 |                   'lib64': pjoin(home, 'lib64')}
 51 |     for k, v in cudaconfig.items():
 52 |         if not os.path.exists(v):
 53 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
 54 | 
 55 |     return cudaconfig
 56 | CUDA = locate_cuda()
 57 | 
 58 | 
 59 | # Obtain the numpy include directory.  This logic works across numpy versions.
 60 | try:
 61 |     numpy_include = np.get_include()
 62 | except AttributeError:
 63 |     numpy_include = np.get_numpy_include()
 64 | 
 65 | 
 66 | def customize_compiler_for_nvcc(self):
 67 |     """inject deep into distutils to customize how the dispatch
 68 |     to gcc/nvcc works.
 69 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
 70 |     injected in, and still have the right customizations (i.e.
 71 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
 72 |     the OO route, I have this. Note, it's kindof like a wierd functional
 73 |     subclassing going on."""
 74 | 
 75 |     # tell the compiler it can processes .cu
 76 |     self.src_extensions.append('.cu')
 77 | 
 78 |     # save references to the default compiler_so and _comple methods
 79 |     default_compiler_so = self.compiler_so
 80 |     super = self._compile
 81 | 
 82 |     # now redefine the _compile method. This gets executed for each
 83 |     # object but distutils doesn't have the ability to change compilers
 84 |     # based on source extension: we add it.
 85 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
 86 |         if os.path.splitext(src)[1] == '.cu':
 87 |             # use the cuda for .cu files
 88 |             self.set_executable('compiler_so', CUDA['nvcc'])
 89 |             # use only a subset of the extra_postargs, which are 1-1 translated
 90 |             # from the extra_compile_args in the Extension class
 91 |             postargs = extra_postargs['nvcc']
 92 |         else:
 93 |             postargs = extra_postargs['gcc']
 94 | 
 95 |         super(obj, src, ext, cc_args, postargs, pp_opts)
 96 |         # reset the default compiler_so, which we might have changed for cuda
 97 |         self.compiler_so = default_compiler_so
 98 | 
 99 |     # inject our redefined _compile method into the class
100 |     self._compile = _compile
101 | 
102 | 
103 | # run the customize_compiler
104 | class custom_build_ext(build_ext):
105 |     def build_extensions(self):
106 |         customize_compiler_for_nvcc(self.compiler)
107 |         build_ext.build_extensions(self)
108 | 
109 | 
110 | ext_modules = [
111 |     Extension(
112 |         "cpu_nms",
113 |         ["cpu_nms.pyx"],
114 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
115 |         include_dirs = [numpy_include]
116 |     ),
117 |     Extension('gpu_nms',
118 |         ['nms_kernel.cu', 'gpu_nms.pyx'],
119 |         library_dirs=[CUDA['lib64']],
120 |         libraries=['cudart'],
121 |         language='c++',
122 |         runtime_library_dirs=[CUDA['lib64']],
123 |         # this syntax is specific to this build system
124 |         # we're only going to use certain compiler args with nvcc and not with
125 |         # gcc the implementation of this trick is in customize_compiler() below
126 |         extra_compile_args={'gcc': ["-Wno-unused-function"],
127 |                             'nvcc': ['-arch=sm_35',
128 |                                      '--ptxas-options=-v',
129 |                                      '-c',
130 |                                      '--compiler-options',
131 |                                      "'-fPIC'"]},
132 |         include_dirs = [numpy_include, CUDA['include']]
133 |     ),
134 | ]
135 | 
136 | setup(
137 |     name='nms',
138 |     ext_modules=ext_modules,
139 |     # inject our custom trigger
140 |     cmdclass={'build_ext': custom_build_ext},
141 | )
142 | 


--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cavalleria/humanpose.pytorch/08e0316b8f65e9fa45fefc8c9d0e28a6096a1d5f/lib/utils/__init__.py


--------------------------------------------------------------------------------
/lib/utils/transforms.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import numpy as np
 12 | import cv2
 13 | 
 14 | 
 15 | def flip_back(output_flipped, matched_parts):
 16 |     '''
 17 |     ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width)
 18 |     '''
 19 |     assert output_flipped.ndim == 4,\
 20 |         'output_flipped should be [batch_size, num_joints, height, width]'
 21 | 
 22 |     output_flipped = output_flipped[:, :, :, ::-1]
 23 | 
 24 |     for pair in matched_parts:
 25 |         tmp = output_flipped[:, pair[0], :, :].copy()
 26 |         output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
 27 |         output_flipped[:, pair[1], :, :] = tmp
 28 | 
 29 |     return output_flipped
 30 | 
 31 | 
 32 | def fliplr_joints(joints, joints_vis, width, matched_parts):
 33 |     """
 34 |     flip coords
 35 |     """
 36 |     # Flip horizontal
 37 |     joints[:, 0] = width - joints[:, 0] - 1
 38 | 
 39 |     # Change left-right parts
 40 |     for pair in matched_parts:
 41 |         joints[pair[0], :], joints[pair[1], :] = \
 42 |             joints[pair[1], :], joints[pair[0], :].copy()
 43 |         joints_vis[pair[0], :], joints_vis[pair[1], :] = \
 44 |             joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
 45 | 
 46 |     return joints*joints_vis, joints_vis
 47 | 
 48 | 
 49 | def transform_preds(coords, center, scale, output_size):
 50 |     target_coords = np.zeros(coords.shape)
 51 |     trans = get_affine_transform(center, scale, 0, output_size, inv=1)
 52 |     for p in range(coords.shape[0]):
 53 |         target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
 54 |     return target_coords
 55 | 
 56 | 
 57 | def get_affine_transform(
 58 |         center, scale, rot, output_size,
 59 |         shift=np.array([0, 0], dtype=np.float32), inv=0
 60 | ):
 61 |     if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
 62 |         print(scale)
 63 |         scale = np.array([scale, scale])
 64 | 
 65 |     scale_tmp = scale * 200.0
 66 |     src_w = scale_tmp[0]
 67 |     dst_w = output_size[0]
 68 |     dst_h = output_size[1]
 69 | 
 70 |     rot_rad = np.pi * rot / 180
 71 |     src_dir = get_dir([0, src_w * -0.5], rot_rad)
 72 |     dst_dir = np.array([0, dst_w * -0.5], np.float32)
 73 | 
 74 |     src = np.zeros((3, 2), dtype=np.float32)
 75 |     dst = np.zeros((3, 2), dtype=np.float32)
 76 |     src[0, :] = center + scale_tmp * shift
 77 |     src[1, :] = center + src_dir + scale_tmp * shift
 78 |     dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
 79 |     dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
 80 | 
 81 |     src[2:, :] = get_3rd_point(src[0, :], src[1, :])
 82 |     dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
 83 | 
 84 |     if inv:
 85 |         trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
 86 |     else:
 87 |         trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
 88 | 
 89 |     return trans
 90 | 
 91 | 
 92 | def affine_transform(pt, t):
 93 |     new_pt = np.array([pt[0], pt[1], 1.]).T
 94 |     new_pt = np.dot(t, new_pt)
 95 |     return new_pt[:2]
 96 | 
 97 | 
 98 | def get_3rd_point(a, b):
 99 |     direct = a - b
100 |     return b + np.array([-direct[1], direct[0]], dtype=np.float32)
101 | 
102 | 
103 | def get_dir(src_point, rot_rad):
104 |     sn, cs = np.sin(rot_rad), np.cos(rot_rad)
105 | 
106 |     src_result = [0, 0]
107 |     src_result[0] = src_point[0] * cs - src_point[1] * sn
108 |     src_result[1] = src_point[0] * sn + src_point[1] * cs
109 | 
110 |     return src_result
111 | 
112 | 
113 | def crop(img, center, scale, output_size, rot=0):
114 |     trans = get_affine_transform(center, scale, rot, output_size)
115 | 
116 |     dst_img = cv2.warpAffine(
117 |         img, trans, (int(output_size[0]), int(output_size[1])),
118 |         flags=cv2.INTER_LINEAR
119 |     )
120 | 
121 |     return dst_img
122 | 


--------------------------------------------------------------------------------
/lib/utils/utils.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import os
 12 | import logging
 13 | import time
 14 | from collections import namedtuple
 15 | from pathlib import Path
 16 | 
 17 | import torch
 18 | import torch.optim as optim
 19 | import torch.nn as nn
 20 | 
 21 | 
 22 | def setup_logger(final_output_dir, rank, phase):
 23 |     time_str = time.strftime('%Y-%m-%d-%H-%M')
 24 |     log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank)
 25 |     final_log_file = os.path.join(final_output_dir, log_file)
 26 |     head = '%(asctime)-15s %(message)s'
 27 |     # logging.basicConfig(format=head)
 28 |     logging.basicConfig(filename=str(final_log_file),
 29 |                         format=head)
 30 |     logger = logging.getLogger()
 31 |     logger.setLevel(logging.INFO)
 32 |     console = logging.StreamHandler()
 33 |     logging.getLogger('').addHandler(console)
 34 | 
 35 |     return logger, time_str
 36 | 
 37 | def create_logger(cfg, cfg_name, phase='train'):
 38 |     root_output_dir = Path(cfg.OUTPUT_DIR)
 39 |     # set up logger
 40 |     if not root_output_dir.exists():
 41 |         print('=> creating {}'.format(root_output_dir))
 42 |         root_output_dir.mkdir()
 43 | 
 44 |     dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \
 45 |         if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET
 46 |     dataset = dataset.replace(':', '_')
 47 |     model = cfg.MODEL.NAME
 48 |     cfg_name = os.path.basename(cfg_name).split('.')[0]
 49 | 
 50 |     final_output_dir = root_output_dir / dataset / model / cfg_name
 51 | 
 52 |     print('=> creating {}'.format(final_output_dir))
 53 |     final_output_dir.mkdir(parents=True, exist_ok=True)
 54 | 
 55 |     time_str = time.strftime('%Y-%m-%d-%H-%M')
 56 |     log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase)
 57 |     final_log_file = final_output_dir / log_file
 58 |     head = '%(asctime)-15s %(message)s'
 59 |     logging.basicConfig(filename=str(final_log_file),
 60 |                         format=head)
 61 |     logger = logging.getLogger()
 62 |     logger.setLevel(logging.INFO)
 63 |     console = logging.StreamHandler()
 64 |     logging.getLogger('').addHandler(console)
 65 | 
 66 |     tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
 67 |         (cfg_name + '_' + time_str)
 68 | 
 69 |     print('=> creating {}'.format(tensorboard_log_dir))
 70 |     tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
 71 | 
 72 |     return logger, str(final_output_dir), str(tensorboard_log_dir)
 73 | 
 74 | 
 75 | def get_optimizer(cfg, model):
 76 |     optimizer = None
 77 |     if cfg.TRAIN.OPTIMIZER == 'sgd':
 78 |         optimizer = optim.SGD(
 79 |             model.parameters(),
 80 |             lr=cfg.TRAIN.LR,
 81 |             momentum=cfg.TRAIN.MOMENTUM,
 82 |             weight_decay=cfg.TRAIN.WD,
 83 |             nesterov=cfg.TRAIN.NESTEROV
 84 |         )
 85 |     elif cfg.TRAIN.OPTIMIZER == 'adam':
 86 |         optimizer = optim.Adam(
 87 |             model.parameters(),
 88 |             lr=cfg.TRAIN.LR
 89 |         )
 90 | 
 91 |     return optimizer
 92 | 
 93 | 
 94 | def save_checkpoint(states, is_best, output_dir,
 95 |                     filename='checkpoint.pth'):
 96 |     torch.save(states, os.path.join(output_dir, filename))
 97 |     if is_best and 'state_dict' in states:
 98 |         torch.save(states['best_state_dict'],
 99 |                    os.path.join(output_dir, 'model_best.pth'))
100 | 
101 | 
102 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False):
103 |     """
104 |     :param model:
105 |     :param input_tensors:
106 |     :param item_length:
107 |     :return:
108 |     """
109 | 
110 |     summary = []
111 | 
112 |     ModuleDetails = namedtuple(
113 |         "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"])
114 |     hooks = []
115 |     layer_instances = {}
116 | 
117 |     def add_hooks(module):
118 | 
119 |         def hook(module, input, output):
120 |             class_name = str(module.__class__.__name__)
121 | 
122 |             instance_index = 1
123 |             if class_name not in layer_instances:
124 |                 layer_instances[class_name] = instance_index
125 |             else:
126 |                 instance_index = layer_instances[class_name] + 1
127 |                 layer_instances[class_name] = instance_index
128 | 
129 |             layer_name = class_name + "_" + str(instance_index)
130 | 
131 |             params = 0
132 | 
133 |             if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \
134 |                class_name.find("Linear") != -1:
135 |                 for param_ in module.parameters():
136 |                     params += param_.view(-1).size(0)
137 | 
138 |             flops = "Not Available"
139 |             if class_name.find("Conv") != -1 and hasattr(module, "weight"):
140 |                 flops = (
141 |                     torch.prod(
142 |                         torch.LongTensor(list(module.weight.data.size()))) *
143 |                     torch.prod(
144 |                         torch.LongTensor(list(output.size())[2:]))).item()
145 |             elif isinstance(module, nn.Linear):
146 |                 flops = (torch.prod(torch.LongTensor(list(output.size()))) \
147 |                          * input[0].size(1)).item()
148 | 
149 |             if isinstance(input[0], list):
150 |                 input = input[0]
151 |             if isinstance(output, list):
152 |                 output = output[0]
153 | 
154 |             summary.append(
155 |                 ModuleDetails(
156 |                     name=layer_name,
157 |                     input_size=list(input[0].size()),
158 |                     output_size=list(output.size()),
159 |                     num_parameters=params,
160 |                     multiply_adds=flops)
161 |             )
162 | 
163 |         if not isinstance(module, nn.ModuleList) \
164 |            and not isinstance(module, nn.Sequential) \
165 |            and module != model:
166 |             hooks.append(module.register_forward_hook(hook))
167 | 
168 |     model.eval()
169 |     model.apply(add_hooks)
170 | 
171 |     space_len = item_length
172 | 
173 |     model(*input_tensors)
174 |     for hook in hooks:
175 |         hook.remove()
176 | 
177 |     details = ''
178 |     if verbose:
179 |         details = "Model Summary" + \
180 |             os.linesep + \
181 |             "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format(
182 |                 ' ' * (space_len - len("Name")),
183 |                 ' ' * (space_len - len("Input Size")),
184 |                 ' ' * (space_len - len("Output Size")),
185 |                 ' ' * (space_len - len("Parameters")),
186 |                 ' ' * (space_len - len("Multiply Adds (Flops)"))) \
187 |                 + os.linesep + '-' * space_len * 5 + os.linesep
188 | 
189 |     params_sum = 0
190 |     flops_sum = 0
191 |     for layer in summary:
192 |         params_sum += layer.num_parameters
193 |         if layer.multiply_adds != "Not Available":
194 |             flops_sum += layer.multiply_adds
195 |         if verbose:
196 |             details += "{}{}{}{}{}{}{}{}{}{}".format(
197 |                 layer.name,
198 |                 ' ' * (space_len - len(layer.name)),
199 |                 layer.input_size,
200 |                 ' ' * (space_len - len(str(layer.input_size))),
201 |                 layer.output_size,
202 |                 ' ' * (space_len - len(str(layer.output_size))),
203 |                 layer.num_parameters,
204 |                 ' ' * (space_len - len(str(layer.num_parameters))),
205 |                 layer.multiply_adds,
206 |                 ' ' * (space_len - len(str(layer.multiply_adds)))) \
207 |                 + os.linesep + '-' * space_len * 5 + os.linesep
208 | 
209 |     details += os.linesep \
210 |         + "Total Parameters: {:,}".format(params_sum) \
211 |         + os.linesep + '-' * space_len * 5 + os.linesep
212 |     details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \
213 |         + os.linesep + '-' * space_len * 5 + os.linesep
214 |     details += "Number of Layers" + os.linesep
215 |     for layer in layer_instances:
216 |         details += "{} : {} layers   ".format(layer, layer_instances[layer])
217 | 
218 |     return details
219 | 


--------------------------------------------------------------------------------
/lib/utils/vis.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import math
 12 | 
 13 | import numpy as np
 14 | import torchvision
 15 | import cv2
 16 | 
 17 | from core.inference import get_max_preds
 18 | 
 19 | 
 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis,
 21 |                                  file_name, nrow=8, padding=2):
 22 |     '''
 23 |     batch_image: [batch_size, channel, height, width]
 24 |     batch_joints: [batch_size, num_joints, 3],
 25 |     batch_joints_vis: [batch_size, num_joints, 1],
 26 |     }
 27 |     '''
 28 |     grid = torchvision.utils.make_grid(batch_image, nrow, padding, True)
 29 |     ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
 30 |     ndarr = ndarr.copy()
 31 | 
 32 |     nmaps = batch_image.size(0)
 33 |     xmaps = min(nrow, nmaps)
 34 |     ymaps = int(math.ceil(float(nmaps) / xmaps))
 35 |     height = int(batch_image.size(2) + padding)
 36 |     width = int(batch_image.size(3) + padding)
 37 |     k = 0
 38 |     for y in range(ymaps):
 39 |         for x in range(xmaps):
 40 |             if k >= nmaps:
 41 |                 break
 42 |             joints = batch_joints[k]
 43 |             joints_vis = batch_joints_vis[k]
 44 | 
 45 |             for joint, joint_vis in zip(joints, joints_vis):
 46 |                 joint[0] = x * width + padding + joint[0]
 47 |                 joint[1] = y * height + padding + joint[1]
 48 |                 if joint_vis[0]:
 49 |                     cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
 50 |             k = k + 1
 51 |     cv2.imwrite(file_name, ndarr)
 52 | 
 53 | 
 54 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
 55 |                         normalize=True):
 56 |     '''
 57 |     batch_image: [batch_size, channel, height, width]
 58 |     batch_heatmaps: ['batch_size, num_joints, height, width]
 59 |     file_name: saved file name
 60 |     '''
 61 |     if normalize:
 62 |         batch_image = batch_image.clone()
 63 |         min = float(batch_image.min())
 64 |         max = float(batch_image.max())
 65 | 
 66 |         batch_image.add_(-min).div_(max - min + 1e-5)
 67 | 
 68 |     batch_size = batch_heatmaps.size(0)
 69 |     num_joints = batch_heatmaps.size(1)
 70 |     heatmap_height = batch_heatmaps.size(2)
 71 |     heatmap_width = batch_heatmaps.size(3)
 72 | 
 73 |     grid_image = np.zeros((batch_size*heatmap_height,
 74 |                            (num_joints+1)*heatmap_width,
 75 |                            3),
 76 |                           dtype=np.uint8)
 77 | 
 78 |     preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())
 79 | 
 80 |     for i in range(batch_size):
 81 |         image = batch_image[i].mul(255)\
 82 |                               .clamp(0, 255)\
 83 |                               .byte()\
 84 |                               .permute(1, 2, 0)\
 85 |                               .cpu().numpy()
 86 |         heatmaps = batch_heatmaps[i].mul(255)\
 87 |                                     .clamp(0, 255)\
 88 |                                     .byte()\
 89 |                                     .cpu().numpy()
 90 | 
 91 |         resized_image = cv2.resize(image,
 92 |                                    (int(heatmap_width), int(heatmap_height)))
 93 | 
 94 |         height_begin = heatmap_height * i
 95 |         height_end = heatmap_height * (i + 1)
 96 |         for j in range(num_joints):
 97 |             cv2.circle(resized_image,
 98 |                        (int(preds[i][j][0]), int(preds[i][j][1])),
 99 |                        1, [0, 0, 255], 1)
100 |             heatmap = heatmaps[j, :, :]
101 |             colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
102 |             masked_image = colored_heatmap*0.7 + resized_image*0.3
103 |             cv2.circle(masked_image,
104 |                        (int(preds[i][j][0]), int(preds[i][j][1])),
105 |                        1, [0, 0, 255], 1)
106 | 
107 |             width_begin = heatmap_width * (j+1)
108 |             width_end = heatmap_width * (j+2)
109 |             grid_image[height_begin:height_end, width_begin:width_end, :] = \
110 |                 masked_image
111 |             # grid_image[height_begin:height_end, width_begin:width_end, :] = \
112 |             #     colored_heatmap*0.7 + resized_image*0.3
113 | 
114 |         grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image
115 | 
116 |     cv2.imwrite(file_name, grid_image)
117 | 
118 | 
119 | def save_debug_images(config, input, meta, target, joints_pred, output,
120 |                       prefix):
121 |     if not config.DEBUG.DEBUG:
122 |         return
123 | 
124 |     if config.DEBUG.SAVE_BATCH_IMAGES_GT:
125 |         save_batch_image_with_joints(
126 |             input, meta['joints'], meta['joints_vis'],
127 |             '{}_gt.jpg'.format(prefix)
128 |         )
129 |     if config.DEBUG.SAVE_BATCH_IMAGES_PRED:
130 |         save_batch_image_with_joints(
131 |             input, joints_pred, meta['joints_vis'],
132 |             '{}_pred.jpg'.format(prefix)
133 |         )
134 |     if config.DEBUG.SAVE_HEATMAPS_GT:
135 |         save_batch_heatmaps(
136 |             input, target, '{}_hm_gt.jpg'.format(prefix)
137 |         )
138 |     if config.DEBUG.SAVE_HEATMAPS_PRED:
139 |         save_batch_heatmaps(
140 |             input, output, '{}_hm_pred.jpg'.format(prefix)
141 |         )
142 | 


--------------------------------------------------------------------------------
/lib/utils/zipreader.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # Copyright (c) Microsoft
 3 | # Licensed under the MIT License.
 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 5 | # ------------------------------------------------------------------------------
 6 | 
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | import os
12 | import zipfile
13 | import xml.etree.ElementTree as ET
14 | 
15 | import cv2
16 | import numpy as np
17 | 
18 | _im_zfile = []
19 | _xml_path_zip = []
20 | _xml_zfile = []
21 | 
22 | 
23 | def imread(filename, flags=cv2.IMREAD_COLOR):
24 |     global _im_zfile
25 |     path = filename
26 |     pos_at = path.index('@')
27 |     if pos_at == -1:
28 |         print("character '@' is not found from the given path '%s'"%(path))
29 |         assert 0
30 |     path_zip = path[0: pos_at]
31 |     path_img = path[pos_at + 2:]
32 |     if not os.path.isfile(path_zip):
33 |         print("zip file '%s' is not found"%(path_zip))
34 |         assert 0
35 |     for i in range(len(_im_zfile)):
36 |         if _im_zfile[i]['path'] == path_zip:
37 |             data = _im_zfile[i]['zipfile'].read(path_img)
38 |             return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
39 | 
40 |     _im_zfile.append({
41 |         'path': path_zip,
42 |         'zipfile': zipfile.ZipFile(path_zip, 'r')
43 |     })
44 |     data = _im_zfile[-1]['zipfile'].read(path_img)
45 | 
46 |     return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
47 | 
48 | 
49 | def xmlread(filename):
50 |     global _xml_path_zip
51 |     global _xml_zfile
52 |     path = filename
53 |     pos_at = path.index('@')
54 |     if pos_at == -1:
55 |         print("character '@' is not found from the given path '%s'"%(path))
56 |         assert 0
57 |     path_zip = path[0: pos_at]
58 |     path_xml = path[pos_at + 2:]
59 |     if not os.path.isfile(path_zip):
60 |         print("zip file '%s' is not found"%(path_zip))
61 |         assert 0
62 |     for i in xrange(len(_xml_path_zip)):
63 |         if _xml_path_zip[i] == path_zip:
64 |             data = _xml_zfile[i].open(path_xml)
65 |             return ET.fromstring(data.read())
66 |     _xml_path_zip.append(path_zip)
67 |     print("read new xml file '%s'"%(path_zip))
68 |     _xml_zfile.append(zipfile.ZipFile(path_zip, 'r'))
69 |     data = _xml_zfile[-1].open(path_xml)
70 |     return ET.fromstring(data.read())
71 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | shapely==1.6.4
2 | Cython
3 | pyyaml
4 | json_tricks
5 | yacs>=0.1.5
6 | 


--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
 1 | # ------------------------------------------------------------------------------
 2 | # pose.pytorch
 3 | # Copyright (c) 2018-present Microsoft
 4 | # Licensed under The Apache-2.0 License [see LICENSE for details]
 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
 6 | # ------------------------------------------------------------------------------
 7 | 
 8 | from __future__ import absolute_import
 9 | from __future__ import division
10 | from __future__ import print_function
11 | 
12 | import os.path as osp
13 | import sys
14 | 
15 | 
16 | def add_path(path):
17 |     if path not in sys.path:
18 |         sys.path.insert(0, path)
19 | 
20 | 
21 | this_dir = osp.dirname(__file__)
22 | 
23 | lib_path = osp.join(this_dir, '..', 'lib')
24 | add_path(lib_path)
25 | 
26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics')
27 | add_path(mm_path)
28 | 


--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | # ------------------------------------------------------------------------------
  8 | # Updated by cavalleria (cavalleria@gmail.com)
  9 | # ------------------------------------------------------------------------------
 10 | 
 11 | from __future__ import absolute_import
 12 | from __future__ import division
 13 | from __future__ import print_function
 14 | 
 15 | import argparse
 16 | import os
 17 | import pprint
 18 | 
 19 | import torch
 20 | import torch.nn.parallel
 21 | import torch.backends.cudnn as cudnn
 22 | import torch.optim
 23 | import torch.utils.data
 24 | import torch.utils.data.distributed
 25 | import torchvision.transforms as transforms
 26 | 
 27 | import _init_paths
 28 | from config import cfg
 29 | from config import update_config
 30 | from core.loss import JointsMSELoss
 31 | from core.function import validate
 32 | from utils.utils import create_logger
 33 | 
 34 | import dataset
 35 | import models
 36 | 
 37 | 
 38 | def parse_args():
 39 |     parser = argparse.ArgumentParser(description='Train keypoints network')
 40 |     # general
 41 |     parser.add_argument('--cfg',
 42 |                         help='experiment configure file name',
 43 |                         required=True,
 44 |                         type=str)
 45 | 
 46 |     parser.add_argument('opts',
 47 |                         help="Modify config options using the command-line",
 48 |                         default=None,
 49 |                         nargs=argparse.REMAINDER)
 50 | 
 51 |     parser.add_argument('--modelDir',
 52 |                         help='model directory',
 53 |                         type=str,
 54 |                         default='')
 55 |     parser.add_argument('--logDir',
 56 |                         help='log directory',
 57 |                         type=str,
 58 |                         default='')
 59 |     parser.add_argument('--dataDir',
 60 |                         help='data directory',
 61 |                         type=str,
 62 |                         default='')
 63 |     parser.add_argument('--prevModelDir',
 64 |                         help='prev Model directory',
 65 |                         type=str,
 66 |                         default='')
 67 | 
 68 |     args = parser.parse_args()
 69 |     return args
 70 | 
 71 | 
 72 | def main():
 73 |     args = parse_args()
 74 |     update_config(cfg, args)
 75 | 
 76 |     logger, final_output_dir, tb_log_dir = create_logger(
 77 |         cfg, args.cfg, 'valid')
 78 | 
 79 |     logger.info(pprint.pformat(args))
 80 |     logger.info(cfg)
 81 | 
 82 |     # cudnn related setting
 83 |     cudnn.benchmark = cfg.CUDNN.BENCHMARK
 84 |     torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
 85 |     torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
 86 | 
 87 |     model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
 88 |         cfg, is_train=False
 89 |     )
 90 | 
 91 |     if cfg.TEST.MODEL_FILE:
 92 |         logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
 93 |         model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
 94 |     else:
 95 |         model_state_file = os.path.join(
 96 |             final_output_dir, 'final_state.pth'
 97 |         )
 98 |         logger.info('=> loading model from {}'.format(model_state_file))
 99 |         model.load_state_dict(torch.load(model_state_file))
100 | 
101 |     model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
102 | 
103 |     # define loss function (criterion) and optimizer
104 |     criterion = JointsMSELoss(
105 |         use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
106 |     ).cuda()
107 | 
108 |     # Data loading code
109 |     normalize = transforms.Normalize(
110 |         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
111 |     )
112 |     valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
113 |         cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
114 |         transforms.Compose([
115 |             transforms.ToTensor(),
116 |             normalize,
117 |         ])
118 |     )
119 |     valid_loader = torch.utils.data.DataLoader(
120 |         valid_dataset,
121 |         batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
122 |         shuffle=False,
123 |         num_workers=cfg.WORKERS,
124 |         pin_memory=True
125 |     )
126 | 
127 |     # evaluate on validation set
128 |     validate(cfg, valid_loader, valid_dataset, model, criterion,
129 |              final_output_dir, tb_log_dir)
130 | 
131 | 
132 | if __name__ == '__main__':
133 |     main()
134 | 


--------------------------------------------------------------------------------
/tools/train.ori.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import argparse
 12 | import os
 13 | import pprint
 14 | import shutil
 15 | 
 16 | import torch
 17 | import torch.nn.parallel
 18 | import torch.backends.cudnn as cudnn
 19 | import torch.optim
 20 | import torch.utils.data
 21 | import torch.utils.data.distributed
 22 | import torchvision.transforms as transforms
 23 | from tensorboardX import SummaryWriter
 24 | 
 25 | import _init_paths
 26 | from config import cfg
 27 | from config import update_config
 28 | from core.loss import JointsMSELoss
 29 | from core.function import train
 30 | from core.function import validate
 31 | from utils.utils import get_optimizer
 32 | from utils.utils import save_checkpoint
 33 | from utils.utils import create_logger
 34 | from utils.utils import get_model_summary
 35 | 
 36 | import dataset
 37 | import models
 38 | 
 39 | 
 40 | def parse_args():
 41 |     parser = argparse.ArgumentParser(description='Train keypoints network')
 42 |     # general
 43 |     parser.add_argument('--cfg',
 44 |                         help='experiment configure file name',
 45 |                         required=True,
 46 |                         type=str)
 47 | 
 48 |     parser.add_argument('opts',
 49 |                         help="Modify config options using the command-line",
 50 |                         default=None,
 51 |                         nargs=argparse.REMAINDER)
 52 | 
 53 |     # philly
 54 |     parser.add_argument('--modelDir',
 55 |                         help='model directory',
 56 |                         type=str,
 57 |                         default='')
 58 |     parser.add_argument('--logDir',
 59 |                         help='log directory',
 60 |                         type=str,
 61 |                         default='')
 62 |     parser.add_argument('--dataDir',
 63 |                         help='data directory',
 64 |                         type=str,
 65 |                         default='')
 66 |     parser.add_argument('--prevModelDir',
 67 |                         help='prev Model directory',
 68 |                         type=str,
 69 |                         default='')
 70 | 
 71 |     args = parser.parse_args()
 72 | 
 73 |     return args
 74 | 
 75 | 
 76 | def main():
 77 |     args = parse_args()
 78 |     update_config(cfg, args)
 79 | 
 80 |     logger, final_output_dir, tb_log_dir = create_logger(
 81 |         cfg, args.cfg, 'train')
 82 | 
 83 |     logger.info(pprint.pformat(args))
 84 |     logger.info(cfg)
 85 | 
 86 |     # cudnn related setting
 87 |     cudnn.benchmark = cfg.CUDNN.BENCHMARK
 88 |     torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
 89 |     torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
 90 | 
 91 |     model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
 92 |         cfg, is_train=True
 93 |     )
 94 | 
 95 |     # copy model file
 96 |     this_dir = os.path.dirname(__file__)
 97 |     shutil.copy2(
 98 |         os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
 99 |         final_output_dir)
100 |     # logger.info(pprint.pformat(model))
101 | 
102 |     writer_dict = {
103 |         'writer': SummaryWriter(log_dir=tb_log_dir),
104 |         'train_global_steps': 0,
105 |         'valid_global_steps': 0,
106 |     }
107 | 
108 |     dump_input = torch.rand(
109 |         (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
110 |     )
111 |     writer_dict['writer'].add_graph(model, (dump_input, ))
112 | 
113 |     logger.info(get_model_summary(model, dump_input))
114 | 
115 |     model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
116 | 
117 |     # define loss function (criterion) and optimizer
118 |     criterion = JointsMSELoss(
119 |         use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
120 |     ).cuda()
121 | 
122 |     # Data loading code
123 |     normalize = transforms.Normalize(
124 |         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
125 |     )
126 |     train_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
127 |         cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
128 |         transforms.Compose([
129 |             transforms.ToTensor(),
130 |             normalize,
131 |         ])
132 |     )
133 |     valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
134 |         cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
135 |         transforms.Compose([
136 |             transforms.ToTensor(),
137 |             normalize,
138 |         ])
139 |     )
140 | 
141 |     train_loader = torch.utils.data.DataLoader(
142 |         train_dataset,
143 |         batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
144 |         shuffle=cfg.TRAIN.SHUFFLE,
145 |         num_workers=cfg.WORKERS,
146 |         pin_memory=cfg.PIN_MEMORY
147 |     )
148 |     valid_loader = torch.utils.data.DataLoader(
149 |         valid_dataset,
150 |         batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
151 |         shuffle=False,
152 |         num_workers=cfg.WORKERS,
153 |         pin_memory=cfg.PIN_MEMORY
154 |     )
155 | 
156 |     best_perf = 0.0
157 |     best_model = False
158 |     last_epoch = -1
159 |     optimizer = get_optimizer(cfg, model)
160 |     begin_epoch = cfg.TRAIN.BEGIN_EPOCH
161 |     checkpoint_file = os.path.join(
162 |         final_output_dir, 'checkpoint.pth'
163 |     )
164 | 
165 |     if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
166 |         logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
167 |         checkpoint = torch.load(checkpoint_file)
168 |         begin_epoch = checkpoint['epoch']
169 |         best_perf = checkpoint['perf']
170 |         last_epoch = checkpoint['epoch']
171 |         model.load_state_dict(checkpoint['state_dict'])
172 | 
173 |         optimizer.load_state_dict(checkpoint['optimizer'])
174 |         logger.info("=> loaded checkpoint '{}' (epoch {})".format(
175 |             checkpoint_file, checkpoint['epoch']))
176 | 
177 |     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
178 |         optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
179 |         last_epoch=last_epoch
180 |     )
181 | 
182 |     for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
183 |         lr_scheduler.step()
184 | 
185 |         # train for one epoch
186 |         train(args, cfg, train_loader, model, criterion, optimizer, epoch,
187 |               final_output_dir, tb_log_dir, writer_dict)
188 | 
189 | 
190 |         # evaluate on validation set
191 |         perf_indicator = validate(
192 |             cfg, valid_loader, valid_dataset, model, criterion,
193 |             final_output_dir, tb_log_dir, writer_dict
194 |         )
195 | 
196 |         if perf_indicator >= best_perf:
197 |             best_perf = perf_indicator
198 |             best_model = True
199 |         else:
200 |             best_model = False
201 | 
202 |         logger.info('=> saving checkpoint to {}'.format(final_output_dir))
203 |         save_checkpoint({
204 |             'epoch': epoch + 1,
205 |             'model': cfg.MODEL.NAME,
206 |             'state_dict': model.state_dict(),
207 |             'best_state_dict': model.module.state_dict(),
208 |             'perf': perf_indicator,
209 |             'optimizer': optimizer.state_dict(),
210 |         }, best_model, final_output_dir)
211 | 
212 |     final_model_state_file = os.path.join(
213 |         final_output_dir, 'final_state.pth'
214 |     )
215 |     logger.info('=> saving final model state to {}'.format(
216 |         final_model_state_file)
217 |     )
218 |     torch.save(model.module.state_dict(), final_model_state_file)
219 |     writer_dict['writer'].close()
220 | 
221 | 
222 | if __name__ == '__main__':
223 |     main()


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------------------
  2 | # Copyright (c) Microsoft
  3 | # Licensed under the MIT License.
  4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
  5 | # ------------------------------------------------------------------------------
  6 | 
  7 | # ------------------------------------------------------------------------------
  8 | # Updated by cavalleria (cavalleria@gmail.com)
  9 | # ------------------------------------------------------------------------------
 10 | 
 11 | from __future__ import absolute_import
 12 | from __future__ import division
 13 | from __future__ import print_function
 14 | 
 15 | import argparse
 16 | import os
 17 | import pprint
 18 | import shutil
 19 | import warnings
 20 | import random
 21 | import numpy as np
 22 | 
 23 | import torch
 24 | import torch.backends.cudnn as cudnn
 25 | import torch.distributed as dist
 26 | import torch.multiprocessing as mp
 27 | import torch.nn as nn
 28 | import torch.nn.parallel
 29 | import torch.optim
 30 | import torch.utils.data
 31 | import torch.utils.data.distributed
 32 | import torchvision.transforms as transforms
 33 | from tensorboardX import SummaryWriter
 34 | 
 35 | import _init_paths
 36 | import dataset
 37 | import models
 38 | from tqdm import tqdm
 39 | 
 40 | from config import cfg
 41 | from config import update_config
 42 | from core.loss import JointsMSELoss
 43 | from core.function import train
 44 | from core.function import validate
 45 | 
 46 | from utils.utils import create_logger
 47 | from utils.utils import get_optimizer
 48 | from utils.utils import save_checkpoint
 49 | from utils.utils import setup_logger
 50 | from utils.utils import get_model_summary
 51 | 
 52 | 
 53 | def parse_args():
 54 |     parser = argparse.ArgumentParser(description='Train keypoints network')
 55 |     # general
 56 |     parser.add_argument('--cfg',
 57 |                         help='experiment configure file name',
 58 |                         required=True,
 59 |                         type=str)
 60 |     parser.add_argument('opts',
 61 |                         help="Modify config options using the command-line",
 62 |                         default=None,
 63 |                         nargs=argparse.REMAINDER)
 64 |     parser.add_argument('--seed',
 65 |                         help='random seed',
 66 |                         default=1337,
 67 |                         type=int)
 68 | 
 69 |     parser.add_argument('--gpu',
 70 |                         help='gpu id for multiprocessing training',
 71 |                         type=str)
 72 |     parser.add_argument('--world-size',
 73 |                         default=1,
 74 |                         type=int,
 75 |                         help='number of nodes for distributed training')
 76 |     parser.add_argument('--rank',
 77 |                         default=0,
 78 |                         type=int,
 79 |                         help='node rank for distributed training')
 80 |     args = parser.parse_args()
 81 | 
 82 |     return args
 83 | 
 84 | def set_seed(seed):
 85 |     random.seed(seed)
 86 |     np.random.seed(seed)
 87 |     torch.manual_seed(seed)
 88 |     torch.cuda.manual_seed(seed)
 89 |     torch.cuda.manual_seed_all(seed)
 90 | 
 91 | def main():
 92 |     args = parse_args()
 93 |     set_seed(int(args.seed))
 94 |     update_config(cfg, args)
 95 | 
 96 |     cfg.defrost()
 97 |     cfg.RANK = args.rank
 98 |     cfg.freeze()
 99 | 
100 |     logger, final_output_dir, tb_log_dir = create_logger(cfg, args.cfg, 'train')
101 | 
102 |     logger.info(pprint.pformat(args))
103 |     logger.info(cfg)
104 | 
105 |     # cudnn related setting
106 |     cudnn.benchmark = cfg.CUDNN.BENCHMARK
107 |     torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
108 |     torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
109 | 
110 |     ngpus_per_node = torch.cuda.device_count()
111 | 
112 |     args.world_size = ngpus_per_node * args.world_size
113 |     mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
114 |    
115 | 
116 | 
117 | def main_worker(gpu, ngpus_per_node, args, final_output_dir, tb_log_dir):
118 | 
119 |     args.gpu = gpu
120 |     args.rank = args.rank * ngpus_per_node + gpu
121 |     print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(cfg.DIST_URL, args.world_size, args.rank))
122 |     dist.init_process_group(backend=cfg.DIST_BACKEND, init_method=cfg.DIST_URL, world_size=args.world_size, rank=args.rank)
123 | 
124 |     update_config(cfg, args)
125 | 
126 |     # setup logger
127 |     logger, _ = setup_logger(final_output_dir, args.rank, 'train')
128 | 
129 |     model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True)
130 |     logger.info(get_model_summary(model, torch.zeros(1, 3, *cfg.MODEL.IMAGE_SIZE)))
131 | 
132 |     # copy model file
133 |     if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
134 |         this_dir = os.path.dirname(__file__)
135 |         shutil.copy2(os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), final_output_dir)
136 | 
137 |     writer_dict = {
138 |         'writer': SummaryWriter(log_dir=tb_log_dir),
139 |         'train_global_steps': 0,
140 |         'valid_global_steps': 0,
141 |     }
142 | 
143 |     if not cfg.MULTIPROCESSING_DISTRIBUTED or (cfg.MULTIPROCESSING_DISTRIBUTED and args.rank % ngpus_per_node == 0):
144 |         dump_input = torch.rand((1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]))
145 |         writer_dict['writer'].add_graph(model, (dump_input, ))
146 |         # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))
147 | 
148 |     if cfg.MODEL.SYNC_BN:
149 |         model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
150 |     
151 |     torch.cuda.set_device(args.gpu)
152 |     model.cuda(args.gpu)
153 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
154 | 
155 |     # define loss function (criterion) and optimizer
156 |     criterion = JointsMSELoss(use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda(args.gpu)
157 | 
158 |     # Data loading code
159 |     train_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
160 |         cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True,
161 |         transforms.Compose([
162 |             transforms.ToTensor(),
163 |             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
164 |         ])
165 |     )
166 |     valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)(
167 |         cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False,
168 |         transforms.Compose([
169 |             transforms.ToTensor(),
170 |             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
171 |         ])
172 |     )
173 |     
174 |     train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
175 |     train_loader = torch.utils.data.DataLoader(
176 |         train_dataset,
177 |         batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
178 |         shuffle=(train_sampler is None),
179 |         num_workers=cfg.WORKERS,
180 |         pin_memory=cfg.PIN_MEMORY,
181 |         sampler=train_sampler
182 |     )
183 | 
184 |     valid_loader = torch.utils.data.DataLoader(
185 |         valid_dataset,
186 |         batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
187 |         shuffle=False,
188 |         num_workers=cfg.WORKERS,
189 |         pin_memory=cfg.PIN_MEMORY
190 |     )
191 |     logger.info(train_loader.dataset)
192 | 
193 |     best_perf = -1
194 |     best_model = False
195 |     last_epoch = -1
196 |     optimizer = get_optimizer(cfg, model)
197 |     begin_epoch = cfg.TRAIN.BEGIN_EPOCH
198 |     checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
199 |     if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
200 |         logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
201 |         checkpoint = torch.load(checkpoint_file)
202 |         begin_epoch = checkpoint['epoch']
203 |         best_perf = checkpoint['perf']
204 |         last_epoch = checkpoint['epoch']
205 |         model.load_state_dict(checkpoint['state_dict'])
206 | 
207 |         optimizer.load_state_dict(checkpoint['optimizer'])
208 |         logger.info("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))
209 | 
210 |     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
211 |         optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
212 |         last_epoch=last_epoch)
213 | 
214 |     for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
215 |         
216 |         # train for one epoch
217 |         train(cfg, train_loader, model, criterion, optimizer, epoch,
218 |               final_output_dir, tb_log_dir, writer_dict)
219 |         # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
220 |         lr_scheduler.step()
221 | 
222 |         # evaluate on validation set
223 |         perf_indicator = validate(
224 |             args, cfg, valid_loader, valid_dataset, model, criterion,
225 |             final_output_dir, tb_log_dir, writer_dict
226 |         )
227 | 
228 |         if perf_indicator >= best_perf:
229 |             best_perf = perf_indicator
230 |             best_model = True
231 |         else:
232 |             best_model = False
233 | 
234 |         if not cfg.MULTIPROCESSING_DISTRIBUTED or (
235 |                 cfg.MULTIPROCESSING_DISTRIBUTED
236 |                 and args.rank == 0
237 |         ):
238 |             logger.info('=> saving checkpoint to {}'.format(final_output_dir))
239 |             save_checkpoint({
240 |                 'epoch': epoch + 1,
241 |                 'model': cfg.MODEL.NAME,
242 |                 'state_dict': model.state_dict(),
243 |                 'best_state_dict': model.module.state_dict(),
244 |                 'perf': perf_indicator,
245 |                 'optimizer': optimizer.state_dict(),
246 |             }, best_model, final_output_dir)
247 | 
248 |     final_model_state_file = os.path.join(
249 |         final_output_dir, 'final_state{}.pth.tar'.format(gpu)
250 |     )
251 | 
252 |     logger.info('saving final model state to {}'.format(
253 |         final_model_state_file))
254 |     torch.save(model.module.state_dict(), final_model_state_file)
255 |     writer_dict['writer'].close()
256 | 
257 | 
258 | if __name__ == '__main__':
259 |     main()


--------------------------------------------------------------------------------
/train_coco_w18_v1.sh:
--------------------------------------------------------------------------------
1 | 
2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \
3 |     --cfg experiments/coco/hrnet/w18_small_v1_256x192_adam_lr1e-3.yaml
4 | 


--------------------------------------------------------------------------------
/train_coco_w18_v2.sh:
--------------------------------------------------------------------------------
1 | 
2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \
3 |     --cfg experiments/coco/hrnet/w18_small_v2_256x192_adam_lr1e-3_softargmax.yaml
4 | 


--------------------------------------------------------------------------------
/train_coco_w32.sh:
--------------------------------------------------------------------------------
1 | 
2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \
3 |     --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml
4 | 


--------------------------------------------------------------------------------
/train_lpn.sh:
--------------------------------------------------------------------------------
1 | 
2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \
3 |     --cfg experiments/coco/lpn/lpn50_256x192_gd256x2_gc.yaml
4 | 


--------------------------------------------------------------------------------
/train_mpii.sh:
--------------------------------------------------------------------------------
1 | 
2 | CUDA_VISIBLE_DEVICES="0,1,2,3" python tools/train.py \
3 |     --cfg experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml
4 | 


--------------------------------------------------------------------------------