├── .gitignore ├── LICENSE ├── README.md ├── _config.yml ├── demo ├── .gitignore ├── Dockerfile ├── README.md ├── _init_paths.py ├── build-docker.sh ├── demo.py ├── hrnet-demo.gif ├── inference-config.yaml ├── inference.py ├── inference_1.jpg ├── inference_3.jpg ├── inference_5.jpg ├── inference_6.jpg └── inference_7.jpg ├── experiments ├── coco │ ├── hrnet │ │ ├── w32_256x192_adam_lr1e-3.yaml │ │ ├── w32_384x288_adam_lr1e-3.yaml │ │ ├── w48_256x192_adam_lr1e-3.yaml │ │ └── w48_384x288_adam_lr1e-3.yaml │ └── resnet │ │ ├── res101_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res101_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res152_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res152_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res50_256x192_d256x3_adam_lr1e-3.yaml │ │ └── res50_384x288_d256x3_adam_lr1e-3.yaml └── mpii │ ├── hrnet │ ├── w32_256x256_adam_lr1e-3.yaml │ └── w48_256x256_adam_lr1e-3.yaml │ └── resnet │ ├── res101_256x256_d256x3_adam_lr1e-3.yaml │ ├── res152_256x256_d256x3_adam_lr1e-3.yaml │ └── res50_256x256_d256x3_adam_lr1e-3.yaml ├── figures ├── hrnet.png └── visualization │ └── coco │ ├── score_610_id_2685_000000002685.png │ ├── score_710_id_153229_000000153229.png │ ├── score_755_id_343561_000000343561.png │ ├── score_755_id_559842_000000559842.png │ ├── score_770_id_6954_000000006954.png │ └── score_919_id_53626_000000053626.png ├── lib ├── Makefile ├── config │ ├── __init__.py │ ├── default.py │ └── models.py ├── core │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── coco.py │ └── mpii.py ├── models │ ├── __init__.py │ ├── pose_hrnet.py │ └── pose_resnet.py ├── nms │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup_linux.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt ├── tools ├── _init_paths.py ├── test.py └── train.py └── visualization └── plot_coco.py /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ project files 2 | .idea 3 | *.iml 4 | out 5 | gen 6 | 7 | ### Vim template 8 | [._]*.s[a-w][a-z] 9 | [._]s[a-w][a-z] 10 | *.un~ 11 | Session.vim 12 | .netrwhist 13 | *~ 14 | 15 | ### IPythonNotebook template 16 | # Temporary data 17 | .ipynb_checkpoints/ 18 | 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | #lib/ 38 | #lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *,cover 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | *.ipynb 80 | *.params 81 | *.json 82 | .vscode/ 83 | 84 | lib/pycocotools/_mask.c 85 | lib/nms/cpu_nms.c 86 | 87 | output/* 88 | models/* 89 | log/* 90 | data/* 91 | external/ 92 | 93 | draws/ 94 | plot/ 95 | 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Leo Xiao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep High-Resolution Representation Learning for Human Pose Estimation (CVPR 2019) 2 | ## News 3 | - [2021/04/12] Welcome to check out our recent work on bottom-up pose estimation (CVPR 2021) [HRNet-DEKR](https://github.com/HRNet/DEKR)! 4 | - [2020/07/05] [A very nice blog](https://towardsdatascience.com/overview-of-human-pose-estimation-neural-networks-hrnet-higherhrnet-architectures-and-faq-1954b2f8b249) from Towards Data Science introducing HRNet and HigherHRNet for human pose estimation. 5 | - [2020/03/13] A longer version is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf). It includes more HRNet applications, and the codes are available: [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation), [objection detection](https://github.com/HRNet/HRNet-Object-Detection), [facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection), and [image classification](https://github.com/HRNet/HRNet-Image-Classification). 6 | - [2020/02/01] We have added demo code for HRNet. Thanks [Alex Simes](https://github.com/alex9311). 7 | - Visualization code for showing the pose estimation results. Thanks Depu! 
8 | - [2019/08/27] HigherHRNet is now on [ArXiv](https://arxiv.org/abs/1908.10357), which is a bottom-up approach for human pose estimation powerd by HRNet. We will also release code and models at [Higher-HRNet-Human-Pose-Estimation](https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation), stay tuned! 9 | - Our new work [High-Resolution Representations for Labeling Pixels and Regions](https://arxiv.org/abs/1904.04514) is available at [HRNet](https://github.com/HRNet). Our HRNet has been applied to a wide range of vision tasks, such as [image classification](https://github.com/HRNet/HRNet-Image-Classification), [objection detection](https://github.com/HRNet/HRNet-Object-Detection), [semantic segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation) and [facial landmark](https://github.com/HRNet/HRNet-Facial-Landmark-Detection). 10 | 11 | ## Introduction 12 | This is an official pytorch implementation of [*Deep High-Resolution Representation Learning for Human Pose Estimation*](https://arxiv.org/abs/1902.09212). 13 | In this work, we are interested in the human pose estimation problem with a focus on learning reliable high-resolution representations. Most existing methods **recover high-resolution representations from low-resolution representations** produced by a high-to-low resolution network. Instead, our proposed network **maintains high-resolution representations** through the whole process. 14 | We start from a high-resolution subnetwork as the first stage, gradually add high-to-low resolution subnetworks one by one to form more stages, and connect the mutli-resolution subnetworks **in parallel**. We conduct **repeated multi-scale fusions** such that each of the high-to-low resolution representations receives information from other parallel representations over and over, leading to rich high-resolution representations. As a result, the predicted keypoint heatmap is potentially more accurate and spatially more precise. We empirically demonstrate the effectiveness of our network through the superior pose estimation results over two benchmark datasets: the COCO keypoint detection dataset and the MPII Human Pose dataset.
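The figure below illustrates the full architecture. As a rough intuition for the parallel branches and repeated multi-scale fusion described above, here is a deliberately tiny PyTorch sketch; it is **not** the repository's `lib/models/pose_hrnet.py`, just a toy two-branch block with a single exchange unit (layer names and channel counts are made up for illustration):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyFusionBlock(nn.Module):
    """Two parallel branches (full and half resolution) with one fusion step."""

    def __init__(self, high_ch=32, low_ch=64):
        super().__init__()
        self.high_conv = nn.Conv2d(high_ch, high_ch, 3, padding=1)
        self.low_conv = nn.Conv2d(low_ch, low_ch, 3, padding=1)
        # exchange unit: strided conv to go down, 1x1 conv + upsampling to go up
        self.high_to_low = nn.Conv2d(high_ch, low_ch, 3, stride=2, padding=1)
        self.low_to_high = nn.Conv2d(low_ch, high_ch, 1)

    def forward(self, x_high, x_low):
        h = F.relu(self.high_conv(x_high))
        l = F.relu(self.low_conv(x_low))
        fused_low = l + self.high_to_low(h)                  # high -> low
        fused_high = h + F.interpolate(self.low_to_high(l),  # low -> high
                                       size=h.shape[-2:], mode='nearest')
        return fused_high, fused_low


if __name__ == '__main__':
    x_high = torch.randn(1, 32, 64, 48)   # high-resolution features
    x_low = torch.randn(1, 64, 32, 24)    # half-resolution features
    block = ToyFusionBlock()
    for _ in range(3):                    # repeated multi-scale fusion
        x_high, x_low = block(x_high, x_low)
    print(x_high.shape, x_low.shape)      # both resolutions are preserved
```

The high-resolution path is never discarded; each fusion step only adds resampled information from the other branch, which is the property the paragraph above refers to.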
15 | 16 | ![Illustrating the architecture of the proposed HRNet](/figures/hrnet.png) 17 | ## Main Results 18 | ### Results on MPII val 19 | | Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1 | 20 | |--------------------|------|----------|-------|-------|------|------|-------|------|----------| 21 | | pose_resnet_50 | 96.4 | 95.3 | 89.0 | 83.2 | 88.4 | 84.0 | 79.6 | 88.5 | 34.0 | 22 | | pose_resnet_101 | 96.9 | 95.9 | 89.5 | 84.4 | 88.4 | 84.5 | 80.7 | 89.1 | 34.0 | 23 | | pose_resnet_152 | 97.0 | 95.9 | 90.0 | 85.0 | 89.2 | 85.3 | 81.3 | 89.6 | 35.0 | 24 | | **pose_hrnet_w32** | 97.1 | 95.9 | 90.3 | 86.4 | 89.1 | 87.1 | 83.3 | 90.3 | 37.7 | 25 | 26 | ### Note: 27 | - Flip test is used. 28 | - Input size is 256x256 29 | - pose_resnet_[50,101,152] is our previous work of [*Simple Baselines for Human Pose Estimation and Tracking*](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html) 30 | 31 | ### Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset 32 | | Arch | Input size | #Params | GFLOPs | AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 33 | |--------------------|------------|---------|--------|-------|-------|--------|--------|--------|-------|-------|--------|--------|--------| 34 | | pose_resnet_50 | 256x192 | 34.0M | 8.9 | 0.704 | 0.886 | 0.783 | 0.671 | 0.772 | 0.763 | 0.929 | 0.834 | 0.721 | 0.824 | 35 | | pose_resnet_50 | 384x288 | 34.0M | 20.0 | 0.722 | 0.893 | 0.789 | 0.681 | 0.797 | 0.776 | 0.932 | 0.838 | 0.728 | 0.846 | 36 | | pose_resnet_101 | 256x192 | 53.0M | 12.4 | 0.714 | 0.893 | 0.793 | 0.681 | 0.781 | 0.771 | 0.934 | 0.840 | 0.730 | 0.832 | 37 | | pose_resnet_101 | 384x288 | 53.0M | 27.9 | 0.736 | 0.896 | 0.803 | 0.699 | 0.811 | 0.791 | 0.936 | 0.851 | 0.745 | 0.858 | 38 | | pose_resnet_152 | 256x192 | 68.6M | 15.7 | 0.720 | 0.893 | 0.798 | 0.687 | 0.789 | 0.778 | 0.934 | 0.846 | 0.736 | 0.839 | 39 | | pose_resnet_152 | 384x288 | 68.6M | 35.3 | 0.743 | 0.896 | 0.811 | 0.705 | 0.816 | 0.797 | 0.937 | 0.858 | 0.751 | 0.863 | 40 | | **pose_hrnet_w32** | 256x192 | 28.5M | 7.1 | 0.744 | 0.905 | 0.819 | 0.708 | 0.810 | 0.798 | 0.942 | 0.865 | 0.757 | 0.858 | 41 | | **pose_hrnet_w32** | 384x288 | 28.5M | 16.0 | 0.758 | 0.906 | 0.825 | 0.720 | 0.827 | 0.809 | 0.943 | 0.869 | 0.767 | 0.871 | 42 | | **pose_hrnet_w48** | 256x192 | 63.6M | 14.6 | 0.751 | 0.906 | 0.822 | 0.715 | 0.818 | 0.804 | 0.943 | 0.867 | 0.762 | 0.864 | 43 | | **pose_hrnet_w48** | 384x288 | 63.6M | 32.9 | 0.763 | 0.908 | 0.829 | 0.723 | 0.834 | 0.812 | 0.942 | 0.871 | 0.767 | 0.876 | 44 | 45 | ### Note: 46 | - Flip test is used. 47 | - Person detector has person AP of 56.4 on COCO val2017 dataset. 48 | - pose_resnet_[50,101,152] is our previous work of [*Simple Baselines for Human Pose Estimation and Tracking*](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html). 49 | - GFLOPs is for convolution and linear layers only. 
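To sanity-check the `#Params` column above, the parameter count can be reproduced directly from the model builder. Below is a minimal sketch, assumed to be run from `${POSE_ROOT}` with the dependencies installed and the experiment configs present; GFLOPs would additionally need an op-level profiler such as the third-party `thop` package (not a dependency of this repo), so only parameters are counted here:

```python
import argparse
import sys

sys.path.insert(0, 'lib')  # assumes the script is run from ${POSE_ROOT}

import models
from config import cfg, update_config

# Mirror the argument object that tools/train.py and demo/demo.py pass in.
args = argparse.Namespace(
    cfg='experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml',
    opts=[], modelDir='', logDir='', dataDir='', prevModelDir='')
update_config(cfg, args)

model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
n_params = sum(p.numel() for p in model.parameters())
print('#Params: {:.1f}M'.format(n_params / 1e6))  # ~28.5M for pose_hrnet_w32
```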
50 | 51 | 52 | ### Results on COCO test-dev2017 with detector having human AP of 60.9 on COCO test-dev2017 dataset 53 | | Arch | Input size | #Params | GFLOPs | AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | 54 | |--------------------|------------|---------|--------|-------|-------|--------|--------|--------|-------|-------|--------|--------|--------| 55 | | pose_resnet_152 | 384x288 | 68.6M | 35.3 | 0.737 | 0.919 | 0.828 | 0.713 | 0.800 | 0.790 | 0.952 | 0.856 | 0.748 | 0.849 | 56 | | **pose_hrnet_w48** | 384x288 | 63.6M | 32.9 | 0.755 | 0.925 | 0.833 | 0.719 | 0.815 | 0.805 | 0.957 | 0.874 | 0.763 | 0.863 | 57 | | **pose_hrnet_w48\*** | 384x288 | 63.6M | 32.9 | 0.770 | 0.927 | 0.845 | 0.734 | 0.831 | 0.820 | 0.960 | 0.886 | 0.778 | 0.877 | 58 | 59 | ### Note: 60 | - Flip test is used. 61 | - Person detector has person AP of 60.9 on COCO test-dev2017 dataset. 62 | - pose_resnet_152 is our previous work of [*Simple Baselines for Human Pose Estimation and Tracking*](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html). 63 | - GFLOPs is for convolution and linear layers only. 64 | - pose_hrnet_w48\* means using additional data from [AI challenger](https://challenger.ai/dataset/keypoint) for training. 65 | 66 | ## Environment 67 | The code is developed using python 3.6 on Ubuntu 16.04. NVIDIA GPUs are needed. The code is developed and tested using 4 NVIDIA P100 GPU cards. Other platforms or GPU cards are not fully tested. 68 | 69 | ## Quick start 70 | ### Installation 71 | 1. Install pytorch >= v1.0.0 following [official instruction](https://pytorch.org/). 72 | **Note that if you use pytorch's version < v1.0.0, you should following the instruction at to disable cudnn's implementations of BatchNorm layer. We encourage you to use higher pytorch's version(>=v1.0.0)** 73 | 2. Clone this repo, and we'll call the directory that you cloned as ${POSE_ROOT}. 74 | 3. Install dependencies: 75 | ``` 76 | pip install -r requirements.txt 77 | ``` 78 | 4. Make libs: 79 | ``` 80 | cd ${POSE_ROOT}/lib 81 | make 82 | ``` 83 | 5. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 84 | ``` 85 | # COCOAPI=/path/to/clone/cocoapi 86 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 87 | cd $COCOAPI/PythonAPI 88 | # Install into global site-packages 89 | make install 90 | # Alternatively, if you do not have permissions or prefer 91 | # not to install the COCO API into global site-packages 92 | python3 setup.py install --user 93 | ``` 94 | Note that instructions like # COCOAPI=/path/to/install/cocoapi indicate that you should pick a path where you'd like to have the software cloned and then set an environment variable (COCOAPI in this case) accordingly. 95 | 4. Init output(training model output directory) and log(tensorboard log directory) directory: 96 | 97 | ``` 98 | mkdir output 99 | mkdir log 100 | ``` 101 | 102 | Your directory tree should look like this: 103 | 104 | ``` 105 | ${POSE_ROOT} 106 | ├── data 107 | ├── experiments 108 | ├── lib 109 | ├── log 110 | ├── models 111 | ├── output 112 | ├── tools 113 | ├── README.md 114 | └── requirements.txt 115 | ``` 116 | 117 | 6. 
Download pretrained models from our model zoo([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 118 | ``` 119 | ${POSE_ROOT} 120 | `-- models 121 | `-- pytorch 122 | |-- imagenet 123 | | |-- hrnet_w32-36af842e.pth 124 | | |-- hrnet_w48-8ef0771d.pth 125 | | |-- resnet50-19c8e357.pth 126 | | |-- resnet101-5d3b4d8f.pth 127 | | `-- resnet152-b121ed2d.pth 128 | |-- pose_coco 129 | | |-- pose_hrnet_w32_256x192.pth 130 | | |-- pose_hrnet_w32_384x288.pth 131 | | |-- pose_hrnet_w48_256x192.pth 132 | | |-- pose_hrnet_w48_384x288.pth 133 | | |-- pose_resnet_101_256x192.pth 134 | | |-- pose_resnet_101_384x288.pth 135 | | |-- pose_resnet_152_256x192.pth 136 | | |-- pose_resnet_152_384x288.pth 137 | | |-- pose_resnet_50_256x192.pth 138 | | `-- pose_resnet_50_384x288.pth 139 | `-- pose_mpii 140 | |-- pose_hrnet_w32_256x256.pth 141 | |-- pose_hrnet_w48_256x256.pth 142 | |-- pose_resnet_101_256x256.pth 143 | |-- pose_resnet_152_256x256.pth 144 | `-- pose_resnet_50_256x256.pth 145 | 146 | ``` 147 | 148 | ### Data preparation 149 | **For MPII data**, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/). The original annotation files are in matlab format. We have converted them into json format, you also need to download them from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW00SqrairNetmeVu4) or [GoogleDrive](https://drive.google.com/drive/folders/1En_VqmStnsXMdldXA6qpqEyDQulnmS3a?usp=sharing). 150 | Extract them under {POSE_ROOT}/data, and make them look like this: 151 | ``` 152 | ${POSE_ROOT} 153 | |-- data 154 | `-- |-- mpii 155 | `-- |-- annot 156 | | |-- gt_valid.mat 157 | | |-- test.json 158 | | |-- train.json 159 | | |-- trainval.json 160 | | `-- valid.json 161 | `-- images 162 | |-- 000001163.jpg 163 | |-- 000003072.jpg 164 | ``` 165 | 166 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. We also provide person detection result of COCO val2017 and test-dev2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-) or [GoogleDrive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk?usp=sharing). 167 | Download and extract them under {POSE_ROOT}/data, and make them look like this: 168 | ``` 169 | ${POSE_ROOT} 170 | |-- data 171 | `-- |-- coco 172 | `-- |-- annotations 173 | | |-- person_keypoints_train2017.json 174 | | `-- person_keypoints_val2017.json 175 | |-- person_detection_results 176 | | |-- COCO_val2017_detections_AP_H_56_person.json 177 | | |-- COCO_test-dev2017_detections_AP_H_609_person.json 178 | `-- images 179 | |-- train2017 180 | | |-- 000000000009.jpg 181 | | |-- 000000000025.jpg 182 | | |-- 000000000030.jpg 183 | | |-- ... 184 | `-- val2017 185 | |-- 000000000139.jpg 186 | |-- 000000000285.jpg 187 | |-- 000000000632.jpg 188 | |-- ... 
189 | ``` 190 | 191 | ### Training and Testing 192 | 193 | #### Testing on MPII dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 194 | 195 | 196 | ``` 197 | python tools/test.py \ 198 | --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml \ 199 | TEST.MODEL_FILE models/pytorch/pose_mpii/pose_hrnet_w32_256x256.pth 200 | ``` 201 | 202 | #### Training on MPII dataset 203 | 204 | ``` 205 | python tools/train.py \ 206 | --cfg experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml 207 | ``` 208 | 209 | #### Testing on COCO val2017 dataset using model zoo's models([GoogleDrive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) or [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ)) 210 | 211 | 212 | ``` 213 | python tools/test.py \ 214 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \ 215 | TEST.MODEL_FILE models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth \ 216 | TEST.USE_GT_BBOX False 217 | ``` 218 | 219 | #### Training on COCO train2017 dataset 220 | 221 | ``` 222 | python tools/train.py \ 223 | --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \ 224 | ``` 225 | 226 | ### Visualization 227 | 228 | #### Visualizing predictions on COCO val 229 | 230 | ``` 231 | python visualization/plot_coco.py \ 232 | --prediction output/coco/w48_384x288_adam_lr1e-3/results/keypoints_val2017_results_0.json \ 233 | --save-path visualization/results 234 | 235 | ``` 236 | 237 | 238 | 239 | 240 | 241 | 242 | ### Other applications 243 | Many other dense prediction tasks, such as segmentation, face alignment and object detection, etc. have been benefited by HRNet. More information can be found at [High-Resolution Networks](https://github.com/HRNet). 244 | 245 | ### Other implementation 246 | [mmpose](https://github.com/open-mmlab/mmpose)
247 | [ModelScope (Chinese)](https://modelscope.cn/models/damo/cv_hrnetv2w32_body-2d-keypoints_image/summary)
248 | [timm](https://huggingface.co/docs/timm/main/en/models/hrnet) 249 | 250 | 251 | ### Citation 252 | If you use our code or models in your research, please cite with: 253 | ``` 254 | @inproceedings{sun2019deep, 255 | title={Deep High-Resolution Representation Learning for Human Pose Estimation}, 256 | author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, 257 | booktitle={CVPR}, 258 | year={2019} 259 | } 260 | 261 | @inproceedings{xiao2018simple, 262 | author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, 263 | title={Simple Baselines for Human Pose Estimation and Tracking}, 264 | booktitle = {European Conference on Computer Vision (ECCV)}, 265 | year = {2018} 266 | } 267 | 268 | @article{WangSCJDZLMTWLX19, 269 | title={Deep High-Resolution Representation Learning for Visual Recognition}, 270 | author={Jingdong Wang and Ke Sun and Tianheng Cheng and 271 | Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and 272 | Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, 273 | journal = {TPAMI} 274 | year={2019} 275 | } 276 | ``` 277 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | output 2 | models 3 | videos 4 | -------------------------------------------------------------------------------- /demo/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04 2 | 3 | ENV OPENCV_VERSION="3.4.6" 4 | 5 | # Basic toolchain 6 | RUN apt-get update && apt-get install -y \ 7 | apt-utils \ 8 | build-essential \ 9 | git \ 10 | wget \ 11 | unzip \ 12 | yasm \ 13 | pkg-config \ 14 | libcurl4-openssl-dev \ 15 | zlib1g-dev \ 16 | htop \ 17 | cmake \ 18 | nano \ 19 | python3-pip \ 20 | python3-dev \ 21 | python3-tk \ 22 | libx264-dev \ 23 | && cd /usr/local/bin \ 24 | && ln -s /usr/bin/python3 python \ 25 | && pip3 install --upgrade pip \ 26 | && apt-get autoremove -y 27 | 28 | # Getting OpenCV dependencies available with apt 29 | RUN apt-get update && apt-get install -y \ 30 | libeigen3-dev \ 31 | libjpeg-dev \ 32 | libpng-dev \ 33 | libtiff-dev \ 34 | libjasper-dev \ 35 | libswscale-dev \ 36 | libavcodec-dev \ 37 | libavformat-dev && \ 38 | apt-get autoremove -y 39 | 40 | # Getting other dependencies 41 | RUN apt-get update && apt-get install -y \ 42 | cppcheck \ 43 | graphviz \ 44 | doxygen \ 45 | p7zip-full \ 46 | libdlib18 \ 47 | libdlib-dev && \ 48 | apt-get autoremove -y 49 | 50 | 51 | # Install OpenCV + OpenCV contrib (takes forever) 52 | RUN mkdir -p /tmp && \ 53 | cd /tmp && \ 54 | wget --no-check-certificate -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \ 55 | wget --no-check-certificate -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \ 56 | unzip opencv.zip && \ 57 | unzip opencv_contrib.zip && \ 58 | mkdir opencv-${OPENCV_VERSION}/build && \ 59 | cd opencv-${OPENCV_VERSION}/build && \ 60 | cmake -D CMAKE_BUILD_TYPE=RELEASE \ 61 | -D CMAKE_INSTALL_PREFIX=/usr/local \ 62 | -D WITH_CUDA=ON \ 63 | -D CUDA_FAST_MATH=1 \ 64 | -D WITH_CUBLAS=1 \ 65 | -D WITH_FFMPEG=ON \ 66 | -D WITH_OPENCL=ON \ 67 | -D WITH_V4L=ON \ 68 | -D WITH_OPENGL=ON \ 69 | -D 
OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \ 70 | .. && \ 71 | make -j$(nproc) && \ 72 | make install && \ 73 | echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf && \ 74 | ldconfig && \ 75 | cd /tmp && \ 76 | rm -rf opencv-${OPENCV_VERSION} opencv.zip opencv_contrib-${OPENCV_VERSION} opencv_contrib.zip && \ 77 | cd / 78 | 79 | # Compile and install ffmpeg from source 80 | RUN git clone https://github.com/FFmpeg/FFmpeg /root/ffmpeg && \ 81 | cd /root/ffmpeg && \ 82 | ./configure --enable-gpl --enable-libx264 --enable-nonfree --disable-shared --extra-cflags=-I/usr/local/include && \ 83 | make -j8 && make install -j8 84 | 85 | # clone deep-high-resolution-net 86 | ARG POSE_ROOT=/pose_root 87 | RUN git clone https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git $POSE_ROOT 88 | WORKDIR $POSE_ROOT 89 | RUN mkdir output && mkdir log 90 | 91 | RUN pip3 install -r requirements.txt && \ 92 | pip3 install torch==1.1.0 \ 93 | torchvision==0.3.0 \ 94 | opencv-python \ 95 | pillow==6.2.1 96 | 97 | # build deep-high-resolution-net lib 98 | WORKDIR $POSE_ROOT/lib 99 | RUN make 100 | 101 | # install COCO API 102 | ARG COCOAPI=/cocoapi 103 | RUN git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 104 | WORKDIR $COCOAPI/PythonAPI 105 | # Install into global site-packages 106 | RUN make install 107 | 108 | # download fastrrnn pretrained model for person detection 109 | RUN python -c "import torchvision; model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True); model.eval()" 110 | 111 | COPY inference.py $POSE_ROOT/tools 112 | COPY inference-config.yaml $POSE_ROOT/ 113 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Inference hrnet 2 | 3 | Inferencing the deep-high-resolution-net.pytoch without using Docker. 4 | 5 | ## Prep 6 | 1. Download the researchers' pretrained pose estimator from [google drive](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC?usp=sharing) to this directory under `models/` 7 | 2. Put the video file you'd like to infer on in this directory under `videos` 8 | 3. (OPTIONAL) build the docker container in this directory with `./build-docker.sh` (this can take time because it involves compiling opencv) 9 | 4. update the `inference-config.yaml` file to reflect the number of GPUs you have available and which trained model you want to use. 10 | 11 | ## Running the Model 12 | ### 1. Running on the video 13 | ``` 14 | python demo/inference.py --cfg demo/inference-config.yaml \ 15 | --videoFile ../../multi_people.mp4 \ 16 | --writeBoxFrames \ 17 | --outputDir output \ 18 | TEST.MODEL_FILE ../models/pytorch/pose_coco/pose_hrnet_w32_256x192.pth 19 | 20 | ``` 21 | 22 | The above command will create a video under *output* directory and a lot of pose image under *output/pose* directory. 23 | Even with usage of GPU (GTX1080 in my case), the person detection will take nearly **0.06 sec**, the person pose match will 24 | take nearly **0.07 sec**. In total. inference time per frame will be **0.13 sec**, nearly 10fps. So if you prefer a real-time (fps >= 20) 25 | pose estimation then you should try other approach. 
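(For reference, 0.06 s + 0.07 s is about 0.13 s per frame, i.e. roughly 7 to 8 fps on that GPU.) One way to reproduce the per-stage timings is to wrap the two calls with a wall-clock timer. A hedged sketch is below: `get_person_detection_boxes` and `get_pose_estimation_prediction` are the helpers defined later in `demo/demo.py`, while the `timed` wrapper and the variable names are illustrative only:

```python
import time


def timed(fn, *args, **kwargs):
    """Run fn and return (result, elapsed wall-clock seconds)."""
    start = time.time()
    result = fn(*args, **kwargs)
    return result, time.time() - start

# Inside the per-frame loop of demo/demo.py one could write, for example:
#   boxes, det_sec = timed(get_person_detection_boxes, box_model, inputs, threshold=0.9)
#   preds, pose_sec = timed(get_pose_estimation_prediction, pose_model, image_pose, center, scale)
#   print('fps: %.1f' % (1.0 / (det_sec + pose_sec)))   # 1 / (0.06 + 0.07) ~ 7.7
```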
26 | 27 | **===Result===** 28 | 29 | Some output images are as: 30 | 31 | ![1 person](inference_1.jpg) 32 | Fig: 1 person inference 33 | 34 | ![3 person](inference_3.jpg) 35 | Fig: 3 person inference 36 | 37 | ![3 person](inference_5.jpg) 38 | Fig: 3 person inference 39 | 40 | ### 2. Demo with more common functions 41 | Remember to update` TEST.MODEL_FILE` in `demo/inference-config.yaml `according to your model path. 42 | 43 | `demo.py` provides the following functions: 44 | 45 | - use `--webcam` when the input is a real-time camera. 46 | - use `--video [video-path]` when the input is a video. 47 | - use `--image [image-path]` when the input is an image. 48 | - use `--write` to save the image, camera or video result. 49 | - use `--showFps` to show the fps (this fps includes the detection part). 50 | - draw connections between joints. 51 | 52 | #### (1) the input is a real-time carema 53 | ```python 54 | python demo/demo.py --webcam --showFps --write 55 | ``` 56 | 57 | #### (2) the input is a video 58 | ```python 59 | python demo/demo.py --video test.mp4 --showFps --write 60 | ``` 61 | #### (3) the input is a image 62 | 63 | ```python 64 | python demo/demo.py --image test.jpg --showFps --write 65 | ``` 66 | 67 | **===Result===** 68 | 69 | ![show_fps](inference_6.jpg) 70 | 71 | Fig: show fps 72 | 73 | ![multi-people](inference_7.jpg) 74 | 75 | Fig: multi-people -------------------------------------------------------------------------------- /demo/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | -------------------------------------------------------------------------------- /demo/build-docker.sh: -------------------------------------------------------------------------------- 1 | docker build -t hrnet_demo_inference . 
2 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import csv 7 | import os 8 | import shutil 9 | 10 | from PIL import Image 11 | import torch 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.optim 15 | import torch.utils.data 16 | import torch.utils.data.distributed 17 | import torchvision.transforms as transforms 18 | import torchvision 19 | import cv2 20 | import numpy as np 21 | import time 22 | 23 | 24 | import _init_paths 25 | import models 26 | from config import cfg 27 | from config import update_config 28 | from core.function import get_final_preds 29 | from utils.transforms import get_affine_transform 30 | 31 | COCO_KEYPOINT_INDEXES = { 32 | 0: 'nose', 33 | 1: 'left_eye', 34 | 2: 'right_eye', 35 | 3: 'left_ear', 36 | 4: 'right_ear', 37 | 5: 'left_shoulder', 38 | 6: 'right_shoulder', 39 | 7: 'left_elbow', 40 | 8: 'right_elbow', 41 | 9: 'left_wrist', 42 | 10: 'right_wrist', 43 | 11: 'left_hip', 44 | 12: 'right_hip', 45 | 13: 'left_knee', 46 | 14: 'right_knee', 47 | 15: 'left_ankle', 48 | 16: 'right_ankle' 49 | } 50 | 51 | COCO_INSTANCE_CATEGORY_NAMES = [ 52 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 53 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 54 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 55 | 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 56 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 57 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 58 | 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 59 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 60 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 61 | 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 62 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 63 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 64 | ] 65 | 66 | SKELETON = [ 67 | [1,3],[1,0],[2,4],[2,0],[0,5],[0,6],[5,7],[7,9],[6,8],[8,10],[5,11],[6,12],[11,12],[11,13],[13,15],[12,14],[14,16] 68 | ] 69 | 70 | CocoColors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], 71 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], 72 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 73 | 74 | NUM_KPTS = 17 75 | 76 | CTX = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 77 | 78 | def draw_pose(keypoints,img): 79 | """draw the keypoints and the skeletons. 
80 | :params keypoints: the shape should be equal to [17,2] 81 | :params img: 82 | """ 83 | assert keypoints.shape == (NUM_KPTS,2) 84 | for i in range(len(SKELETON)): 85 | kpt_a, kpt_b = SKELETON[i][0], SKELETON[i][1] 86 | x_a, y_a = keypoints[kpt_a][0],keypoints[kpt_a][1] 87 | x_b, y_b = keypoints[kpt_b][0],keypoints[kpt_b][1] 88 | cv2.circle(img, (int(x_a), int(y_a)), 6, CocoColors[i], -1) 89 | cv2.circle(img, (int(x_b), int(y_b)), 6, CocoColors[i], -1) 90 | cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), CocoColors[i], 2) 91 | 92 | def draw_bbox(box,img): 93 | """draw the detected bounding box on the image. 94 | :param img: 95 | """ 96 | cv2.rectangle(img, box[0], box[1], color=(0, 255, 0),thickness=3) 97 | 98 | 99 | def get_person_detection_boxes(model, img, threshold=0.5): 100 | pred = model(img) 101 | pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i] 102 | for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score 103 | pred_boxes = [[(i[0], i[1]), (i[2], i[3])] 104 | for i in list(pred[0]['boxes'].detach().cpu().numpy())] # Bounding boxes 105 | pred_score = list(pred[0]['scores'].detach().cpu().numpy()) 106 | if not pred_score or max(pred_score) threshold][-1] 110 | pred_boxes = pred_boxes[:pred_t+1] 111 | pred_classes = pred_classes[:pred_t+1] 112 | 113 | person_boxes = [] 114 | for idx, box in enumerate(pred_boxes): 115 | if pred_classes[idx] == 'person': 116 | person_boxes.append(box) 117 | 118 | return person_boxes 119 | 120 | 121 | def get_pose_estimation_prediction(pose_model, image, center, scale): 122 | rotation = 0 123 | 124 | # pose estimation transformation 125 | trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE) 126 | model_input = cv2.warpAffine( 127 | image, 128 | trans, 129 | (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])), 130 | flags=cv2.INTER_LINEAR) 131 | transform = transforms.Compose([ 132 | transforms.ToTensor(), 133 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 134 | std=[0.229, 0.224, 0.225]), 135 | ]) 136 | 137 | # pose estimation inference 138 | model_input = transform(model_input).unsqueeze(0) 139 | # switch to evaluate mode 140 | pose_model.eval() 141 | with torch.no_grad(): 142 | # compute output heatmap 143 | output = pose_model(model_input) 144 | preds, _ = get_final_preds( 145 | cfg, 146 | output.clone().cpu().numpy(), 147 | np.asarray([center]), 148 | np.asarray([scale])) 149 | 150 | return preds 151 | 152 | 153 | def box_to_center_scale(box, model_image_width, model_image_height): 154 | """convert a box to center,scale information required for pose transformation 155 | Parameters 156 | ---------- 157 | box : list of tuple 158 | list of length 2 with two tuples of floats representing 159 | bottom left and top right corner of a box 160 | model_image_width : int 161 | model_image_height : int 162 | 163 | Returns 164 | ------- 165 | (numpy array, numpy array) 166 | Two numpy arrays, coordinates for the center of the box and the scale of the box 167 | """ 168 | center = np.zeros((2), dtype=np.float32) 169 | 170 | bottom_left_corner = box[0] 171 | top_right_corner = box[1] 172 | box_width = top_right_corner[0]-bottom_left_corner[0] 173 | box_height = top_right_corner[1]-bottom_left_corner[1] 174 | bottom_left_x = bottom_left_corner[0] 175 | bottom_left_y = bottom_left_corner[1] 176 | center[0] = bottom_left_x + box_width * 0.5 177 | center[1] = bottom_left_y + box_height * 0.5 178 | 179 | aspect_ratio = model_image_width * 1.0 / model_image_height 180 | pixel_std = 200 181 | 182 | if 
box_width > aspect_ratio * box_height: 183 | box_height = box_width * 1.0 / aspect_ratio 184 | elif box_width < aspect_ratio * box_height: 185 | box_width = box_height * aspect_ratio 186 | scale = np.array( 187 | [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std], 188 | dtype=np.float32) 189 | if center[0] != -1: 190 | scale = scale * 1.25 191 | 192 | return center, scale 193 | 194 | def parse_args(): 195 | parser = argparse.ArgumentParser(description='Train keypoints network') 196 | # general 197 | parser.add_argument('--cfg', type=str, default='demo/inference-config.yaml') 198 | parser.add_argument('--video', type=str) 199 | parser.add_argument('--webcam',action='store_true') 200 | parser.add_argument('--image',type=str) 201 | parser.add_argument('--write',action='store_true') 202 | parser.add_argument('--showFps',action='store_true') 203 | 204 | parser.add_argument('opts', 205 | help='Modify config options using the command-line', 206 | default=None, 207 | nargs=argparse.REMAINDER) 208 | 209 | args = parser.parse_args() 210 | 211 | # args expected by supporting codebase 212 | args.modelDir = '' 213 | args.logDir = '' 214 | args.dataDir = '' 215 | args.prevModelDir = '' 216 | return args 217 | 218 | 219 | def main(): 220 | # cudnn related setting 221 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 222 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 223 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 224 | 225 | args = parse_args() 226 | update_config(cfg, args) 227 | 228 | box_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 229 | box_model.to(CTX) 230 | box_model.eval() 231 | 232 | pose_model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 233 | cfg, is_train=False 234 | ) 235 | 236 | if cfg.TEST.MODEL_FILE: 237 | print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 238 | pose_model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 239 | else: 240 | print('expected model defined in config at TEST.MODEL_FILE') 241 | 242 | pose_model = torch.nn.DataParallel(pose_model, device_ids=cfg.GPUS) 243 | pose_model.to(CTX) 244 | pose_model.eval() 245 | 246 | # Loading an video or an image or webcam 247 | if args.webcam: 248 | vidcap = cv2.VideoCapture(0) 249 | elif args.video: 250 | vidcap = cv2.VideoCapture(args.video) 251 | elif args.image: 252 | image_bgr = cv2.imread(args.image) 253 | else: 254 | print('please use --video or --webcam or --image to define the input.') 255 | return 256 | 257 | if args.webcam or args.video: 258 | if args.write: 259 | save_path = 'output.avi' 260 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 261 | out = cv2.VideoWriter(save_path,fourcc, 24.0, (int(vidcap.get(3)),int(vidcap.get(4)))) 262 | while True: 263 | ret, image_bgr = vidcap.read() 264 | if ret: 265 | last_time = time.time() 266 | image = image_bgr[:, :, [2, 1, 0]] 267 | 268 | input = [] 269 | img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 270 | img_tensor = torch.from_numpy(img/255.).permute(2,0,1).float().to(CTX) 271 | input.append(img_tensor) 272 | 273 | # object detection box 274 | pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.9) 275 | 276 | # pose estimation 277 | if len(pred_boxes) >= 1: 278 | for box in pred_boxes: 279 | center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) 280 | image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() 281 | pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) 282 | if len(pose_preds)>=1: 283 | 
for kpt in pose_preds: 284 | draw_pose(kpt,image_bgr) # draw the poses 285 | 286 | if args.showFps: 287 | fps = 1/(time.time()-last_time) 288 | img = cv2.putText(image_bgr, 'fps: '+ "%.2f"%(fps), (25, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) 289 | 290 | if args.write: 291 | out.write(image_bgr) 292 | 293 | cv2.imshow('demo',image_bgr) 294 | if cv2.waitKey(1) & 0XFF==ord('q'): 295 | break 296 | else: 297 | print('cannot load the video.') 298 | break 299 | 300 | cv2.destroyAllWindows() 301 | vidcap.release() 302 | if args.write: 303 | print('video has been saved as {}'.format(save_path)) 304 | out.release() 305 | 306 | else: 307 | # estimate on the image 308 | last_time = time.time() 309 | image = image_bgr[:, :, [2, 1, 0]] 310 | 311 | input = [] 312 | img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 313 | img_tensor = torch.from_numpy(img/255.).permute(2,0,1).float().to(CTX) 314 | input.append(img_tensor) 315 | 316 | # object detection box 317 | pred_boxes = get_person_detection_boxes(box_model, input, threshold=0.9) 318 | 319 | # pose estimation 320 | if len(pred_boxes) >= 1: 321 | for box in pred_boxes: 322 | center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) 323 | image_pose = image.copy() if cfg.DATASET.COLOR_RGB else image_bgr.copy() 324 | pose_preds = get_pose_estimation_prediction(pose_model, image_pose, center, scale) 325 | if len(pose_preds)>=1: 326 | for kpt in pose_preds: 327 | draw_pose(kpt,image_bgr) # draw the poses 328 | 329 | if args.showFps: 330 | fps = 1/(time.time()-last_time) 331 | img = cv2.putText(image_bgr, 'fps: '+ "%.2f"%(fps), (25, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) 332 | 333 | if args.write: 334 | save_path = 'output.jpg' 335 | cv2.imwrite(save_path,image_bgr) 336 | print('the result image has been saved as {}'.format(save_path)) 337 | 338 | cv2.imshow('demo',image_bgr) 339 | if cv2.waitKey(0) & 0XFF==ord('q'): 340 | cv2.destroyAllWindows() 341 | 342 | if __name__ == '__main__': 343 | main() 344 | -------------------------------------------------------------------------------- /demo/hrnet-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/hrnet-demo.gif -------------------------------------------------------------------------------- /demo/inference-config.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | 
NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /demo/inference.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import csv 7 | import os 8 | import shutil 9 | 10 | from PIL import Image 11 | import torch 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.optim 15 | import torch.utils.data 16 | import torch.utils.data.distributed 17 | import torchvision.transforms as transforms 18 | import torchvision 19 | import cv2 20 | import numpy as np 21 | 22 | import sys 23 | sys.path.append("../lib") 24 | import time 25 | 26 | # import _init_paths 27 | import models 28 | from config import cfg 29 | from config import update_config 30 | from core.inference import get_final_preds 31 | from utils.transforms import get_affine_transform 32 | 33 | CTX = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 34 | 35 | 36 | COCO_KEYPOINT_INDEXES = { 37 | 0: 'nose', 38 | 1: 'left_eye', 39 | 2: 'right_eye', 40 | 3: 'left_ear', 41 | 4: 'right_ear', 42 | 5: 'left_shoulder', 43 | 6: 'right_shoulder', 44 | 7: 'left_elbow', 45 | 8: 'right_elbow', 46 | 9: 'left_wrist', 47 | 10: 'right_wrist', 48 | 11: 'left_hip', 49 | 12: 'right_hip', 50 | 13: 'left_knee', 51 | 14: 'right_knee', 52 | 15: 'left_ankle', 53 | 16: 'right_ankle' 54 | } 55 | 56 | COCO_INSTANCE_CATEGORY_NAMES = [ 57 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 58 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 59 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 60 | 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 61 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 62 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 63 | 'bottle', 'N/A', 'wine glass', 
'cup', 'fork', 'knife', 'spoon', 'bowl', 64 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 65 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 66 | 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 67 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 68 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 69 | ] 70 | 71 | 72 | def get_person_detection_boxes(model, img, threshold=0.5): 73 | pil_image = Image.fromarray(img) # Load the image 74 | transform = transforms.Compose([transforms.ToTensor()]) # Defing PyTorch Transform 75 | transformed_img = transform(pil_image) # Apply the transform to the image 76 | pred = model([transformed_img.to(CTX)]) # Pass the image to the model 77 | # Use the first detected person 78 | pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i] 79 | for i in list(pred[0]['labels'].cpu().numpy())] # Get the Prediction Score 80 | pred_boxes = [[(i[0], i[1]), (i[2], i[3])] 81 | for i in list(pred[0]['boxes'].cpu().detach().numpy())] # Bounding boxes 82 | pred_scores = list(pred[0]['scores'].cpu().detach().numpy()) 83 | 84 | person_boxes = [] 85 | # Select box has score larger than threshold and is person 86 | for pred_class, pred_box, pred_score in zip(pred_classes, pred_boxes, pred_scores): 87 | if (pred_score > threshold) and (pred_class == 'person'): 88 | person_boxes.append(pred_box) 89 | 90 | return person_boxes 91 | 92 | 93 | def get_pose_estimation_prediction(pose_model, image, centers, scales, transform): 94 | rotation = 0 95 | 96 | # pose estimation transformation 97 | model_inputs = [] 98 | for center, scale in zip(centers, scales): 99 | trans = get_affine_transform(center, scale, rotation, cfg.MODEL.IMAGE_SIZE) 100 | # Crop smaller image of people 101 | model_input = cv2.warpAffine( 102 | image, 103 | trans, 104 | (int(cfg.MODEL.IMAGE_SIZE[0]), int(cfg.MODEL.IMAGE_SIZE[1])), 105 | flags=cv2.INTER_LINEAR) 106 | 107 | # hwc -> 1chw 108 | model_input = transform(model_input)#.unsqueeze(0) 109 | model_inputs.append(model_input) 110 | 111 | # n * 1chw -> nchw 112 | model_inputs = torch.stack(model_inputs) 113 | 114 | # compute output heatmap 115 | output = pose_model(model_inputs.to(CTX)) 116 | coords, _ = get_final_preds( 117 | cfg, 118 | output.cpu().detach().numpy(), 119 | np.asarray(centers), 120 | np.asarray(scales)) 121 | 122 | return coords 123 | 124 | 125 | def box_to_center_scale(box, model_image_width, model_image_height): 126 | """convert a box to center,scale information required for pose transformation 127 | Parameters 128 | ---------- 129 | box : list of tuple 130 | list of length 2 with two tuples of floats representing 131 | bottom left and top right corner of a box 132 | model_image_width : int 133 | model_image_height : int 134 | 135 | Returns 136 | ------- 137 | (numpy array, numpy array) 138 | Two numpy arrays, coordinates for the center of the box and the scale of the box 139 | """ 140 | center = np.zeros((2), dtype=np.float32) 141 | 142 | bottom_left_corner = box[0] 143 | top_right_corner = box[1] 144 | box_width = top_right_corner[0]-bottom_left_corner[0] 145 | box_height = top_right_corner[1]-bottom_left_corner[1] 146 | bottom_left_x = bottom_left_corner[0] 147 | bottom_left_y = bottom_left_corner[1] 148 | center[0] = bottom_left_x + box_width * 0.5 149 | center[1] = bottom_left_y + box_height * 0.5 150 | 151 | aspect_ratio = model_image_width * 1.0 / model_image_height 152 | pixel_std = 
200 153 | 154 | if box_width > aspect_ratio * box_height: 155 | box_height = box_width * 1.0 / aspect_ratio 156 | elif box_width < aspect_ratio * box_height: 157 | box_width = box_height * aspect_ratio 158 | scale = np.array( 159 | [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std], 160 | dtype=np.float32) 161 | if center[0] != -1: 162 | scale = scale * 1.25 163 | 164 | return center, scale 165 | 166 | 167 | def prepare_output_dirs(prefix='/output/'): 168 | pose_dir = os.path.join(prefix, "pose") 169 | if os.path.exists(pose_dir) and os.path.isdir(pose_dir): 170 | shutil.rmtree(pose_dir) 171 | os.makedirs(pose_dir, exist_ok=True) 172 | return pose_dir 173 | 174 | 175 | def parse_args(): 176 | parser = argparse.ArgumentParser(description='Train keypoints network') 177 | # general 178 | parser.add_argument('--cfg', type=str, required=True) 179 | parser.add_argument('--videoFile', type=str, required=True) 180 | parser.add_argument('--outputDir', type=str, default='/output/') 181 | parser.add_argument('--inferenceFps', type=int, default=10) 182 | parser.add_argument('--writeBoxFrames', action='store_true') 183 | 184 | parser.add_argument('opts', 185 | help='Modify config options using the command-line', 186 | default=None, 187 | nargs=argparse.REMAINDER) 188 | 189 | args = parser.parse_args() 190 | 191 | # args expected by supporting codebase 192 | args.modelDir = '' 193 | args.logDir = '' 194 | args.dataDir = '' 195 | args.prevModelDir = '' 196 | return args 197 | 198 | 199 | def main(): 200 | # transformation 201 | pose_transform = transforms.Compose([ 202 | transforms.ToTensor(), 203 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 204 | std=[0.229, 0.224, 0.225]), 205 | ]) 206 | 207 | # cudnn related setting 208 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 209 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 210 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 211 | 212 | args = parse_args() 213 | update_config(cfg, args) 214 | pose_dir = prepare_output_dirs(args.outputDir) 215 | csv_output_rows = [] 216 | 217 | box_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) 218 | box_model.to(CTX) 219 | box_model.eval() 220 | pose_model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 221 | cfg, is_train=False 222 | ) 223 | 224 | if cfg.TEST.MODEL_FILE: 225 | print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 226 | pose_model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 227 | else: 228 | print('expected model defined in config at TEST.MODEL_FILE') 229 | 230 | pose_model.to(CTX) 231 | pose_model.eval() 232 | 233 | # Loading an video 234 | vidcap = cv2.VideoCapture(args.videoFile) 235 | fps = vidcap.get(cv2.CAP_PROP_FPS) 236 | if fps < args.inferenceFps: 237 | print('desired inference fps is '+str(args.inferenceFps)+' but video fps is '+str(fps)) 238 | exit() 239 | skip_frame_cnt = round(fps / args.inferenceFps) 240 | frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH)) 241 | frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 242 | outcap = cv2.VideoWriter('{}/{}_pose.avi'.format(args.outputDir, os.path.splitext(os.path.basename(args.videoFile))[0]), 243 | cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), int(skip_frame_cnt), (frame_width, frame_height)) 244 | 245 | count = 0 246 | while vidcap.isOpened(): 247 | total_now = time.time() 248 | ret, image_bgr = vidcap.read() 249 | count += 1 250 | 251 | if not ret: 252 | continue 253 | 254 | if count % skip_frame_cnt != 0: 255 | continue 256 | 257 | image_rgb = 
cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 258 | 259 | # Clone 2 image for person detection and pose estimation 260 | if cfg.DATASET.COLOR_RGB: 261 | image_per = image_rgb.copy() 262 | image_pose = image_rgb.copy() 263 | else: 264 | image_per = image_bgr.copy() 265 | image_pose = image_bgr.copy() 266 | 267 | # Clone 1 image for debugging purpose 268 | image_debug = image_bgr.copy() 269 | 270 | # object detection box 271 | now = time.time() 272 | pred_boxes = get_person_detection_boxes(box_model, image_per, threshold=0.9) 273 | then = time.time() 274 | print("Find person bbox in: {} sec".format(then - now)) 275 | 276 | # Can not find people. Move to next frame 277 | if not pred_boxes: 278 | count += 1 279 | continue 280 | 281 | if args.writeBoxFrames: 282 | for box in pred_boxes: 283 | cv2.rectangle(image_debug, box[0], box[1], color=(0, 255, 0), 284 | thickness=3) # Draw Rectangle with the coordinates 285 | 286 | # pose estimation : for multiple people 287 | centers = [] 288 | scales = [] 289 | for box in pred_boxes: 290 | center, scale = box_to_center_scale(box, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]) 291 | centers.append(center) 292 | scales.append(scale) 293 | 294 | now = time.time() 295 | pose_preds = get_pose_estimation_prediction(pose_model, image_pose, centers, scales, transform=pose_transform) 296 | then = time.time() 297 | print("Find person pose in: {} sec".format(then - now)) 298 | 299 | new_csv_row = [] 300 | for coords in pose_preds: 301 | # Draw each point on image 302 | for coord in coords: 303 | x_coord, y_coord = int(coord[0]), int(coord[1]) 304 | cv2.circle(image_debug, (x_coord, y_coord), 4, (255, 0, 0), 2) 305 | new_csv_row.extend([x_coord, y_coord]) 306 | 307 | total_then = time.time() 308 | 309 | text = "{:03.2f} sec".format(total_then - total_now) 310 | cv2.putText(image_debug, text, (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 311 | 1, (0, 0, 255), 2, cv2.LINE_AA) 312 | 313 | cv2.imshow("pos", image_debug) 314 | if cv2.waitKey(1) & 0xFF == ord('q'): 315 | break 316 | 317 | csv_output_rows.append(new_csv_row) 318 | img_file = os.path.join(pose_dir, 'pose_{:08d}.jpg'.format(count)) 319 | cv2.imwrite(img_file, image_debug) 320 | outcap.write(image_debug) 321 | 322 | 323 | # write csv 324 | csv_headers = ['frame'] 325 | for keypoint in COCO_KEYPOINT_INDEXES.values(): 326 | csv_headers.extend([keypoint+'_x', keypoint+'_y']) 327 | 328 | csv_output_filename = os.path.join(args.outputDir, 'pose-data.csv') 329 | with open(csv_output_filename, 'w', newline='') as csvfile: 330 | csvwriter = csv.writer(csvfile) 331 | csvwriter.writerow(csv_headers) 332 | csvwriter.writerows(csv_output_rows) 333 | 334 | vidcap.release() 335 | outcap.release() 336 | 337 | cv2.destroyAllWindows() 338 | 339 | 340 | if __name__ == '__main__': 341 | main() 342 | -------------------------------------------------------------------------------- /demo/inference_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_1.jpg -------------------------------------------------------------------------------- /demo/inference_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_3.jpg -------------------------------------------------------------------------------- 
/demo/inference_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_5.jpg -------------------------------------------------------------------------------- /demo/inference_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_6.jpg -------------------------------------------------------------------------------- /demo/inference_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/demo/inference_7.jpg -------------------------------------------------------------------------------- /experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | 
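Before the remaining experiment files, a quick note on how they are consumed: the tools and the demo script take one of these YAMLs via `--cfg` (see `parse_args` above) and merge it into the yacs `cfg` node with `update_config`, defined in `lib/config/default.py` further down. A minimal sketch of loading the w32_256x192 file directly, assuming the repo's `lib/` directory is on `PYTHONPATH` (which is what the `_init_paths.py` helpers arrange):

```python
from types import SimpleNamespace

from config import cfg, update_config  # exported by lib/config/__init__.py

# update_config expects an argparse-style namespace (see parse_args above).
args = SimpleNamespace(
    cfg='experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml',
    opts=[],            # extra KEY VALUE overrides, e.g. ['TEST.MODEL_FILE', 'model.pth']
    modelDir='', logDir='', dataDir='',
)
update_config(cfg, args)

# Sizes are stored [width, height]; heatmaps are produced at 1/4 input resolution.
print(cfg.MODEL.IMAGE_SIZE, cfg.MODEL.HEATMAP_SIZE)              # [192, 256] [48, 64]
print(cfg.MODEL.NAME, cfg.DATASET.DATASET, cfg.TRAIN.END_EPOCH)  # pose_hrnet coco 210
```

Across the four COCO HRNet files, only the input/heatmap resolution, SIGMA, the ImageNet-pretrained checkpoint, the per-branch NUM_CHANNELS (32/64/128/256 vs. 48/96/192/384), and, for the largest model, BATCH_SIZE_PER_GPU differ; the Adam schedule (lr 1e-3, steps at epochs 170/200, 210 epochs total) is identical.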
-------------------------------------------------------------------------------- /experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 
256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | 
TRAIN: 94 | BATCH_SIZE_PER_GPU: 24 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 24 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | 
HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | 
DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- 
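The pose_resnet configs above (Simple Baselines-style heads) rely on one piece of arithmetic that is easy to miss: the ResNet backbone reduces resolution by 32x, and the head's three stride-2 deconvolutions (NUM_DECONV_LAYERS: 3, 256 filters, 4x4 kernels) recover a factor of 8, so heatmaps come out at 1/4 of the input size, the same output stride as the HRNet configs. A quick check of the numbers used in these files (a sketch, not code from the repo):

```python
def expected_heatmap_size(image_size, backbone_stride=32, num_deconv_layers=3):
    # Each deconv layer upsamples by 2, so the head's output stride is 32 / 2**3 = 4.
    output_stride = backbone_stride // (2 ** num_deconv_layers)
    return [side // output_stride for side in image_size]

print(expected_heatmap_size([192, 256]))  # [48, 64] -> HEATMAP_SIZE in the 256x192 configs
print(expected_heatmap_size([288, 384]))  # [72, 96] -> HEATMAP_SIZE in the 384x288 configs
```

SIGMA grows with the heatmap resolution (2 at 48x64, 3 at 72x96), so the Gaussian training target covers a similar spatial extent at both input sizes.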
/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | 
NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 
13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 101 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 152 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | 
SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 50 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /figures/hrnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/hrnet.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_610_id_2685_000000002685.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_610_id_2685_000000002685.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_710_id_153229_000000153229.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_710_id_153229_000000153229.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_755_id_343561_000000343561.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_755_id_343561_000000343561.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_755_id_559842_000000559842.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_755_id_559842_000000559842.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_770_id_6954_000000006954.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_770_id_6954_000000006954.png -------------------------------------------------------------------------------- /figures/visualization/coco/score_919_id_53626_000000053626.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/figures/visualization/coco/score_919_id_53626_000000053626.png -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 
5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.DATA_DIR = '' 22 | _C.GPUS = (0,) 23 | _C.WORKERS = 4 24 | _C.PRINT_FREQ = 20 25 | _C.AUTO_RESUME = False 26 | _C.PIN_MEMORY = True 27 | _C.RANK = 0 28 | 29 | # Cudnn related params 30 | _C.CUDNN = CN() 31 | _C.CUDNN.BENCHMARK = True 32 | _C.CUDNN.DETERMINISTIC = False 33 | _C.CUDNN.ENABLED = True 34 | 35 | # common params for NETWORK 36 | _C.MODEL = CN() 37 | _C.MODEL.NAME = 'pose_hrnet' 38 | _C.MODEL.INIT_WEIGHTS = True 39 | _C.MODEL.PRETRAINED = '' 40 | _C.MODEL.NUM_JOINTS = 17 41 | _C.MODEL.TAG_PER_JOINT = True 42 | _C.MODEL.TARGET_TYPE = 'gaussian' 43 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 44 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 45 | _C.MODEL.SIGMA = 2 46 | _C.MODEL.EXTRA = CN(new_allowed=True) 47 | 48 | _C.LOSS = CN() 49 | _C.LOSS.USE_OHKM = False 50 | _C.LOSS.TOPK = 8 51 | _C.LOSS.USE_TARGET_WEIGHT = True 52 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 53 | 54 | # DATASET related params 55 | _C.DATASET = CN() 56 | _C.DATASET.ROOT = '' 57 | _C.DATASET.DATASET = 'mpii' 58 | _C.DATASET.TRAIN_SET = 'train' 59 | _C.DATASET.TEST_SET = 'valid' 60 | _C.DATASET.DATA_FORMAT = 'jpg' 61 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 62 | _C.DATASET.SELECT_DATA = False 63 | 64 | # training data augmentation 65 | _C.DATASET.FLIP = True 66 | _C.DATASET.SCALE_FACTOR = 0.25 67 | _C.DATASET.ROT_FACTOR = 30 68 | _C.DATASET.PROB_HALF_BODY = 0.0 69 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 70 | _C.DATASET.COLOR_RGB = False 71 | 72 | # train 73 | _C.TRAIN = CN() 74 | 75 | _C.TRAIN.LR_FACTOR = 0.1 76 | _C.TRAIN.LR_STEP = [90, 110] 77 | _C.TRAIN.LR = 0.001 78 | 79 | _C.TRAIN.OPTIMIZER = 'adam' 80 | _C.TRAIN.MOMENTUM = 0.9 81 | _C.TRAIN.WD = 0.0001 82 | _C.TRAIN.NESTEROV = False 83 | _C.TRAIN.GAMMA1 = 0.99 84 | _C.TRAIN.GAMMA2 = 0.0 85 | 86 | _C.TRAIN.BEGIN_EPOCH = 0 87 | _C.TRAIN.END_EPOCH = 140 88 | 89 | _C.TRAIN.RESUME = False 90 | _C.TRAIN.CHECKPOINT = '' 91 | 92 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 93 | _C.TRAIN.SHUFFLE = True 94 | 95 | # testing 96 | _C.TEST = CN() 97 | 98 | # size of images for each device 99 | _C.TEST.BATCH_SIZE_PER_GPU = 32 100 | # Test Model Epoch 101 | _C.TEST.FLIP_TEST = False 102 | _C.TEST.POST_PROCESS = False 103 | _C.TEST.SHIFT_HEATMAP = False 104 | 105 | _C.TEST.USE_GT_BBOX = False 106 | 107 | # nms 108 | _C.TEST.IMAGE_THRE = 0.1 109 | _C.TEST.NMS_THRE = 0.6 110 | _C.TEST.SOFT_NMS = False 111 | _C.TEST.OKS_THRE = 0.5 112 | _C.TEST.IN_VIS_THRE = 0.0 113 | _C.TEST.COCO_BBOX_FILE = '' 114 | _C.TEST.BBOX_THRE = 1.0 115 | _C.TEST.MODEL_FILE = '' 116 | 117 | # debug 118 | _C.DEBUG = CN() 119 | _C.DEBUG.DEBUG = False 120 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 121 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 122 | _C.DEBUG.SAVE_HEATMAPS_GT = False 123 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 124 | 125 | 126 | def update_config(cfg, args): 127 | cfg.defrost() 128 | cfg.merge_from_file(args.cfg) 129 | cfg.merge_from_list(args.opts) 130 | 131 | if args.modelDir: 132 | cfg.OUTPUT_DIR = args.modelDir 133 | 134 | if args.logDir: 135 | cfg.LOG_DIR = args.logDir 136 | 137 | if args.dataDir: 138 | cfg.DATA_DIR = args.dataDir 139 | 140 | 
cfg.DATASET.ROOT = os.path.join( 141 | cfg.DATA_DIR, cfg.DATASET.ROOT 142 | ) 143 | 144 | cfg.MODEL.PRETRAINED = os.path.join( 145 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 146 | ) 147 | 148 | if cfg.TEST.MODEL_FILE: 149 | cfg.TEST.MODEL_FILE = os.path.join( 150 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 151 | ) 152 | 153 | cfg.freeze() 154 | 155 | 156 | if __name__ == '__main__': 157 | import sys 158 | with open(sys.argv[1], 'w') as f: 159 | print(_C, file=f) 160 | 161 | -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 53 | 54 | 55 | MODEL_EXTRAS = { 56 | 'pose_resnet': POSE_RESNET, 57 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 58 | } 59 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from core.inference import get_max_preds 14 | 15 | 16 | def calc_dists(preds, target, normalize): 17 | preds = preds.astype(np.float32) 18 | target = target.astype(np.float32) 19 | dists = np.zeros((preds.shape[1], preds.shape[0])) 20 | for n in range(preds.shape[0]): 21 | for c in range(preds.shape[1]): 22 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 23 | normed_preds = preds[n, c, :] / normalize[n] 24 | normed_targets = target[n, c, :] / normalize[n] 25 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 26 | else: 27 | dists[c, n] = -1 28 | return dists 29 | 30 | 31 | def dist_acc(dists, thr=0.5): 32 | ''' Return percentage below threshold while ignoring values with a -1 ''' 33 | dist_cal = np.not_equal(dists, -1) 34 | num_dist_cal = dist_cal.sum() 35 | if num_dist_cal > 0: 36 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 37 | else: 38 | return -1 39 | 40 | 41 | def accuracy(output, target, hm_type='gaussian', thr=0.5): 42 | ''' 43 | Calculate accuracy according to PCK, 44 | but uses ground truth heatmap rather than x,y locations 45 | First value to be returned is average accuracy across 'idxs', 46 | followed by individual accuracies 47 | ''' 48 | idx = list(range(output.shape[1])) 49 | norm = 1.0 50 | if hm_type == 'gaussian': 51 | pred, _ = get_max_preds(output) 52 | target, _ = get_max_preds(target) 53 | h = output.shape[2] 54 | w = output.shape[3] 55 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 56 | dists = calc_dists(pred, target, norm) 57 | 58 | acc = np.zeros((len(idx) + 1)) 59 | avg_acc = 0 60 | cnt = 0 61 | 62 | for i in range(len(idx)): 63 | acc[i + 1] = dist_acc(dists[idx[i]]) 64 | if acc[i + 1] >= 0: 65 | avg_acc = avg_acc + acc[i + 1] 66 | cnt += 1 67 | 68 | avg_acc = avg_acc / cnt if cnt != 0 else 0 69 | if cnt != 0: 70 | acc[0] = avg_acc 71 | return acc, avg_acc, cnt, pred 72 | 73 | 74 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import time 12 | import logging 13 | import os 14 | 15 | import numpy as np 16 | import torch 17 | 18 | from core.evaluate import accuracy 19 | from core.inference import get_final_preds 20 | from utils.transforms import flip_back 21 | from utils.vis import save_debug_images 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def train(config, train_loader, model, criterion, optimizer, epoch, 28 | output_dir, tb_log_dir, writer_dict): 29 | batch_time = AverageMeter() 30 | data_time = AverageMeter() 31 | losses = AverageMeter() 32 | acc = AverageMeter() 33 | 34 | # switch to train mode 35 | model.train() 36 | 37 | end = time.time() 38 | for i, (input, target, target_weight, meta) in enumerate(train_loader): 39 | # measure data loading time 40 | data_time.update(time.time() - end) 41 | 42 | # compute output 43 | outputs = model(input) 44 | 45 | target = target.cuda(non_blocking=True) 46 | target_weight = target_weight.cuda(non_blocking=True) 47 | 48 | if isinstance(outputs, list): 49 | loss = criterion(outputs[0], target, target_weight) 50 | for output in outputs[1:]: 51 | loss += criterion(output, target, target_weight) 52 | else: 53 | output = outputs 54 | loss = criterion(output, target, target_weight) 55 | 56 | # loss = criterion(output, target, target_weight) 57 | 58 | # compute gradient and do update step 59 | optimizer.zero_grad() 60 | loss.backward() 61 | optimizer.step() 62 | 63 | # measure accuracy and record loss 64 | losses.update(loss.item(), input.size(0)) 65 | 66 | _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), 67 | target.detach().cpu().numpy()) 68 | acc.update(avg_acc, cnt) 69 | 70 | # measure elapsed time 71 | batch_time.update(time.time() - end) 72 | end = time.time() 73 | 74 | if i % config.PRINT_FREQ == 0: 75 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 76 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 77 | 'Speed {speed:.1f} samples/s\t' \ 78 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 79 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 80 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 81 | epoch, i, len(train_loader), batch_time=batch_time, 82 | speed=input.size(0)/batch_time.val, 83 | data_time=data_time, loss=losses, acc=acc) 84 | logger.info(msg) 85 | 86 | writer = writer_dict['writer'] 87 | global_steps = writer_dict['train_global_steps'] 88 | writer.add_scalar('train_loss', losses.val, global_steps) 89 | writer.add_scalar('train_acc', acc.val, global_steps) 90 | writer_dict['train_global_steps'] = global_steps + 1 91 | 92 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 93 | save_debug_images(config, input, meta, target, pred*4, output, 94 | prefix) 95 | 96 | 97 | def validate(config, val_loader, val_dataset, model, criterion, output_dir, 98 | tb_log_dir, writer_dict=None): 99 | batch_time = AverageMeter() 100 | losses = AverageMeter() 101 | acc = AverageMeter() 102 | 103 | # switch to evaluate mode 104 | model.eval() 105 | 106 | num_samples = len(val_dataset) 107 | all_preds = np.zeros( 108 | (num_samples, config.MODEL.NUM_JOINTS, 3), 109 | dtype=np.float32 110 | ) 111 | all_boxes = np.zeros((num_samples, 6)) 112 | image_path = [] 113 | filenames = [] 114 | imgnums = [] 115 | idx = 0 116 | with torch.no_grad(): 117 | end = time.time() 
118 | for i, (input, target, target_weight, meta) in enumerate(val_loader): 119 | # compute output 120 | outputs = model(input) 121 | if isinstance(outputs, list): 122 | output = outputs[-1] 123 | else: 124 | output = outputs 125 | 126 | if config.TEST.FLIP_TEST: 127 | input_flipped = input.flip(3) 128 | outputs_flipped = model(input_flipped) 129 | 130 | if isinstance(outputs_flipped, list): 131 | output_flipped = outputs_flipped[-1] 132 | else: 133 | output_flipped = outputs_flipped 134 | 135 | output_flipped = flip_back(output_flipped.cpu().numpy(), 136 | val_dataset.flip_pairs) 137 | output_flipped = torch.from_numpy(output_flipped.copy()).cuda() 138 | 139 | 140 | # feature is not aligned, shift flipped heatmap for higher accuracy 141 | if config.TEST.SHIFT_HEATMAP: 142 | output_flipped[:, :, :, 1:] = \ 143 | output_flipped.clone()[:, :, :, 0:-1] 144 | 145 | output = (output + output_flipped) * 0.5 146 | 147 | target = target.cuda(non_blocking=True) 148 | target_weight = target_weight.cuda(non_blocking=True) 149 | 150 | loss = criterion(output, target, target_weight) 151 | 152 | num_images = input.size(0) 153 | # measure accuracy and record loss 154 | losses.update(loss.item(), num_images) 155 | _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), 156 | target.cpu().numpy()) 157 | 158 | acc.update(avg_acc, cnt) 159 | 160 | # measure elapsed time 161 | batch_time.update(time.time() - end) 162 | end = time.time() 163 | 164 | c = meta['center'].numpy() 165 | s = meta['scale'].numpy() 166 | score = meta['score'].numpy() 167 | 168 | preds, maxvals = get_final_preds( 169 | config, output.clone().cpu().numpy(), c, s) 170 | 171 | all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] 172 | all_preds[idx:idx + num_images, :, 2:3] = maxvals 173 | # double check this all_boxes parts 174 | all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] 175 | all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] 176 | all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) 177 | all_boxes[idx:idx + num_images, 5] = score 178 | image_path.extend(meta['image']) 179 | 180 | idx += num_images 181 | 182 | if i % config.PRINT_FREQ == 0: 183 | msg = 'Test: [{0}/{1}]\t' \ 184 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 185 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 186 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 187 | i, len(val_loader), batch_time=batch_time, 188 | loss=losses, acc=acc) 189 | logger.info(msg) 190 | 191 | prefix = '{}_{}'.format( 192 | os.path.join(output_dir, 'val'), i 193 | ) 194 | save_debug_images(config, input, meta, target, pred*4, output, 195 | prefix) 196 | 197 | name_values, perf_indicator = val_dataset.evaluate( 198 | config, all_preds, output_dir, all_boxes, image_path, 199 | filenames, imgnums 200 | ) 201 | 202 | model_name = config.MODEL.NAME 203 | if isinstance(name_values, list): 204 | for name_value in name_values: 205 | _print_name_value(name_value, model_name) 206 | else: 207 | _print_name_value(name_values, model_name) 208 | 209 | if writer_dict: 210 | writer = writer_dict['writer'] 211 | global_steps = writer_dict['valid_global_steps'] 212 | writer.add_scalar( 213 | 'valid_loss', 214 | losses.avg, 215 | global_steps 216 | ) 217 | writer.add_scalar( 218 | 'valid_acc', 219 | acc.avg, 220 | global_steps 221 | ) 222 | if isinstance(name_values, list): 223 | for name_value in name_values: 224 | writer.add_scalars( 225 | 'valid', 226 | dict(name_value), 227 | global_steps 228 | ) 229 | else: 230 | writer.add_scalars( 231 | 'valid', 232 | dict(name_values), 233 | 
global_steps 234 | ) 235 | writer_dict['valid_global_steps'] = global_steps + 1 236 | 237 | return perf_indicator 238 | 239 | 240 | # markdown format output 241 | def _print_name_value(name_value, full_arch_name): 242 | names = name_value.keys() 243 | values = name_value.values() 244 | num_values = len(name_value) 245 | logger.info( 246 | '| Arch ' + 247 | ' '.join(['| {}'.format(name) for name in names]) + 248 | ' |' 249 | ) 250 | logger.info('|---' * (num_values+1) + '|') 251 | 252 | if len(full_arch_name) > 15: 253 | full_arch_name = full_arch_name[:8] + '...' 254 | logger.info( 255 | '| ' + full_arch_name + ' ' + 256 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 257 | ' |' 258 | ) 259 | 260 | 261 | class AverageMeter(object): 262 | """Computes and stores the average and current value""" 263 | def __init__(self): 264 | self.reset() 265 | 266 | def reset(self): 267 | self.val = 0 268 | self.avg = 0 269 | self.sum = 0 270 | self.count = 0 271 | 272 | def update(self, val, n=1): 273 | self.val = val 274 | self.sum += val * n 275 | self.count += n 276 | self.avg = self.sum / self.count if self.count != 0 else 0 277 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | 15 | from utils.transforms import transform_preds 16 | 17 | 18 | def get_max_preds(batch_heatmaps): 19 | ''' 20 | get predictions from score maps 21 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 22 | ''' 23 | assert isinstance(batch_heatmaps, np.ndarray), \ 24 | 'batch_heatmaps should be numpy.ndarray' 25 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 26 | 27 | batch_size = batch_heatmaps.shape[0] 28 | num_joints = batch_heatmaps.shape[1] 29 | width = batch_heatmaps.shape[3] 30 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 31 | idx = np.argmax(heatmaps_reshaped, 2) 32 | maxvals = np.amax(heatmaps_reshaped, 2) 33 | 34 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 35 | idx = idx.reshape((batch_size, num_joints, 1)) 36 | 37 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 38 | 39 | preds[:, :, 0] = (preds[:, :, 0]) % width 40 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 41 | 42 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 43 | pred_mask = pred_mask.astype(np.float32) 44 | 45 | preds *= pred_mask 46 | return preds, maxvals 47 | 48 | 49 | def get_final_preds(config, batch_heatmaps, center, scale): 50 | coords, maxvals = get_max_preds(batch_heatmaps) 51 | 52 | heatmap_height = batch_heatmaps.shape[2] 53 | heatmap_width = batch_heatmaps.shape[3] 54 | 55 | # post-processing 56 | if config.TEST.POST_PROCESS: 57 | for n in range(coords.shape[0]): 58 | for p in range(coords.shape[1]): 59 | hm = batch_heatmaps[n][p] 60 | px = int(math.floor(coords[n][p][0] + 0.5)) 61 | py = int(math.floor(coords[n][p][1] + 0.5)) 62 | if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1: 63 | diff = np.array( 64 | [ 65 | hm[py][px+1] - 
hm[py][px-1], 66 | hm[py+1][px]-hm[py-1][px] 67 | ] 68 | ) 69 | coords[n][p] += np.sign(diff) * .25 70 | 71 | preds = coords.copy() 72 | 73 | # Transform back 74 | for i in range(coords.shape[0]): 75 | preds[i] = transform_preds( 76 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 77 | ) 78 | 79 | return preds, maxvals 80 | -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class JointsMSELoss(nn.Module): 16 | def __init__(self, use_target_weight): 17 | super(JointsMSELoss, self).__init__() 18 | self.criterion = nn.MSELoss(reduction='mean') 19 | self.use_target_weight = use_target_weight 20 | 21 | def forward(self, output, target, target_weight): 22 | batch_size = output.size(0) 23 | num_joints = output.size(1) 24 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 25 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 26 | loss = 0 27 | 28 | for idx in range(num_joints): 29 | heatmap_pred = heatmaps_pred[idx].squeeze() 30 | heatmap_gt = heatmaps_gt[idx].squeeze() 31 | if self.use_target_weight: 32 | loss += 0.5 * self.criterion( 33 | heatmap_pred.mul(target_weight[:, idx]), 34 | heatmap_gt.mul(target_weight[:, idx]) 35 | ) 36 | else: 37 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 38 | 39 | return loss / num_joints 40 | 41 | 42 | class JointsOHKMMSELoss(nn.Module): 43 | def __init__(self, use_target_weight, topk=8): 44 | super(JointsOHKMMSELoss, self).__init__() 45 | self.criterion = nn.MSELoss(reduction='none') 46 | self.use_target_weight = use_target_weight 47 | self.topk = topk 48 | 49 | def ohkm(self, loss): 50 | ohkm_loss = 0. 
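# Online hard keypoint mining: `loss` arrives here as [batch_size, num_joints];
# for each sample only the top-k joints with the largest loss are kept and
# averaged, so the gradient concentrates on the hardest keypoints.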
51 | for i in range(loss.size()[0]): 52 | sub_loss = loss[i] 53 | topk_val, topk_idx = torch.topk( 54 | sub_loss, k=self.topk, dim=0, sorted=False 55 | ) 56 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 57 | ohkm_loss += torch.sum(tmp_loss) / self.topk 58 | ohkm_loss /= loss.size()[0] 59 | return ohkm_loss 60 | 61 | def forward(self, output, target, target_weight): 62 | batch_size = output.size(0) 63 | num_joints = output.size(1) 64 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 65 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 66 | 67 | loss = [] 68 | for idx in range(num_joints): 69 | heatmap_pred = heatmaps_pred[idx].squeeze() 70 | heatmap_gt = heatmaps_gt[idx].squeeze() 71 | if self.use_target_weight: 72 | loss.append(0.5 * self.criterion( 73 | heatmap_pred.mul(target_weight[:, idx]), 74 | heatmap_gt.mul(target_weight[:, idx]) 75 | )) 76 | else: 77 | loss.append( 78 | 0.5 * self.criterion(heatmap_pred, heatmap_gt) 79 | ) 80 | 81 | loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss] 82 | loss = torch.cat(loss, dim=1) 83 | 84 | return self.ohkm(loss) 85 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import copy 12 | import logging 13 | import random 14 | 15 | import cv2 16 | import numpy as np 17 | import torch 18 | from torch.utils.data import Dataset 19 | 20 | from utils.transforms import get_affine_transform 21 | from utils.transforms import affine_transform 22 | from utils.transforms import fliplr_joints 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class JointsDataset(Dataset): 29 | def __init__(self, cfg, root, image_set, is_train, transform=None): 30 | self.num_joints = 0 31 | self.pixel_std = 200 32 | self.flip_pairs = [] 33 | self.parent_ids = [] 34 | 35 | self.is_train = is_train 36 | self.root = root 37 | self.image_set = image_set 38 | 39 | self.output_path = cfg.OUTPUT_DIR 40 | self.data_format = cfg.DATASET.DATA_FORMAT 41 | 42 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 43 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 44 | self.flip = cfg.DATASET.FLIP 45 | self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY 46 | self.prob_half_body = cfg.DATASET.PROB_HALF_BODY 47 | self.color_rgb = cfg.DATASET.COLOR_RGB 48 | 49 | self.target_type = cfg.MODEL.TARGET_TYPE 50 | self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) 51 | self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) 52 | self.sigma = cfg.MODEL.SIGMA 53 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 54 | self.joints_weight = 1 55 | 56 | self.transform = transform 57 | self.db = [] 58 | 59 | def _get_db(self): 60 | raise NotImplementedError 61 | 62 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 63 | raise NotImplementedError 64 | 65 | def half_body_transform(self, joints, joints_vis): 66 | upper_joints = [] 67 | lower_joints = [] 68 | for joint_id in range(self.num_joints): 69 | if joints_vis[joint_id][0] > 0: 70 | if joint_id in 
self.upper_body_ids: 71 | upper_joints.append(joints[joint_id]) 72 | else: 73 | lower_joints.append(joints[joint_id]) 74 | 75 | if np.random.randn() < 0.5 and len(upper_joints) > 2: 76 | selected_joints = upper_joints 77 | else: 78 | selected_joints = lower_joints \ 79 | if len(lower_joints) > 2 else upper_joints 80 | 81 | if len(selected_joints) < 2: 82 | return None, None 83 | 84 | selected_joints = np.array(selected_joints, dtype=np.float32) 85 | center = selected_joints.mean(axis=0)[:2] 86 | 87 | left_top = np.amin(selected_joints, axis=0) 88 | right_bottom = np.amax(selected_joints, axis=0) 89 | 90 | w = right_bottom[0] - left_top[0] 91 | h = right_bottom[1] - left_top[1] 92 | 93 | if w > self.aspect_ratio * h: 94 | h = w * 1.0 / self.aspect_ratio 95 | elif w < self.aspect_ratio * h: 96 | w = h * self.aspect_ratio 97 | 98 | scale = np.array( 99 | [ 100 | w * 1.0 / self.pixel_std, 101 | h * 1.0 / self.pixel_std 102 | ], 103 | dtype=np.float32 104 | ) 105 | 106 | scale = scale * 1.5 107 | 108 | return center, scale 109 | 110 | def __len__(self,): 111 | return len(self.db) 112 | 113 | def __getitem__(self, idx): 114 | db_rec = copy.deepcopy(self.db[idx]) 115 | 116 | image_file = db_rec['image'] 117 | filename = db_rec['filename'] if 'filename' in db_rec else '' 118 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 119 | 120 | if self.data_format == 'zip': 121 | from utils import zipreader 122 | data_numpy = zipreader.imread( 123 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 124 | ) 125 | else: 126 | data_numpy = cv2.imread( 127 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 128 | ) 129 | 130 | if self.color_rgb: 131 | data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) 132 | 133 | if data_numpy is None: 134 | logger.error('=> fail to read {}'.format(image_file)) 135 | raise ValueError('Fail to read {}'.format(image_file)) 136 | 137 | joints = db_rec['joints_3d'] 138 | joints_vis = db_rec['joints_3d_vis'] 139 | 140 | c = db_rec['center'] 141 | s = db_rec['scale'] 142 | score = db_rec['score'] if 'score' in db_rec else 1 143 | r = 0 144 | 145 | if self.is_train: 146 | if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body 147 | and np.random.rand() < self.prob_half_body): 148 | c_half_body, s_half_body = self.half_body_transform( 149 | joints, joints_vis 150 | ) 151 | 152 | if c_half_body is not None and s_half_body is not None: 153 | c, s = c_half_body, s_half_body 154 | 155 | sf = self.scale_factor 156 | rf = self.rotation_factor 157 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 158 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 159 | if random.random() <= 0.6 else 0 160 | 161 | if self.flip and random.random() <= 0.5: 162 | data_numpy = data_numpy[:, ::-1, :] 163 | joints, joints_vis = fliplr_joints( 164 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 165 | c[0] = data_numpy.shape[1] - c[0] - 1 166 | 167 | trans = get_affine_transform(c, s, r, self.image_size) 168 | input = cv2.warpAffine( 169 | data_numpy, 170 | trans, 171 | (int(self.image_size[0]), int(self.image_size[1])), 172 | flags=cv2.INTER_LINEAR) 173 | 174 | if self.transform: 175 | input = self.transform(input) 176 | 177 | for i in range(self.num_joints): 178 | if joints_vis[i, 0] > 0.0: 179 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 180 | 181 | target, target_weight = self.generate_target(joints, joints_vis) 182 | 183 | target = torch.from_numpy(target) 184 | target_weight = torch.from_numpy(target_weight) 185 | 186 | meta = { 187 | 
'image': image_file, 188 | 'filename': filename, 189 | 'imgnum': imgnum, 190 | 'joints': joints, 191 | 'joints_vis': joints_vis, 192 | 'center': c, 193 | 'scale': s, 194 | 'rotation': r, 195 | 'score': score 196 | } 197 | 198 | return input, target, target_weight, meta 199 | 200 | def select_data(self, db): 201 | db_selected = [] 202 | for rec in db: 203 | num_vis = 0 204 | joints_x = 0.0 205 | joints_y = 0.0 206 | for joint, joint_vis in zip( 207 | rec['joints_3d'], rec['joints_3d_vis']): 208 | if joint_vis[0] <= 0: 209 | continue 210 | num_vis += 1 211 | 212 | joints_x += joint[0] 213 | joints_y += joint[1] 214 | if num_vis == 0: 215 | continue 216 | 217 | joints_x, joints_y = joints_x / num_vis, joints_y / num_vis 218 | 219 | area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) 220 | joints_center = np.array([joints_x, joints_y]) 221 | bbox_center = np.array(rec['center']) 222 | diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) 223 | ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) 224 | 225 | metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 226 | if ks > metric: 227 | db_selected.append(rec) 228 | 229 | logger.info('=> num db: {}'.format(len(db))) 230 | logger.info('=> num selected db: {}'.format(len(db_selected))) 231 | return db_selected 232 | 233 | def generate_target(self, joints, joints_vis): 234 | ''' 235 | :param joints: [num_joints, 3] 236 | :param joints_vis: [num_joints, 3] 237 | :return: target, target_weight(1: visible, 0: invisible) 238 | ''' 239 | target_weight = np.ones((self.num_joints, 1), dtype=np.float32) 240 | target_weight[:, 0] = joints_vis[:, 0] 241 | 242 | assert self.target_type == 'gaussian', \ 243 | 'Only support gaussian map now!' 244 | 245 | if self.target_type == 'gaussian': 246 | target = np.zeros((self.num_joints, 247 | self.heatmap_size[1], 248 | self.heatmap_size[0]), 249 | dtype=np.float32) 250 | 251 | tmp_size = self.sigma * 3 252 | 253 | for joint_id in range(self.num_joints): 254 | feat_stride = self.image_size / self.heatmap_size 255 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) 256 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) 257 | # Check that any part of the gaussian is in-bounds 258 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 259 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 260 | if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ 261 | or br[0] < 0 or br[1] < 0: 262 | # If not, just return the image as is 263 | target_weight[joint_id] = 0 264 | continue 265 | 266 | # # Generate gaussian 267 | size = 2 * tmp_size + 1 268 | x = np.arange(0, size, 1, np.float32) 269 | y = x[:, np.newaxis] 270 | x0 = y0 = size // 2 271 | # The gaussian is not normalized, we want the center value to equal 1 272 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) 273 | 274 | # Usable gaussian range 275 | g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 276 | g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 277 | # Image range 278 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 279 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 280 | 281 | v = target_weight[joint_id] 282 | if v > 0.5: 283 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ 284 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 285 | 286 | if self.use_different_joints_weight: 287 | target_weight = np.multiply(target_weight, self.joints_weight) 288 | 289 | return target, target_weight 290 | 
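# ------------------------------------------------------------------------------
# Illustrative sketch only (not part of this file's API): how a JointsDataset
# subclass is typically consumed through a DataLoader. The `config` import, the
# cfg.DATASET.* fields, and the ImageNet normalization below are assumptions
# made for illustration; in practice cfg is filled in from one of the
# experiment yaml files before the dataset is built.
# ------------------------------------------------------------------------------
if __name__ == '__main__':
    import torchvision.transforms as transforms
    from torch.utils.data import DataLoader

    from config import cfg  # assumed entry point, normally updated from a yaml
    from dataset import coco as COCODataset

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_set = COCODataset(
        cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, is_train=True,
        transform=transforms.Compose([transforms.ToTensor(), normalize])
    )
    loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

    # Each item is (input, target, target_weight, meta): the cropped and
    # augmented person patch, the gaussian target heatmaps, the per-joint
    # visibility weights, and a dict with center/scale for mapping predictions
    # back to the original image.
    for input, target, target_weight, meta in loader:
        print(input.shape, target.shape, target_weight.shape)
        break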
-------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .mpii import MPIIDataset as mpii 12 | from .coco import COCODataset as coco 13 | -------------------------------------------------------------------------------- /lib/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import os 13 | import json_tricks as json 14 | from collections import OrderedDict 15 | 16 | import numpy as np 17 | from scipy.io import loadmat, savemat 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MPIIDataset(JointsDataset): 26 | def __init__(self, cfg, root, image_set, is_train, transform=None): 27 | super().__init__(cfg, root, image_set, is_train, transform) 28 | 29 | self.num_joints = 16 30 | self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] 31 | self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] 32 | 33 | self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15) 34 | self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6) 35 | 36 | self.db = self._get_db() 37 | 38 | if is_train and cfg.DATASET.SELECT_DATA: 39 | self.db = self.select_data(self.db) 40 | 41 | logger.info('=> load {} samples'.format(len(self.db))) 42 | 43 | def _get_db(self): 44 | # create train/val split 45 | file_name = os.path.join( 46 | self.root, 'annot', self.image_set+'.json' 47 | ) 48 | with open(file_name) as anno_file: 49 | anno = json.load(anno_file) 50 | 51 | gt_db = [] 52 | for a in anno: 53 | image_name = a['image'] 54 | 55 | c = np.array(a['center'], dtype=np.float) 56 | s = np.array([a['scale'], a['scale']], dtype=np.float) 57 | 58 | # Adjust center/scale slightly to avoid cropping limbs 59 | if c[0] != -1: 60 | c[1] = c[1] + 15 * s[1] 61 | s = s * 1.25 62 | 63 | # MPII uses matlab format, index is based 1, 64 | # we should first convert to 0-based index 65 | c = c - 1 66 | 67 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 68 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 69 | if self.image_set != 'test': 70 | joints = np.array(a['joints']) 71 | joints[:, 0:2] = joints[:, 0:2] - 1 72 | joints_vis = np.array(a['joints_vis']) 73 | assert len(joints) == self.num_joints, \ 74 | 'joint num diff: {} vs {}'.format(len(joints), 75 | self.num_joints) 76 | 77 | joints_3d[:, 0:2] = joints[:, 0:2] 78 | joints_3d_vis[:, 0] = joints_vis[:] 79 | joints_3d_vis[:, 1] = joints_vis[:] 80 | 81 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 82 | 
gt_db.append( 83 | { 84 | 'image': os.path.join(self.root, image_dir, image_name), 85 | 'center': c, 86 | 'scale': s, 87 | 'joints_3d': joints_3d, 88 | 'joints_3d_vis': joints_3d_vis, 89 | 'filename': '', 90 | 'imgnum': 0, 91 | } 92 | ) 93 | 94 | return gt_db 95 | 96 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 97 | # convert 0-based index to 1-based index 98 | preds = preds[:, :, 0:2] + 1.0 99 | 100 | if output_dir: 101 | pred_file = os.path.join(output_dir, 'pred.mat') 102 | savemat(pred_file, mdict={'preds': preds}) 103 | 104 | if 'test' in cfg.DATASET.TEST_SET: 105 | return {'Null': 0.0}, 0.0 106 | 107 | SC_BIAS = 0.6 108 | threshold = 0.5 109 | 110 | gt_file = os.path.join(cfg.DATASET.ROOT, 111 | 'annot', 112 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 113 | gt_dict = loadmat(gt_file) 114 | dataset_joints = gt_dict['dataset_joints'] 115 | jnt_missing = gt_dict['jnt_missing'] 116 | pos_gt_src = gt_dict['pos_gt_src'] 117 | headboxes_src = gt_dict['headboxes_src'] 118 | 119 | pos_pred_src = np.transpose(preds, [1, 2, 0]) 120 | 121 | head = np.where(dataset_joints == 'head')[1][0] 122 | lsho = np.where(dataset_joints == 'lsho')[1][0] 123 | lelb = np.where(dataset_joints == 'lelb')[1][0] 124 | lwri = np.where(dataset_joints == 'lwri')[1][0] 125 | lhip = np.where(dataset_joints == 'lhip')[1][0] 126 | lkne = np.where(dataset_joints == 'lkne')[1][0] 127 | lank = np.where(dataset_joints == 'lank')[1][0] 128 | 129 | rsho = np.where(dataset_joints == 'rsho')[1][0] 130 | relb = np.where(dataset_joints == 'relb')[1][0] 131 | rwri = np.where(dataset_joints == 'rwri')[1][0] 132 | rkne = np.where(dataset_joints == 'rkne')[1][0] 133 | rank = np.where(dataset_joints == 'rank')[1][0] 134 | rhip = np.where(dataset_joints == 'rhip')[1][0] 135 | 136 | jnt_visible = 1 - jnt_missing 137 | uv_error = pos_pred_src - pos_gt_src 138 | uv_err = np.linalg.norm(uv_error, axis=1) 139 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 140 | headsizes = np.linalg.norm(headsizes, axis=0) 141 | headsizes *= SC_BIAS 142 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 143 | scaled_uv_err = np.divide(uv_err, scale) 144 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 145 | jnt_count = np.sum(jnt_visible, axis=1) 146 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 147 | jnt_visible) 148 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 149 | 150 | # save 151 | rng = np.arange(0, 0.5+0.01, 0.01) 152 | pckAll = np.zeros((len(rng), 16)) 153 | 154 | for r in range(len(rng)): 155 | threshold = rng[r] 156 | less_than_threshold = np.multiply(scaled_uv_err <= threshold, 157 | jnt_visible) 158 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 159 | jnt_count) 160 | 161 | PCKh = np.ma.array(PCKh, mask=False) 162 | PCKh.mask[6:8] = True 163 | 164 | jnt_count = np.ma.array(jnt_count, mask=False) 165 | jnt_count.mask[6:8] = True 166 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 167 | 168 | name_value = [ 169 | ('Head', PCKh[head]), 170 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 171 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 172 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 173 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 174 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 175 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 176 | ('Mean', np.sum(PCKh * jnt_ratio)), 177 | ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 178 | ] 179 | name_value = OrderedDict(name_value) 180 | 181 | return name_value, 
name_value['Mean'] 182 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import models.pose_resnet 16 | import models.pose_hrnet 17 | -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | BN_MOMENTUM = 0.1 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d( 25 | in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False 27 | ) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 72 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 73 | bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 75 | momentum=BN_MOMENTUM) 76 | self.relu = nn.ReLU(inplace=True) 77 | self.downsample = downsample 78 | self.stride = stride 79 | 80 | def forward(self, x): 81 | residual = x 82 | 83 | out = self.conv1(x) 84 | out = self.bn1(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv2(out) 
88 | out = self.bn2(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv3(out) 92 | out = self.bn3(out) 93 | 94 | if self.downsample is not None: 95 | residual = self.downsample(x) 96 | 97 | out += residual 98 | out = self.relu(out) 99 | 100 | return out 101 | 102 | 103 | class PoseResNet(nn.Module): 104 | 105 | def __init__(self, block, layers, cfg, **kwargs): 106 | self.inplanes = 64 107 | extra = cfg.MODEL.EXTRA 108 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 109 | 110 | super(PoseResNet, self).__init__() 111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 112 | bias=False) 113 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 114 | self.relu = nn.ReLU(inplace=True) 115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 116 | self.layer1 = self._make_layer(block, 64, layers[0]) 117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 119 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 120 | 121 | # used for deconv layers 122 | self.deconv_layers = self._make_deconv_layer( 123 | extra.NUM_DECONV_LAYERS, 124 | extra.NUM_DECONV_FILTERS, 125 | extra.NUM_DECONV_KERNELS, 126 | ) 127 | 128 | self.final_layer = nn.Conv2d( 129 | in_channels=extra.NUM_DECONV_FILTERS[-1], 130 | out_channels=cfg.MODEL.NUM_JOINTS, 131 | kernel_size=extra.FINAL_CONV_KERNEL, 132 | stride=1, 133 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 134 | ) 135 | 136 | def _make_layer(self, block, planes, blocks, stride=1): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * block.expansion: 139 | downsample = nn.Sequential( 140 | nn.Conv2d(self.inplanes, planes * block.expansion, 141 | kernel_size=1, stride=stride, bias=False), 142 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 143 | ) 144 | 145 | layers = [] 146 | layers.append(block(self.inplanes, planes, stride, downsample)) 147 | self.inplanes = planes * block.expansion 148 | for i in range(1, blocks): 149 | layers.append(block(self.inplanes, planes)) 150 | 151 | return nn.Sequential(*layers) 152 | 153 | def _get_deconv_cfg(self, deconv_kernel, index): 154 | if deconv_kernel == 4: 155 | padding = 1 156 | output_padding = 0 157 | elif deconv_kernel == 3: 158 | padding = 1 159 | output_padding = 1 160 | elif deconv_kernel == 2: 161 | padding = 0 162 | output_padding = 0 163 | 164 | return deconv_kernel, padding, output_padding 165 | 166 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 167 | assert num_layers == len(num_filters), \ 168 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 169 | assert num_layers == len(num_kernels), \ 170 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 171 | 172 | layers = [] 173 | for i in range(num_layers): 174 | kernel, padding, output_padding = \ 175 | self._get_deconv_cfg(num_kernels[i], i) 176 | 177 | planes = num_filters[i] 178 | layers.append( 179 | nn.ConvTranspose2d( 180 | in_channels=self.inplanes, 181 | out_channels=planes, 182 | kernel_size=kernel, 183 | stride=2, 184 | padding=padding, 185 | output_padding=output_padding, 186 | bias=self.deconv_with_bias)) 187 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 188 | layers.append(nn.ReLU(inplace=True)) 189 | self.inplanes = planes 190 | 191 | return nn.Sequential(*layers) 192 | 193 | def forward(self, x): 194 | x = self.conv1(x) 195 | x = self.bn1(x) 196 | x = self.relu(x) 197 | x = self.maxpool(x) 198 | 199 | x = 
self.layer1(x) 200 | x = self.layer2(x) 201 | x = self.layer3(x) 202 | x = self.layer4(x) 203 | 204 | x = self.deconv_layers(x) 205 | x = self.final_layer(x) 206 | 207 | return x 208 | 209 | def init_weights(self, pretrained=''): 210 | if os.path.isfile(pretrained): 211 | logger.info('=> init deconv weights from normal distribution') 212 | for name, m in self.deconv_layers.named_modules(): 213 | if isinstance(m, nn.ConvTranspose2d): 214 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 215 | logger.info('=> init {}.bias as 0'.format(name)) 216 | nn.init.normal_(m.weight, std=0.001) 217 | if self.deconv_with_bias: 218 | nn.init.constant_(m.bias, 0) 219 | elif isinstance(m, nn.BatchNorm2d): 220 | logger.info('=> init {}.weight as 1'.format(name)) 221 | logger.info('=> init {}.bias as 0'.format(name)) 222 | nn.init.constant_(m.weight, 1) 223 | nn.init.constant_(m.bias, 0) 224 | logger.info('=> init final conv weights from normal distribution') 225 | for m in self.final_layer.modules(): 226 | if isinstance(m, nn.Conv2d): 227 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 228 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 229 | logger.info('=> init {}.bias as 0'.format(name)) 230 | nn.init.normal_(m.weight, std=0.001) 231 | nn.init.constant_(m.bias, 0) 232 | 233 | pretrained_state_dict = torch.load(pretrained) 234 | logger.info('=> loading pretrained model {}'.format(pretrained)) 235 | self.load_state_dict(pretrained_state_dict, strict=False) 236 | else: 237 | logger.info('=> init weights from normal distribution') 238 | for m in self.modules(): 239 | if isinstance(m, nn.Conv2d): 240 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 241 | nn.init.normal_(m.weight, std=0.001) 242 | # nn.init.constant_(m.bias, 0) 243 | elif isinstance(m, nn.BatchNorm2d): 244 | nn.init.constant_(m.weight, 1) 245 | nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.ConvTranspose2d): 247 | nn.init.normal_(m.weight, std=0.001) 248 | if self.deconv_with_bias: 249 | nn.init.constant_(m.bias, 0) 250 | 251 | 252 | resnet_spec = { 253 | 18: (BasicBlock, [2, 2, 2, 2]), 254 | 34: (BasicBlock, [3, 4, 6, 3]), 255 | 50: (Bottleneck, [3, 4, 6, 3]), 256 | 101: (Bottleneck, [3, 4, 23, 3]), 257 | 152: (Bottleneck, [3, 8, 36, 3]) 258 | } 259 | 260 | 261 | def get_pose_net(cfg, is_train, **kwargs): 262 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 263 | 264 | block_class, layers = resnet_spec[num_layers] 265 | 266 | model = PoseResNet(block_class, layers, cfg, **kwargs) 267 | 268 | if is_train and cfg.MODEL.INIT_WEIGHTS: 269 | model.init_weights(cfg.MODEL.PRETRAINED) 270 | 271 | return model 272 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 15 | return a if a >= b else b 16 | 17 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 18 | return a if a <= b else b 19 | 20 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 21 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 22 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 23 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 24 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 25 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 26 | 27 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 28 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 29 | 30 | cdef int ndets = dets.shape[0] 31 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 32 | np.zeros((ndets), dtype=np.int) 33 | 34 | # nominal indices 35 | cdef int _i, _j 36 | # sorted indices 37 | cdef int i, j 38 | # temp variables for box i's (the box currently under consideration) 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | # variables for computing overlap with box j (lower scoring box) 41 | cdef np.float32_t xx1, yy1, xx2, yy2 42 | cdef np.float32_t w, h 43 | cdef np.float32_t inter, ovr 44 | 45 | keep = [] 46 | for _i in range(ndets): 47 | i = order[_i] 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | ix1 = x1[i] 52 | iy1 = y1[i] 53 | ix2 = x2[i] 54 | iy2 = y2[i] 55 | iarea = areas[i] 56 | for _j in range(_i + 1, ndets): 57 | j = order[_j] 58 | if suppressed[j] == 1: 59 | continue 60 | xx1 = max(ix1, x1[j]) 61 | yy1 = max(iy1, y1[j]) 62 | xx2 = min(ix2, x2[j]) 63 | yy2 = min(iy2, y2[j]) 64 | w = max(0.0, xx2 - xx1 + 1) 65 | h = max(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (iarea + areas[j] - inter) 68 | if ovr >= thresh: 69 | suppressed[j] = 1 70 | 71 | return keep 72 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | assert sizeof(int) == sizeof(np.int32_t) 15 | 16 | cdef extern from "gpu_nms.hpp": 17 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 18 | 19 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 20 | np.int32_t device_id=0): 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int32_t, ndim=1] \ 29 | order = scores.argsort()[::-1].astype(np.int32) 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) 
/ 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 98 | """ 99 | greedily select boxes with high confidence and overlap with current maximum <= thresh 100 | rule out overlap >= thresh, overlap = oks 101 | :param kpts_db 102 | :param thresh: retain overlap < thresh 103 | :return: indexes to keep 104 | """ 105 | if len(kpts_db) == 0: 106 | return [] 107 | 108 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 109 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 110 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 111 | 112 | order = scores.argsort()[::-1] 113 | 114 | keep = [] 115 | while order.size > 0: 116 | i = order[0] 117 | keep.append(i) 118 | 119 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 120 | 121 | inds = np.where(oks_ovr <= thresh)[0] 122 | order = order[inds + 1] 123 | 124 | return keep 125 | 126 | 127 | def rescore(overlap, scores, thresh, type='gaussian'): 128 | assert overlap.shape[0] == scores.shape[0] 129 | if type == 'linear': 130 | inds = np.where(overlap >= thresh)[0] 131 | scores[inds] = scores[inds] * (1 - overlap[inds]) 132 | else: 133 | scores = scores * np.exp(- overlap**2 / thresh) 134 | 135 | return scores 136 | 137 | 138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 139 | """ 140 | greedily select boxes with high confidence and overlap with current maximum <= thresh 141 | rule out overlap >= thresh, overlap = oks 142 | :param kpts_db 143 | :param thresh: retain overlap < thresh 144 | :return: indexes to keep 145 | """ 146 | if len(kpts_db) == 0: 147 | return [] 148 | 149 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 150 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 151 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 152 | 153 | order = scores.argsort()[::-1] 154 | scores = scores[order] 155 | 156 | # max_dets = order.size 157 | max_dets = 20 158 | keep = np.zeros(max_dets, dtype=np.intp) 159 | keep_cnt = 0 160 | while order.size > 0 and keep_cnt < max_dets: 161 | i = order[0] 162 | 163 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 164 | 165 | order = order[1:] 166 | scores = rescore(oks_ovr, scores[1:], thresh) 167 | 168 | tmp = scores.argsort()[::-1] 169 | order = order[tmp] 170 | scores = scores[tmp] 171 | 172 | keep[keep_cnt] = i 173 | keep_cnt += 1 174 | 175 | keep = keep[:keep_cnt] 176 | 177 | return keep 178 | # kpts_db = kpts_db[:keep_cnt] 179 | 180 | # return kpts_db 181 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // 
Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(¤t_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 
87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose.gluon 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.items(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
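# For '.cu' sources the compiler executable is switched to nvcc and the 'nvcc'
# entry of extra_postargs is applied; all other sources keep the default
# compiler and the 'gcc' entry.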
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leoxiaobin/deep-high-resolution-net.pytorch/6f69e4676ad8d43d0d61b64b1b9726f0c369e7b1/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | 14 | 15 | def flip_back(output_flipped, matched_parts): 16 | ''' 17 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 18 | ''' 19 | assert output_flipped.ndim == 4,\ 20 | 'output_flipped should be [batch_size, num_joints, height, width]' 21 | 22 | output_flipped = output_flipped[:, :, :, ::-1] 23 | 24 | for pair in matched_parts: 25 | tmp = output_flipped[:, pair[0], :, :].copy() 26 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 27 | output_flipped[:, pair[1], :, :] = tmp 28 | 29 | return output_flipped 30 | 31 | 32 | def fliplr_joints(joints, joints_vis, width, matched_parts): 33 | """ 34 | flip coords 35 | """ 36 | # Flip horizontal 37 | joints[:, 0] = width - joints[:, 0] - 1 38 | 39 | # Change left-right parts 40 | for pair in matched_parts: 41 | joints[pair[0], :], joints[pair[1], :] = \ 42 | joints[pair[1], :], joints[pair[0], :].copy() 43 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 44 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 45 | 46 | return joints*joints_vis, joints_vis 47 | 48 | 49 | def transform_preds(coords, center, scale, output_size): 50 | target_coords = np.zeros(coords.shape) 51 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 52 | for p in range(coords.shape[0]): 53 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 54 | return target_coords 55 | 56 | 57 | def get_affine_transform( 58 | center, scale, rot, output_size, 59 | shift=np.array([0, 0], dtype=np.float32), inv=0 60 | ): 61 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 62 | print(scale) 63 | scale = np.array([scale, scale]) 64 | 65 | scale_tmp = scale * 200.0 66 | src_w = scale_tmp[0] 67 | dst_w = output_size[0] 68 | dst_h = output_size[1] 69 | 70 | rot_rad = np.pi * rot / 180 71 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 72 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 73 | 74 | src = np.zeros((3, 2), dtype=np.float32) 75 | dst = np.zeros((3, 2), dtype=np.float32) 76 | src[0, :] = center + scale_tmp * shift 77 | src[1, :] = center + src_dir + scale_tmp * shift 78 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 79 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 80 | 81 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 82 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 83 | 84 | if inv: 85 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 86 | else: 87 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 88 | 89 | return trans 90 | 91 | 92 | def affine_transform(pt, t): 93 | new_pt = np.array([pt[0], pt[1], 1.]).T 94 | new_pt = np.dot(t, new_pt) 95 | return new_pt[:2] 96 | 97 | 98 | def get_3rd_point(a, b): 99 | direct = a - b 100 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 101 | 102 | 103 | def get_dir(src_point, rot_rad): 104 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 105 | 106 | src_result = [0, 0] 107 | src_result[0] = src_point[0] * cs - src_point[1] * sn 108 | src_result[1] = src_point[0] * sn + src_point[1] * cs 109 | 110 | return src_result 111 | 112 | 113 | def crop(img, center, scale, output_size, rot=0): 114 | trans = get_affine_transform(center, scale, rot, output_size) 115 | 116 | dst_img = 
cv2.warpAffine( 117 | img, trans, (int(output_size[0]), int(output_size[1])), 118 | flags=cv2.INTER_LINEAR 119 | ) 120 | 121 | return dst_img 122 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | 17 | import torch 18 | import torch.optim as optim 19 | import torch.nn as nn 20 | 21 | 22 | def create_logger(cfg, cfg_name, phase='train'): 23 | root_output_dir = Path(cfg.OUTPUT_DIR) 24 | # set up logger 25 | if not root_output_dir.exists(): 26 | print('=> creating {}'.format(root_output_dir)) 27 | root_output_dir.mkdir() 28 | 29 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 30 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 31 | dataset = dataset.replace(':', '_') 32 | model = cfg.MODEL.NAME 33 | cfg_name = os.path.basename(cfg_name).split('.')[0] 34 | 35 | final_output_dir = root_output_dir / dataset / model / cfg_name 36 | 37 | print('=> creating {}'.format(final_output_dir)) 38 | final_output_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | time_str = time.strftime('%Y-%m-%d-%H-%M') 41 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 42 | final_log_file = final_output_dir / log_file 43 | head = '%(asctime)-15s %(message)s' 44 | logging.basicConfig(filename=str(final_log_file), 45 | format=head) 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.INFO) 48 | console = logging.StreamHandler() 49 | logging.getLogger('').addHandler(console) 50 | 51 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 52 | (cfg_name + '_' + time_str) 53 | 54 | print('=> creating {}'.format(tensorboard_log_dir)) 55 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 56 | 57 | return logger, str(final_output_dir), str(tensorboard_log_dir) 58 | 59 | 60 | def get_optimizer(cfg, model): 61 | optimizer = None 62 | if cfg.TRAIN.OPTIMIZER == 'sgd': 63 | optimizer = optim.SGD( 64 | model.parameters(), 65 | lr=cfg.TRAIN.LR, 66 | momentum=cfg.TRAIN.MOMENTUM, 67 | weight_decay=cfg.TRAIN.WD, 68 | nesterov=cfg.TRAIN.NESTEROV 69 | ) 70 | elif cfg.TRAIN.OPTIMIZER == 'adam': 71 | optimizer = optim.Adam( 72 | model.parameters(), 73 | lr=cfg.TRAIN.LR 74 | ) 75 | 76 | return optimizer 77 | 78 | 79 | def save_checkpoint(states, is_best, output_dir, 80 | filename='checkpoint.pth'): 81 | torch.save(states, os.path.join(output_dir, filename)) 82 | if is_best and 'state_dict' in states: 83 | torch.save(states['best_state_dict'], 84 | os.path.join(output_dir, 'model_best.pth')) 85 | 86 | 87 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 88 | """ 89 | :param model: 90 | :param input_tensors: 91 | :param item_length: 92 | :return: 93 | """ 94 | 95 | summary = [] 96 | 97 | ModuleDetails = namedtuple( 98 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 99 | hooks = [] 100 | layer_instances = {} 101 | 102 | def add_hooks(module): 
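# Registers a forward hook on every leaf module (ModuleList/Sequential
# containers and the top-level model are skipped); each hook appends the layer
# name, input/output sizes, parameter count and multiply-adds to `summary`.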
103 | 104 | def hook(module, input, output): 105 | class_name = str(module.__class__.__name__) 106 | 107 | instance_index = 1 108 | if class_name not in layer_instances: 109 | layer_instances[class_name] = instance_index 110 | else: 111 | instance_index = layer_instances[class_name] + 1 112 | layer_instances[class_name] = instance_index 113 | 114 | layer_name = class_name + "_" + str(instance_index) 115 | 116 | params = 0 117 | 118 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 119 | class_name.find("Linear") != -1: 120 | for param_ in module.parameters(): 121 | params += param_.view(-1).size(0) 122 | 123 | flops = "Not Available" 124 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 125 | flops = ( 126 | torch.prod( 127 | torch.LongTensor(list(module.weight.data.size()))) * 128 | torch.prod( 129 | torch.LongTensor(list(output.size())[2:]))).item() 130 | elif isinstance(module, nn.Linear): 131 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 132 | * input[0].size(1)).item() 133 | 134 | if isinstance(input[0], list): 135 | input = input[0] 136 | if isinstance(output, list): 137 | output = output[0] 138 | 139 | summary.append( 140 | ModuleDetails( 141 | name=layer_name, 142 | input_size=list(input[0].size()), 143 | output_size=list(output.size()), 144 | num_parameters=params, 145 | multiply_adds=flops) 146 | ) 147 | 148 | if not isinstance(module, nn.ModuleList) \ 149 | and not isinstance(module, nn.Sequential) \ 150 | and module != model: 151 | hooks.append(module.register_forward_hook(hook)) 152 | 153 | model.eval() 154 | model.apply(add_hooks) 155 | 156 | space_len = item_length 157 | 158 | model(*input_tensors) 159 | for hook in hooks: 160 | hook.remove() 161 | 162 | details = '' 163 | if verbose: 164 | details = "Model Summary" + \ 165 | os.linesep + \ 166 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 167 | ' ' * (space_len - len("Name")), 168 | ' ' * (space_len - len("Input Size")), 169 | ' ' * (space_len - len("Output Size")), 170 | ' ' * (space_len - len("Parameters")), 171 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 172 | + os.linesep + '-' * space_len * 5 + os.linesep 173 | 174 | params_sum = 0 175 | flops_sum = 0 176 | for layer in summary: 177 | params_sum += layer.num_parameters 178 | if layer.multiply_adds != "Not Available": 179 | flops_sum += layer.multiply_adds 180 | if verbose: 181 | details += "{}{}{}{}{}{}{}{}{}{}".format( 182 | layer.name, 183 | ' ' * (space_len - len(layer.name)), 184 | layer.input_size, 185 | ' ' * (space_len - len(str(layer.input_size))), 186 | layer.output_size, 187 | ' ' * (space_len - len(str(layer.output_size))), 188 | layer.num_parameters, 189 | ' ' * (space_len - len(str(layer.num_parameters))), 190 | layer.multiply_adds, 191 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 192 | + os.linesep + '-' * space_len * 5 + os.linesep 193 | 194 | details += os.linesep \ 195 | + "Total Parameters: {:,}".format(params_sum) \ 196 | + os.linesep + '-' * space_len * 5 + os.linesep 197 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 198 | + os.linesep + '-' * space_len * 5 + os.linesep 199 | details += "Number of Layers" + os.linesep 200 | for layer in layer_instances: 201 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 202 | 203 | return details 204 | -------------------------------------------------------------------------------- /lib/utils/vis.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | 17 | from core.inference import get_max_preds 18 | 19 | 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 21 | file_name, nrow=8, padding=2): 22 | ''' 23 | batch_image: [batch_size, channel, height, width] 24 | batch_joints: [batch_size, num_joints, 3], 25 | batch_joints_vis: [batch_size, num_joints, 1], 26 | } 27 | ''' 28 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 29 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 30 | ndarr = ndarr.copy() 31 | 32 | nmaps = batch_image.size(0) 33 | xmaps = min(nrow, nmaps) 34 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 35 | height = int(batch_image.size(2) + padding) 36 | width = int(batch_image.size(3) + padding) 37 | k = 0 38 | for y in range(ymaps): 39 | for x in range(xmaps): 40 | if k >= nmaps: 41 | break 42 | joints = batch_joints[k] 43 | joints_vis = batch_joints_vis[k] 44 | 45 | for joint, joint_vis in zip(joints, joints_vis): 46 | joint[0] = x * width + padding + joint[0] 47 | joint[1] = y * height + padding + joint[1] 48 | if joint_vis[0]: 49 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) 50 | k = k + 1 51 | cv2.imwrite(file_name, ndarr) 52 | 53 | 54 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, 55 | normalize=True): 56 | ''' 57 | batch_image: [batch_size, channel, height, width] 58 | batch_heatmaps: ['batch_size, num_joints, height, width] 59 | file_name: saved file name 60 | ''' 61 | if normalize: 62 | batch_image = batch_image.clone() 63 | min = float(batch_image.min()) 64 | max = float(batch_image.max()) 65 | 66 | batch_image.add_(-min).div_(max - min + 1e-5) 67 | 68 | batch_size = batch_heatmaps.size(0) 69 | num_joints = batch_heatmaps.size(1) 70 | heatmap_height = batch_heatmaps.size(2) 71 | heatmap_width = batch_heatmaps.size(3) 72 | 73 | grid_image = np.zeros((batch_size*heatmap_height, 74 | (num_joints+1)*heatmap_width, 75 | 3), 76 | dtype=np.uint8) 77 | 78 | preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) 79 | 80 | for i in range(batch_size): 81 | image = batch_image[i].mul(255)\ 82 | .clamp(0, 255)\ 83 | .byte()\ 84 | .permute(1, 2, 0)\ 85 | .cpu().numpy() 86 | heatmaps = batch_heatmaps[i].mul(255)\ 87 | .clamp(0, 255)\ 88 | .byte()\ 89 | .cpu().numpy() 90 | 91 | resized_image = cv2.resize(image, 92 | (int(heatmap_width), int(heatmap_height))) 93 | 94 | height_begin = heatmap_height * i 95 | height_end = heatmap_height * (i + 1) 96 | for j in range(num_joints): 97 | cv2.circle(resized_image, 98 | (int(preds[i][j][0]), int(preds[i][j][1])), 99 | 1, [0, 0, 255], 1) 100 | heatmap = heatmaps[j, :, :] 101 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 102 | masked_image = colored_heatmap*0.7 + resized_image*0.3 103 | cv2.circle(masked_image, 104 | (int(preds[i][j][0]), int(preds[i][j][1])), 105 | 1, [0, 0, 255], 1) 106 | 107 | width_begin = heatmap_width * (j+1) 108 | width_end = 
heatmap_width * (j+2) 109 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 110 | masked_image 111 | # grid_image[height_begin:height_end, width_begin:width_end, :] = \ 112 | # colored_heatmap*0.7 + resized_image*0.3 113 | 114 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 115 | 116 | cv2.imwrite(file_name, grid_image) 117 | 118 | 119 | def save_debug_images(config, input, meta, target, joints_pred, output, 120 | prefix): 121 | if not config.DEBUG.DEBUG: 122 | return 123 | 124 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 125 | save_batch_image_with_joints( 126 | input, meta['joints'], meta['joints_vis'], 127 | '{}_gt.jpg'.format(prefix) 128 | ) 129 | if config.DEBUG.SAVE_BATCH_IMAGES_PRED: 130 | save_batch_image_with_joints( 131 | input, joints_pred, meta['joints_vis'], 132 | '{}_pred.jpg'.format(prefix) 133 | ) 134 | if config.DEBUG.SAVE_HEATMAPS_GT: 135 | save_batch_heatmaps( 136 | input, target, '{}_hm_gt.jpg'.format(prefix) 137 | ) 138 | if config.DEBUG.SAVE_HEATMAPS_PRED: 139 | save_batch_heatmaps( 140 | input, output, '{}_hm_pred.jpg'.format(prefix) 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.find('@') 27 | if pos_at == -1: 28 | print("character '@' is not found in the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.find('@') 54 | if pos_at == -1: 55 | print("character '@' is not found in the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71
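# Usage sketch (illustrative): paths handed to this module are expected to look like
# "<archive>.zip@/<member>"; everything before '@' names the zip archive on disk, the
# character right after '@' is skipped, and the remainder is the member read from the
# cached open archive. The archive and image names below are hypothetical.
#
#     from utils import zipreader
#     img = zipreader.imread('data/coco/images.zip@/val2017/000000002685.jpg')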
| -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | opencv-python==3.4.1.15 3 | shapely==1.6.4 4 | Cython 5 | scipy 6 | pandas 7 | pyyaml 8 | json_tricks 9 | scikit-image 10 | yacs>=0.1.5 11 | tensorboardX==1.6 12 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | 24 | import _init_paths 25 | from config import cfg 26 | from config import update_config 27 | from core.loss import JointsMSELoss 28 | from core.function import validate 29 | from utils.utils import create_logger 30 | 31 | import dataset 32 | import models 33 | 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser(description='Train keypoints network') 37 | # general 38 | parser.add_argument('--cfg', 39 | help='experiment configure file name', 40 | required=True, 41 | type=str) 42 | 43 | parser.add_argument('opts', 44 | help="Modify config options using the command-line", 45 | default=None, 46 | nargs=argparse.REMAINDER) 47 | 48 | parser.add_argument('--modelDir', 49 | help='model directory', 50 | type=str, 51 | default='') 52 | parser.add_argument('--logDir', 53 | help='log directory', 54 | type=str, 55 | default='') 56 | parser.add_argument('--dataDir', 57 | help='data directory', 58 | type=str, 59 | default='') 60 | parser.add_argument('--prevModelDir', 61 | help='prev Model directory', 62 | type=str, 63 | default='') 64 | 65 | args = parser.parse_args() 66 | return args 67 | 68 | 69 | def main(): 70 | args = parse_args() 71 | update_config(cfg, args) 72 | 
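    # From here on main() mirrors tools/train.py: create a 'valid' logger, build the
    # model named by cfg.MODEL.NAME, load weights from cfg.TEST.MODEL_FILE (or from
    # final_state.pth in the output directory), wrap the model in DataParallel and
    # run validate() once over cfg.DATASET.TEST_SET.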
73 | logger, final_output_dir, tb_log_dir = create_logger( 74 | cfg, args.cfg, 'valid') 75 | 76 | logger.info(pprint.pformat(args)) 77 | logger.info(cfg) 78 | 79 | # cudnn related setting 80 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 81 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 82 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 83 | 84 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 85 | cfg, is_train=False 86 | ) 87 | 88 | if cfg.TEST.MODEL_FILE: 89 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 90 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 91 | else: 92 | model_state_file = os.path.join( 93 | final_output_dir, 'final_state.pth' 94 | ) 95 | logger.info('=> loading model from {}'.format(model_state_file)) 96 | model.load_state_dict(torch.load(model_state_file)) 97 | 98 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 99 | 100 | # define loss function (criterion) and optimizer 101 | criterion = JointsMSELoss( 102 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 103 | ).cuda() 104 | 105 | # Data loading code 106 | normalize = transforms.Normalize( 107 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 108 | ) 109 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 110 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 111 | transforms.Compose([ 112 | transforms.ToTensor(), 113 | normalize, 114 | ]) 115 | ) 116 | valid_loader = torch.utils.data.DataLoader( 117 | valid_dataset, 118 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 119 | shuffle=False, 120 | num_workers=cfg.WORKERS, 121 | pin_memory=True 122 | ) 123 | 124 | # evaluate on validation set 125 | validate(cfg, valid_loader, valid_dataset, model, criterion, 126 | final_output_dir, tb_log_dir) 127 | 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import argparse 12 | import os 13 | import pprint 14 | import shutil 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | from tensorboardX import SummaryWriter 24 | 25 | import _init_paths 26 | from config import cfg 27 | from config import update_config 28 | from core.loss import JointsMSELoss 29 | from core.function import train 30 | from core.function import validate 31 | from utils.utils import get_optimizer 32 | from utils.utils import save_checkpoint 33 | from utils.utils import create_logger 34 | from utils.utils import get_model_summary 35 | 36 | import dataset 37 | import models 38 | 39 | 40 | def parse_args(): 41 | parser = argparse.ArgumentParser(description='Train keypoints network') 42 | # general 43 | parser.add_argument('--cfg', 44 | help='experiment configure file name', 45 | required=True, 46 | type=str) 47 | 48 | parser.add_argument('opts', 49 | help="Modify config options using the command-line", 50 | default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | # philly 54 | parser.add_argument('--modelDir', 55 | help='model directory', 56 | type=str, 57 | default='') 58 | parser.add_argument('--logDir', 59 | help='log directory', 60 | type=str, 61 | default='') 62 | parser.add_argument('--dataDir', 63 | help='data directory', 64 | type=str, 65 | default='') 66 | parser.add_argument('--prevModelDir', 67 | help='prev Model directory', 68 | type=str, 69 | default='') 70 | 71 | args = parser.parse_args() 72 | 73 | return args 74 | 75 | 76 | def main(): 77 | args = parse_args() 78 | update_config(cfg, args) 79 | 80 | logger, final_output_dir, tb_log_dir = create_logger( 81 | cfg, args.cfg, 'train') 82 | 83 | logger.info(pprint.pformat(args)) 84 | logger.info(cfg) 85 | 86 | # cudnn related setting 87 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 88 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 89 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 90 | 91 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 92 | cfg, is_train=True 93 | ) 94 | 95 | # copy model file 96 | this_dir = os.path.dirname(__file__) 97 | shutil.copy2( 98 | os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), 99 | final_output_dir) 100 | # logger.info(pprint.pformat(model)) 101 | 102 | writer_dict = { 103 | 'writer': SummaryWriter(log_dir=tb_log_dir), 104 | 'train_global_steps': 0, 105 | 'valid_global_steps': 0, 106 | } 107 | 108 | dump_input = torch.rand( 109 | (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) 110 | ) 111 | writer_dict['writer'].add_graph(model, (dump_input, )) 112 | 113 | logger.info(get_model_summary(model, dump_input)) 114 | 115 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 116 | 117 | # define loss function (criterion) and optimizer 118 | criterion = JointsMSELoss( 119 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 120 | ).cuda() 121 | 122 | # Data loading code 123 | normalize = transforms.Normalize( 124 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 125 | ) 126 | train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 127 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, 
True, 128 | transforms.Compose([ 129 | transforms.ToTensor(), 130 | normalize, 131 | ]) 132 | ) 133 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 134 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 135 | transforms.Compose([ 136 | transforms.ToTensor(), 137 | normalize, 138 | ]) 139 | ) 140 | 141 | train_loader = torch.utils.data.DataLoader( 142 | train_dataset, 143 | batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 144 | shuffle=cfg.TRAIN.SHUFFLE, 145 | num_workers=cfg.WORKERS, 146 | pin_memory=cfg.PIN_MEMORY 147 | ) 148 | valid_loader = torch.utils.data.DataLoader( 149 | valid_dataset, 150 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 151 | shuffle=False, 152 | num_workers=cfg.WORKERS, 153 | pin_memory=cfg.PIN_MEMORY 154 | ) 155 | 156 | best_perf = 0.0 157 | best_model = False 158 | last_epoch = -1 159 | optimizer = get_optimizer(cfg, model) 160 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 161 | checkpoint_file = os.path.join( 162 | final_output_dir, 'checkpoint.pth' 163 | ) 164 | 165 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): 166 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 167 | checkpoint = torch.load(checkpoint_file) 168 | begin_epoch = checkpoint['epoch'] 169 | best_perf = checkpoint['perf'] 170 | last_epoch = checkpoint['epoch'] 171 | model.load_state_dict(checkpoint['state_dict']) 172 | 173 | optimizer.load_state_dict(checkpoint['optimizer']) 174 | logger.info("=> loaded checkpoint '{}' (epoch {})".format( 175 | checkpoint_file, checkpoint['epoch'])) 176 | 177 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 178 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 179 | last_epoch=last_epoch 180 | ) 181 | 182 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 183 | lr_scheduler.step() 184 | 185 | # train for one epoch 186 | train(cfg, train_loader, model, criterion, optimizer, epoch, 187 | final_output_dir, tb_log_dir, writer_dict) 188 | 189 | 190 | # evaluate on validation set 191 | perf_indicator = validate( 192 | cfg, valid_loader, valid_dataset, model, criterion, 193 | final_output_dir, tb_log_dir, writer_dict 194 | ) 195 | 196 | if perf_indicator >= best_perf: 197 | best_perf = perf_indicator 198 | best_model = True 199 | else: 200 | best_model = False 201 | 202 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 203 | save_checkpoint({ 204 | 'epoch': epoch + 1, 205 | 'model': cfg.MODEL.NAME, 206 | 'state_dict': model.state_dict(), 207 | 'best_state_dict': model.module.state_dict(), 208 | 'perf': perf_indicator, 209 | 'optimizer': optimizer.state_dict(), 210 | }, best_model, final_output_dir) 211 | 212 | final_model_state_file = os.path.join( 213 | final_output_dir, 'final_state.pth' 214 | ) 215 | logger.info('=> saving final model state to {}'.format( 216 | final_model_state_file) 217 | ) 218 | torch.save(model.module.state_dict(), final_model_state_file) 219 | writer_dict['writer'].close() 220 | 221 | 222 | if __name__ == '__main__': 223 | main() 224 | -------------------------------------------------------------------------------- /visualization/plot_coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # Modified by Depu Meng (mdp@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | import argparse 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import cv2 12 | import json 13 | import matplotlib.lines as mlines 14 | import matplotlib.patches as mpatches 15 | from pycocotools.coco import COCO 16 | from pycocotools.cocoeval import COCOeval 17 | import os 18 | 19 | 20 | class ColorStyle: 21 | def __init__(self, color, link_pairs, point_color): 22 | self.color = color 23 | self.link_pairs = link_pairs 24 | self.point_color = point_color 25 | 26 | for i in range(len(self.color)): 27 | self.link_pairs[i].append(tuple(np.array(self.color[i])/255.)) 28 | 29 | self.ring_color = [] 30 | for i in range(len(self.point_color)): 31 | self.ring_color.append(tuple(np.array(self.point_color[i])/255.)) 32 | 33 | # Xiaochu Style 34 | # (R,G,B) 35 | color1 = [(179,0,0),(228,26,28),(255,255,51), 36 | (49,163,84), (0,109,45), (255,255,51), 37 | (240,2,127),(240,2,127),(240,2,127), (240,2,127), (240,2,127), 38 | (217,95,14), (254,153,41),(255,255,51), 39 | (44,127,184),(0,0,255)] 40 | 41 | link_pairs1 = [ 42 | [15, 13], [13, 11], [11, 5], 43 | [12, 14], [14, 16], [12, 6], 44 | [3, 1],[1, 2],[1, 0],[0, 2],[2,4], 45 | [9, 7], [7,5], [5, 6], 46 | [6, 8], [8, 10], 47 | ] 48 | 49 | point_color1 = [(240,2,127),(240,2,127),(240,2,127), 50 | (240,2,127), (240,2,127), 51 | (255,255,51),(255,255,51), 52 | (254,153,41),(44,127,184), 53 | (217,95,14),(0,0,255), 54 | (255,255,51),(255,255,51),(228,26,28), 55 | (49,163,84),(252,176,243),(0,176,240), 56 | (255,255,0),(169, 209, 142), 57 | (255,255,0),(169, 209, 142), 58 | (255,255,0),(169, 209, 142)] 59 | 60 | xiaochu_style = ColorStyle(color1, link_pairs1, point_color1) 61 | 62 | 63 | # Chunhua Style 64 | # (R,G,B) 65 | color2 = [(252,176,243),(252,176,243),(252,176,243), 66 | (0,176,240), (0,176,240), (0,176,240), 67 | (240,2,127),(240,2,127),(240,2,127), (240,2,127), (240,2,127), 68 | (255,255,0), (255,255,0),(169, 209, 142), 69 | (169, 209, 142),(169, 209, 142)] 70 | 71 | link_pairs2 = [ 72 | [15, 13], [13, 11], [11, 5], 73 | [12, 14], [14, 16], [12, 6], 74 | [3, 1],[1, 2],[1, 0],[0, 2],[2,4], 75 | [9, 7], [7,5], [5, 6], [6, 8], [8, 10], 76 | ] 77 | 78 | point_color2 = [(240,2,127),(240,2,127),(240,2,127), 79 | (240,2,127), (240,2,127), 80 | (255,255,0),(169, 209, 142), 81 | (255,255,0),(169, 209, 142), 82 | (255,255,0),(169, 209, 142), 83 | (252,176,243),(0,176,240),(252,176,243), 84 | (0,176,240),(252,176,243),(0,176,240), 85 | (255,255,0),(169, 209, 142), 86 | (255,255,0),(169, 209, 142), 87 | (255,255,0),(169, 209, 142)] 88 | 89 | chunhua_style = ColorStyle(color2, link_pairs2, point_color2) 90 | 91 | def parse_args(): 92 | parser = argparse.ArgumentParser(description='Visualize COCO predictions') 93 | # general 94 | parser.add_argument('--image-path', 95 | help='Path of COCO val images', 96 | type=str, 97 | default='data/coco/images/val2017/' 98 | ) 99 | 100 | parser.add_argument('--gt-anno', 101 | help='Path of COCO val annotation', 102 | type=str, 103 | default='data/coco/annotations/person_keypoints_val2017.json' 104 | ) 105 | 106 | parser.add_argument('--save-path', 107 | help="Path to save the visualizations", 108 | type=str, 109 | default='visualization/coco/') 110 | 111 | parser.add_argument('--prediction', 112 | help="Prediction file to visualize", 113 | type=str, 114 | required=True) 115 | 116 | parser.add_argument('--style', 117 | 
help="Style of the visualization: Chunhua style or Xiaochu style", 118 | type=str, 119 | default='chunhua') 120 | 121 | args = parser.parse_args() 122 | 123 | return args 124 | 125 | 126 | def map_joint_dict(joints): 127 | joints_dict = {} 128 | for i in range(joints.shape[0]): 129 | x = int(joints[i][0]) 130 | y = int(joints[i][1]) 131 | id = i 132 | joints_dict[id] = (x, y) 133 | 134 | return joints_dict 135 | 136 | def plot(data, gt_file, img_path, save_path, 137 | link_pairs, ring_color, save=True): 138 | 139 | # joints 140 | coco = COCO(gt_file) 141 | coco_dt = coco.loadRes(data) 142 | coco_eval = COCOeval(coco, coco_dt, 'keypoints') 143 | coco_eval._prepare() 144 | gts_ = coco_eval._gts 145 | dts_ = coco_eval._dts 146 | 147 | p = coco_eval.params 148 | p.imgIds = list(np.unique(p.imgIds)) 149 | if p.useCats: 150 | p.catIds = list(np.unique(p.catIds)) 151 | p.maxDets = sorted(p.maxDets) 152 | 153 | # loop through images, area range, max detection number 154 | catIds = p.catIds if p.useCats else [-1] 155 | threshold = 0.3 156 | joint_thres = 0.2 157 | for catId in catIds: 158 | for imgId in p.imgIds[:5000]: 159 | # dimention here should be Nxm 160 | gts = gts_[imgId, catId] 161 | dts = dts_[imgId, catId] 162 | inds = np.argsort([-d['score'] for d in dts], kind='mergesort') 163 | dts = [dts[i] for i in inds] 164 | if len(dts) > p.maxDets[-1]: 165 | dts = dts[0:p.maxDets[-1]] 166 | if len(gts) == 0 or len(dts) == 0: 167 | continue 168 | 169 | sum_score = 0 170 | num_box = 0 171 | img_name = str(imgId).zfill(12) 172 | 173 | # Read Images 174 | img_file = img_path + img_name + '.jpg' 175 | data_numpy = cv2.imread(img_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 176 | h = data_numpy.shape[0] 177 | w = data_numpy.shape[1] 178 | 179 | # Plot 180 | fig = plt.figure(figsize=(w/100, h/100), dpi=100) 181 | ax = plt.subplot(1,1,1) 182 | bk = plt.imshow(data_numpy[:,:,::-1]) 183 | bk.set_zorder(-1) 184 | print(img_name) 185 | for j, gt in enumerate(gts): 186 | # matching dt_box and gt_box 187 | bb = gt['bbox'] 188 | x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 189 | y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 190 | 191 | # create bounds for ignore regions(double the gt bbox) 192 | g = np.array(gt['keypoints']) 193 | #xg = g[0::3]; yg = g[1::3]; 194 | vg = g[2::3] 195 | 196 | for i, dt in enumerate(dts): 197 | # Calculate IoU 198 | dt_bb = dt['bbox'] 199 | dt_x0 = dt_bb[0] - dt_bb[2]; dt_x1 = dt_bb[0] + dt_bb[2] * 2 200 | dt_y0 = dt_bb[1] - dt_bb[3]; dt_y1 = dt_bb[1] + dt_bb[3] * 2 201 | 202 | ol_x = min(x1, dt_x1) - max(x0, dt_x0) 203 | ol_y = min(y1, dt_y1) - max(y0, dt_y0) 204 | ol_area = ol_x * ol_y 205 | s_x = max(x1, dt_x1) - min(x0, dt_x0) 206 | s_y = max(y1, dt_y1) - min(y0, dt_y0) 207 | sum_area = s_x * s_y 208 | iou = ol_area / (sum_area + np.spacing(1)) 209 | score = dt['score'] 210 | 211 | if iou < 0.1 or score < threshold: 212 | continue 213 | else: 214 | print('iou: ', iou) 215 | dt_w = dt_x1 - dt_x0 216 | dt_h = dt_y1 - dt_y0 217 | ref = min(dt_w, dt_h) 218 | num_box += 1 219 | sum_score += dt['score'] 220 | dt_joints = np.array(dt['keypoints']).reshape(17,-1) 221 | joints_dict = map_joint_dict(dt_joints) 222 | 223 | # stick 224 | for k, link_pair in enumerate(link_pairs): 225 | if link_pair[0] in joints_dict \ 226 | and link_pair[1] in joints_dict: 227 | if dt_joints[link_pair[0],2] < joint_thres \ 228 | or dt_joints[link_pair[1],2] < joint_thres \ 229 | or vg[link_pair[0]] == 0 \ 230 | or vg[link_pair[1]] == 0: 231 | continue 232 | if k in range(6,11): 233 | lw = 1 234 | 
else: 235 | lw = ref / 100. 236 | line = mlines.Line2D( 237 | np.array([joints_dict[link_pair[0]][0], 238 | joints_dict[link_pair[1]][0]]), 239 | np.array([joints_dict[link_pair[0]][1], 240 | joints_dict[link_pair[1]][1]]), 241 | ls='-', lw=lw, alpha=1, color=link_pair[2],) 242 | line.set_zorder(0) 243 | ax.add_line(line) 244 | # black ring 245 | for k in range(dt_joints.shape[0]): 246 | if dt_joints[k,2] < joint_thres \ 247 | or vg[link_pair[0]] == 0 \ 248 | or vg[link_pair[1]] == 0: 249 | continue 250 | if dt_joints[k,0] > w or dt_joints[k,1] > h: 251 | continue 252 | if k in range(5): 253 | radius = 1 254 | else: 255 | radius = ref / 100 256 | 257 | circle = mpatches.Circle(tuple(dt_joints[k,:2]), 258 | radius=radius, 259 | ec='black', 260 | fc=ring_color[k], 261 | alpha=1, 262 | linewidth=1) 263 | circle.set_zorder(1) 264 | ax.add_patch(circle) 265 | 266 | avg_score = (sum_score / (num_box+np.spacing(1)))*1000 267 | 268 | plt.gca().xaxis.set_major_locator(plt.NullLocator()) 269 | plt.gca().yaxis.set_major_locator(plt.NullLocator()) 270 | plt.axis('off') 271 | plt.subplots_adjust(top=1,bottom=0,left=0,right=1,hspace=0,wspace=0) 272 | plt.margins(0,0) 273 | if save: 274 | plt.savefig(save_path + \ 275 | 'score_'+str(int(avg_score))+ \ 276 | '_id_'+str(imgId)+ \ 277 | '_'+img_name + '.png', 278 | format='png', bbox_inches='tight', dpi=100) 279 | plt.savefig(save_path +'id_'+str(imgId)+ '.pdf', format='pdf', 280 | bbox_inches='tight', dpi=100) 281 | # plt.show() 282 | plt.close() 283 | 284 | if __name__ == '__main__': 285 | 286 | args = parse_args() 287 | if args.style == 'xiaochu': 288 | # Xiaochu Style 289 | colorstyle = xiaochu_style 290 | elif args.style == 'chunhua': 291 | # Chunhua Style 292 | colorstyle = chunhua_style 293 | else: 294 | raise Exception('Invalid color style') 295 | 296 | save_path = args.save_path 297 | img_path = args.image_path 298 | if not os.path.exists(save_path): 299 | try: 300 | os.makedirs(save_path) 301 | except Exception: 302 | print('Failed to create {}'.format(save_path)) 303 | 304 | 305 | with open(args.prediction) as f: 306 | data = json.load(f) 307 | gt_file = args.gt_anno 308 | plot(data, gt_file, img_path, save_path, colorstyle.link_pairs, colorstyle.ring_color, save=True) 309 | 310 | --------------------------------------------------------------------------------
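A minimal usage sketch, assuming the default OUTPUT_DIR layout produced by create_logger and tools/train.py; the checkpoint path and the prediction file are illustrative, the config path is one of the files under experiments/. Based on the argument parsers in tools/train.py, tools/test.py and visualization/plot_coco.py, typical invocations from the repository root might look like:

    # train HRNet-W32 on COCO at 256x192
    python tools/train.py --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml

    # evaluate a trained checkpoint; TEST.MODEL_FILE is passed through the
    # argparse.REMAINDER `opts` and merged into the config by update_config
    python tools/test.py --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \
        TEST.MODEL_FILE output/coco/pose_hrnet/w32_256x192_adam_lr1e-3/final_state.pth

    # visualize COCO keypoint predictions (the prediction json is whatever results
    # file the evaluation step wrote)
    python visualization/plot_coco.py \
        --prediction <keypoint results .json> \
        --gt-anno data/coco/annotations/person_keypoints_val2017.json \
        --image-path data/coco/images/val2017/ \
        --save-path visualization/coco/ \
        --style chunhua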