├── Example Configs
│   ├── H-1.yaml
│   ├── H-2.yaml
│   ├── H-3.yaml
│   ├── H-4.yaml
│   └── H0.yaml
├── LICENSE
├── README.md
├── experiments
│   ├── coco
│   │   └── higher_hrnet
│   │       ├── frozen.yaml
│   │       └── unfrozen.yaml
│   └── crowd_pose
│       └── higher_hrnet
│           ├── w32_512_adam_lr1e-3.yaml
│           ├── w32_512_adam_lr1e-3_coco.yaml
│           ├── w32_512_adam_lr1e-3_syncbn.yaml
│           ├── w32_640_adam_lr1e-3.yaml
│           └── w48_640_adam_lr1e-3.yaml
├── lib
│   ├── config
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── default.cpython-36.pyc
│   │   │   └── models.cpython-36.pyc
│   │   ├── default.py
│   │   └── models.py
│   ├── core
│   │   ├── __pycache__
│   │   │   ├── group.cpython-36.pyc
│   │   │   ├── inference.cpython-36.pyc
│   │   │   ├── loss.cpython-36.pyc
│   │   │   └── trainer.cpython-36.pyc
│   │   ├── group.py
│   │   ├── inference.py
│   │   ├── loss.py
│   │   └── trainer.py
│   ├── dataset
│   │   ├── COCODataset.py
│   │   ├── COCOKeypoints.py
│   │   ├── CrowdPoseDataset.py
│   │   ├── CrowdPoseKeypoints.py
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── COCODataset.cpython-36.pyc
│   │   │   ├── COCOKeypoints.cpython-36.pyc
│   │   │   ├── CrowdPoseDataset.cpython-36.pyc
│   │   │   ├── CrowdPoseKeypoints.cpython-36.pyc
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   └── build.cpython-36.pyc
│   │   ├── build.py
│   │   ├── target_generators
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   └── target_generators.cpython-36.pyc
│   │   │   └── target_generators.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── __pycache__
│   │       │   ├── __init__.cpython-36.pyc
│   │       │   ├── build.cpython-36.pyc
│   │       │   └── transforms.cpython-36.pyc
│   │       ├── build.py
│   │       └── transforms.py
│   ├── fp16_utils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── fp16_optimizer.cpython-36.pyc
│   │   │   ├── fp16util.cpython-36.pyc
│   │   │   └── loss_scaler.cpython-36.pyc
│   │   ├── fp16_optimizer.py
│   │   ├── fp16util.py
│   │   └── loss_scaler.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── efficientnet_blocks.cpython-36.pyc
│   │   │   ├── pose_efficientnet.cpython-36.pyc
│   │   │   ├── pose_efficientnet_all.cpython-36.pyc
│   │   │   └── pose_higher_hrnet.cpython-36.pyc
│   │   ├── efficientnet_blocks.py
│   │   └── pose_higher_hrnet.py
│   └── utils
│       ├── __pycache__
│       │   ├── transforms.cpython-36.pyc
│       │   ├── utils.cpython-36.pyc
│       │   ├── vis.cpython-36.pyc
│       │   └── zipreader.cpython-36.pyc
│       ├── transforms.py
│       ├── utils.py
│       ├── vis.py
│       └── zipreader.py
├── requirements.txt
└── tools
    ├── __pycache__
    │   └── _init_paths.cpython-36.pyc
    ├── _init_paths.py
    ├── crowdpose_concat_train_val.py
    ├── dist_train.py
    └── valid.py
/Example Configs/H-1.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output_H-1
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: True
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 480
18 | OUTPUT_SIZE: [120, 240]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: 'data/coco' #Dataset Root Folder
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 3
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | SCALE_FACTOR: -1
99 | WIDTH_MULT: 0.909
100 | DEPTH_MULT: 0.833
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 17
103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, False)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 6
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 8
133 |
134 |
--------------------------------------------------------------------------------
/Example Configs/H-2.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output_H-2
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: True
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 448
18 | OUTPUT_SIZE: [112, 224]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: 'data/coco' #Dataset Root Folder
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 2
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | SCALE_FACTOR: -2
99 | WIDTH_MULT: 0.826
100 | DEPTH_MULT: 0.694
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 17
103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, False)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 8
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 8
133 |
134 |
--------------------------------------------------------------------------------
/Example Configs/H-3.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output_H-3
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: True
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 416
18 | OUTPUT_SIZE: [104, 208]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: 'data/coco' #Dataset Root Folder
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 1
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | SCALE_FACTOR: -3
99 | WIDTH_MULT: 0.751
100 | DEPTH_MULT: 0.578
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 17
103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, False)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 10
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 8
133 |
134 |
--------------------------------------------------------------------------------
/Example Configs/H-4.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output_H-4
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: True
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 384
18 | OUTPUT_SIZE: [96, 192]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: 'data/coco' #Dataset Root Folder
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 1
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 2
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | SCALE_FACTOR: -4
99 | WIDTH_MULT: 0.684
100 | DEPTH_MULT: 0.483
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 17
103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, False)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 48
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 8
133 |
134 |
--------------------------------------------------------------------------------
/Example Configs/H0.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: False
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output_H0
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: False
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 512
18 | OUTPUT_SIZE: [128, 256]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: 'data/coco' #Dataset Root Folder
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 4
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | NAME: pose_higher_hrnet
99 | NUM_JOINTS: 17
100 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model
101 | TAG_PER_JOINT: True
102 | TEST:
103 | FLIP_TEST: True
104 | IMAGES_PER_GPU: 1
105 | MODEL_FILE: ''
106 | SCALE_FACTOR: [1]
107 | DETECTION_THRESHOLD: 0.1
108 | WITH_HEATMAPS: (True, True)
109 | WITH_AE: (True, False)
110 | PROJECT2IMAGE: True
111 | NMS_KERNEL: 5
112 | NMS_PADDING: 2
113 | TRAIN:
114 | BEGIN_EPOCH: 0
115 | CHECKPOINT: ''
116 | END_EPOCH: 300
117 | GAMMA1: 0.99
118 | GAMMA2: 0.0
119 | IMAGES_PER_GPU: 1
120 | LR: 0.001
121 | LR_FACTOR: 0.1
122 | LR_STEP: [200, 260]
123 | MOMENTUM: 0.9
124 | NESTEROV: False
125 | OPTIMIZER: adam
126 | RESUME: False
127 | SHUFFLE: True
128 | WD: 0.0001
129 | WORKERS: 0
130 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # EfficientHRNet
2 |
3 | EfficientHRNet is a family of scalable and efficient networks created by unifying EfficientNet and HigherHRNet for multi-person human pose estimation. A preprint of our paper can be found [here.](https://arxiv.org/abs/2007.08090)
4 |
5 | Our code is based on the
6 |
7 | 1) Official implementation of [HigherHRNet](https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation)
8 |
9 | 2) PyTorch implementation of [EfficientNet](https://github.com/narumiruna/efficientnet-pytorch)
10 |
11 |
12 | We provide a formulation for jointly scaling our backbone EfficientNet below the baseline B0 and the rest of EfficientHRNet with it. Ultimately, we are able to create a family
13 | of highly accurate and efficient 2D human pose estimators that is flexible enough to provide lightweight solutions for a variety of application and device requirements.
14 |
15 |
16 | ## Environment Setup ##
17 |
18 | * PyTorch >= 1.1.0
19 |
20 | * Dependencies as listed in requirements.txt
21 |
22 | * COCO Keypoints dataset and the COCOAPI, as provided at https://cocodataset.org/#download and https://github.com/cocodataset/cocoapi
23 |
24 | The code was developed using Python 3.6 and NVIDIA GPUs, both of which are required.
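
The example configs point `DATASET.ROOT` at `data/coco`, with `TRAIN: train2017` and `TEST: val2017`. The exact directory layout this repository expects is not documented here, so the sketch below is only an assumption based on the upstream HigherHRNet convention; if in doubt, check `lib/dataset/COCODataset.py` for the paths it actually builds:

```
data/coco
├── annotations
│   ├── person_keypoints_train2017.json
│   └── person_keypoints_val2017.json
└── images
    ├── train2017
    └── val2017
```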
25 |
26 |
27 | ## Configurations for EfficientHRNet models ##
28 |
29 | Config files are found at experiments/coco/higher_hrnet/
30 |
31 | Varying the following parameters provides different EfficientHRNet models, ranging from H0 to H-4:
32 |
33 | * scale_factor
34 | * input_size
35 | * width_mult
36 | * depth_mult
37 |
38 | More details on scaling can be found in our paper: https://arxiv.org/pdf/2007.08090.pdf
39 |
40 | Examples can be seen in the Example Configs folder.
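
As a rough guide to how these parameters fit together: the width and depth multipliers in the example configs closely track the standard EfficientNet compound-scaling coefficients (depth α ≈ 1.2, width β ≈ 1.1) applied with a negative scale factor, and the input size drops by 32 pixels per step. The snippet below is a sketch under that assumption, not code from this repository; the paper gives the exact formulation.

```python
# Sketch: approximate relation between MODEL.SCALE_FACTOR (phi) and the
# WIDTH_MULT / DEPTH_MULT / INPUT_SIZE values in Example Configs/H*.yaml.
# ALPHA/BETA are the standard EfficientNet coefficients (an assumption here).
ALPHA, BETA = 1.2, 1.1  # depth / width scaling coefficients

def efficienthrnet_scaling(phi: int):
    """Approximate (width_mult, depth_mult, input_size) for phi in 0, -1, ..., -4."""
    width_mult = round(BETA ** phi, 3)
    depth_mult = round(ALPHA ** phi, 3)
    input_size = 512 + 32 * phi      # 512, 480, 448, 416, 384
    return width_mult, depth_mult, input_size

for phi in range(0, -5, -1):
    print(f"H{phi}:", efficienthrnet_scaling(phi))
# H0: (1.0, 1.0, 512)   H-1: (0.909, 0.833, 480)   H-2: (0.826, 0.694, 448) ...
# which matches the example configs to within rounding.
```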
41 |
42 |
43 | ## EfficientHRNet Training and Evaluation ##
44 |
45 | Distributed training is supported. Config settings can be customized to user requirements. Training and validation scripts can be found in tools/.
46 |
47 |
48 | ### Training on COCO (Nvidia GPUs) ###
49 |
50 | Single GPU training example:
51 | ```
52 | CUDA_VISIBLE_DEVICES=0 python3 tools/dist_train.py --cfg experiments/coco/higher_hrnet/config.yaml
53 | ```
54 | Distributed training example:
55 | ```
56 | CUDA_VISIBLE_DEVICES=0,1 python3 tools/dist_train.py --cfg experiments/coco/higher_hrnet/config.yaml --dist-url tcp://127.0.0.1:12345
57 | ```
58 |
59 | ### Testing on COCO (Nvidia GPUs) ###
60 |
61 | Both single-scale and multi-scale testing are supported.
62 |
63 | Single-scale testing:
64 | ```
65 | python3 tools/valid.py --cfg experiments/coco/higher_hrnet/config.yaml TEST.MODEL_FILE /path/to/model.pth
66 | ```
67 | Multi-scale testing:
68 | ```
69 | python3 tools/valid.py --cfg experiments/coco/higher_hrnet/config.yaml TEST.MODEL_FILE /path/to/model.pth TEST.SCALE_FACTOR [0.5,1.0,1.5]
70 | ```
71 |
72 | ## Pretrained Models ##
73 |
74 | COCO17 pretrained models for EfficientHRNet H0 to H-4 can be downloaded [here.](https://drive.google.com/drive/folders/1FcJ1bawqWb1yAkcqb2sJfMsePMwupsWJ?usp=sharing)
75 |
76 | | Method | Input Size | Parameters | FLOPs | AP | AP (multi-scale) |
77 | |:--------------:|:----------:|:----------:|:-----:|:----:|:----:|
78 | | H0 | 512 | 23.3M | 25.6B | 64.0 | 67.1 |
79 | | H-1 | 480 | 16M | 14.2B | 59.1 | 62.3 |
80 | | H-2 | 448 | 10.3M | 7.7B | 52.8 | 55.0 |
81 | | H-3 | 416 | 6.9M | 4.2B | 44.5 | 45.5 |
82 | | H-4 | 384 | 3.7M | 2.1B | 35.5 | 39.7 |
83 |
84 |
85 | Compact EfficientNet ImageNet-trained weights can be downloaded [here.](https://drive.google.com/drive/folders/1AZMYacfDcZv4QePcYONtg2in7oVmmSwV?usp=sharing)
86 |
87 | | Method | Input Size | FLOPs | ImageNet Parameters | ImageNet Top-1 | CIFAR-100 Parameters | CIFAR-100 Top-1 |
88 | |:------:|:----------:|:-----:|:-------------------:|:--------------:|:--------------------:|:---------------:|
89 | | B0 | 512 | 0.4B | 5.3M | 75 | 4.1M | 81.9 |
90 | | B-1 | 480 | 0.3B | 4.5M | 73.8 | 3.5M | 81.4 |
91 | | B-2 | 448 | 0.2B | 3.4M | 71.3 | 2.5M | 79.8 |
92 | | B-3 | 416 | 0.1B | 2.8M | 68.5 | 1.9M | 78.2 |
93 | | B-4 | 384 | 0.05B | 1.3M | 65.6 | 1.3M | 74.3 |
94 |
95 |
96 |
97 | ## Citation ##
98 |
99 | If you would like to use EfficientHRNet in your work, please use the following citation.
100 |
101 | ```
102 | @misc{neff2020efficienthrnet,
103 | title={EfficientHRNet: Efficient Scaling for Lightweight High-Resolution Multi-Person Pose Estimation},
104 | author={Christopher Neff and Aneri Sheth and Steven Furgurson and Hamed Tabkhi},
105 | year={2020},
106 | eprint={2007.08090},
107 | archivePrefix={arXiv},
108 | primaryClass={cs.CV}
109 | }
110 | ```
111 |
112 | We also recommend citing EfficientNet and HigherHRNet, which inspired this work.
113 |
--------------------------------------------------------------------------------
/experiments/coco/higher_hrnet/frozen.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: False
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: b0_frozen
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: True
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 512
18 | OUTPUT_SIZE: [128, 256]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: '/home/aneri/EfficientHRNet_b0/data/coco/'
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, True]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 4
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | SCALE_FACTOR: 0
99 | WIDTH_MULT: 1.0
100 | DEPTH_MULT: 1.0
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 17
103 | PRETRAINED: '/home/aneri/efficientnetb5-branch/efficientnet-b0-4cfa50.pth'
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, True)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 60
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 8
133 |
134 |
--------------------------------------------------------------------------------
/experiments/coco/higher_hrnet/unfrozen.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: False
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: b0_unfrozen
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: True
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: coco_kpt
14 | DATASET_TEST: coco
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 512
18 | OUTPUT_SIZE: [128, 256]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 17
26 | ROOT: '/home/aneri/EfficientHRNet_b0/data/coco/'
27 | TEST: val2017
28 | TRAIN: train2017
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, True]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 2
55 | - 2
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 4
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 2
66 | - 2
67 | - 2
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 2
79 | - 2
80 | - 2
81 | - 2
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 2
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | SCALE_FACTOR: 0
99 | WIDTH_MULT: 1.0
100 | DEPTH_MULT: 1.0
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 17
103 | PRETRAINED: '/home/aneri/efficientnetb5-branch/efficientnet-b0-4cfa50.pth'
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, True)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 29
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 8
133 |
134 |
--------------------------------------------------------------------------------
/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: False
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: crowd_pose_kpt
14 | DATASET_TEST: crowd_pose
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 512
18 | OUTPUT_SIZE: [128, 256]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 14
26 | ROOT: 'data/crowd_pose'
27 | TEST: test
28 | TRAIN: trainval
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 4
55 | - 4
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 4
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 4
66 | - 4
67 | - 4
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 4
79 | - 4
80 | - 4
81 | - 4
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 4
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | NAME: pose_higher_hrnet
99 | NUM_JOINTS: 14
100 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
101 | TAG_PER_JOINT: True
102 | TEST:
103 | FLIP_TEST: True
104 | IMAGES_PER_GPU: 1
105 | MODEL_FILE: ''
106 | SCALE_FACTOR: [1]
107 | DETECTION_THRESHOLD: 0.1
108 | WITH_HEATMAPS: (True, True)
109 | WITH_AE: (True, False)
110 | PROJECT2IMAGE: True
111 | NMS_KERNEL: 5
112 | NMS_PADDING: 2
113 | TRAIN:
114 | BEGIN_EPOCH: 0
115 | CHECKPOINT: ''
116 | END_EPOCH: 300
117 | GAMMA1: 0.99
118 | GAMMA2: 0.0
119 | IMAGES_PER_GPU: 12
120 | LR: 0.001
121 | LR_FACTOR: 0.1
122 | LR_STEP: [200, 260]
123 | MOMENTUM: 0.9
124 | NESTEROV: False
125 | OPTIMIZER: adam
126 | RESUME: False
127 | SHUFFLE: True
128 | WD: 0.0001
129 | WORKERS: 4
130 |
--------------------------------------------------------------------------------
/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_coco.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: False
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: crowd_pose_kpt
14 | DATASET_TEST: crowd_pose
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 512
18 | OUTPUT_SIZE: [128, 256]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 14
26 | ROOT: 'data/crowd_pose'
27 | TEST: test
28 | TRAIN: trainval
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS:
48 | - 'conv1'
49 | - 'bn1'
50 | - 'conv2'
51 | - 'bn2'
52 | - 'layer1'
53 | - 'transition1'
54 | - 'stage2'
55 | - 'transition2'
56 | - 'stage3'
57 | - 'transition3'
58 | - 'stage4'
59 | STEM_INPLANES: 64
60 | STAGE2:
61 | NUM_MODULES: 1
62 | NUM_BRANCHES: 2
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 4
66 | - 4
67 | NUM_CHANNELS:
68 | - 32
69 | - 64
70 | FUSE_METHOD: SUM
71 | STAGE3:
72 | NUM_MODULES: 4
73 | NUM_BRANCHES: 3
74 | BLOCK: BASIC
75 | NUM_BLOCKS:
76 | - 4
77 | - 4
78 | - 4
79 | NUM_CHANNELS:
80 | - 32
81 | - 64
82 | - 128
83 | FUSE_METHOD: SUM
84 | STAGE4:
85 | NUM_MODULES: 3
86 | NUM_BRANCHES: 4
87 | BLOCK: BASIC
88 | NUM_BLOCKS:
89 | - 4
90 | - 4
91 | - 4
92 | - 4
93 | NUM_CHANNELS:
94 | - 32
95 | - 64
96 | - 128
97 | - 256
98 | FUSE_METHOD: SUM
99 | DECONV:
100 | NUM_DECONVS: 1
101 | NUM_CHANNELS:
102 | - 32
103 | KERNEL_SIZE:
104 | - 4
105 | NUM_BASIC_BLOCKS: 4
106 | CAT_OUTPUT:
107 | - True
108 | INIT_WEIGHTS: True
109 | NAME: pose_higher_hrnet
110 | NUM_JOINTS: 14
111 | PRETRAINED: 'models/pytorch/pose_coco/pose_higher_hrnet_w32_512.pth'
112 | TAG_PER_JOINT: True
113 | TEST:
114 | FLIP_TEST: True
115 | IMAGES_PER_GPU: 1
116 | MODEL_FILE: ''
117 | SCALE_FACTOR: [1]
118 | DETECTION_THRESHOLD: 0.1
119 | WITH_HEATMAPS: (True, True)
120 | WITH_AE: (True, False)
121 | PROJECT2IMAGE: True
122 | NMS_KERNEL: 5
123 | NMS_PADDING: 2
124 | TRAIN:
125 | BEGIN_EPOCH: 0
126 | CHECKPOINT: ''
127 | END_EPOCH: 300
128 | GAMMA1: 0.99
129 | GAMMA2: 0.0
130 | IMAGES_PER_GPU: 12
131 | LR: 0.001
132 | LR_FACTOR: 0.1
133 | LR_STEP: [200, 260]
134 | MOMENTUM: 0.9
135 | NESTEROV: False
136 | OPTIMIZER: adam
137 | RESUME: False
138 | SHUFFLE: True
139 | WD: 0.0001
140 | WORKERS: 4
141 |
--------------------------------------------------------------------------------
/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_syncbn.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output
6 | PRINT_FREQ: 100
7 | CUDNN:
8 | BENCHMARK: True
9 | DETERMINISTIC: False
10 | ENABLED: True
11 | DATASET:
12 | SIGMA: 2
13 | DATASET: crowd_pose_kpt
14 | DATASET_TEST: crowd_pose
15 | DATA_FORMAT: jpg
16 | FLIP: 0.5
17 | INPUT_SIZE: 512
18 | OUTPUT_SIZE: [128, 256]
19 | MAX_NUM_PEOPLE: 30
20 | MAX_ROTATION: 30
21 | MAX_SCALE: 1.5
22 | SCALE_TYPE: 'short'
23 | MAX_TRANSLATE: 40
24 | MIN_SCALE: 0.75
25 | NUM_JOINTS: 14
26 | ROOT: 'data/crowd_pose'
27 | TEST: test
28 | TRAIN: trainval
29 | DEBUG:
30 | DEBUG: True
31 | SAVE_BATCH_IMAGES_GT: False
32 | SAVE_BATCH_IMAGES_PRED: False
33 | SAVE_HEATMAPS_GT: True
34 | SAVE_HEATMAPS_PRED: True
35 | SAVE_TAGMAPS_PRED: True
36 | LOSS:
37 | NUM_STAGES: 2
38 | AE_LOSS_TYPE: exp
39 | WITH_AE_LOSS: [True, False]
40 | PUSH_LOSS_FACTOR: [0.001, 0.001]
41 | PULL_LOSS_FACTOR: [0.001, 0.001]
42 | WITH_HEATMAPS_LOSS: [True, True]
43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
44 | MODEL:
45 | EXTRA:
46 | FINAL_CONV_KERNEL: 1
47 | PRETRAINED_LAYERS: ['*']
48 | STEM_INPLANES: 64
49 | STAGE2:
50 | NUM_MODULES: 1
51 | NUM_BRANCHES: 2
52 | BLOCK: BASIC
53 | NUM_BLOCKS:
54 | - 4
55 | - 4
56 | NUM_CHANNELS:
57 | - 32
58 | - 64
59 | FUSE_METHOD: SUM
60 | STAGE3:
61 | NUM_MODULES: 4
62 | NUM_BRANCHES: 3
63 | BLOCK: BASIC
64 | NUM_BLOCKS:
65 | - 4
66 | - 4
67 | - 4
68 | NUM_CHANNELS:
69 | - 32
70 | - 64
71 | - 128
72 | FUSE_METHOD: SUM
73 | STAGE4:
74 | NUM_MODULES: 3
75 | NUM_BRANCHES: 4
76 | BLOCK: BASIC
77 | NUM_BLOCKS:
78 | - 4
79 | - 4
80 | - 4
81 | - 4
82 | NUM_CHANNELS:
83 | - 32
84 | - 64
85 | - 128
86 | - 256
87 | FUSE_METHOD: SUM
88 | DECONV:
89 | NUM_DECONVS: 1
90 | NUM_CHANNELS:
91 | - 32
92 | KERNEL_SIZE:
93 | - 4
94 | NUM_BASIC_BLOCKS: 4
95 | CAT_OUTPUT:
96 | - True
97 | INIT_WEIGHTS: True
98 | NAME: pose_higher_hrnet
99 | NUM_JOINTS: 14
100 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
101 | TAG_PER_JOINT: True
102 | SYNC_BN: True
103 | TEST:
104 | FLIP_TEST: True
105 | IMAGES_PER_GPU: 1
106 | MODEL_FILE: ''
107 | SCALE_FACTOR: [1]
108 | DETECTION_THRESHOLD: 0.1
109 | WITH_HEATMAPS: (True, True)
110 | WITH_AE: (True, False)
111 | PROJECT2IMAGE: True
112 | NMS_KERNEL: 5
113 | NMS_PADDING: 2
114 | TRAIN:
115 | BEGIN_EPOCH: 0
116 | CHECKPOINT: ''
117 | END_EPOCH: 300
118 | GAMMA1: 0.99
119 | GAMMA2: 0.0
120 | IMAGES_PER_GPU: 12
121 | LR: 0.001
122 | LR_FACTOR: 0.1
123 | LR_STEP: [200, 260]
124 | MOMENTUM: 0.9
125 | NESTEROV: False
126 | OPTIMIZER: adam
127 | RESUME: False
128 | SHUFFLE: True
129 | WD: 0.0001
130 | WORKERS: 4
131 |
--------------------------------------------------------------------------------
/experiments/crowd_pose/higher_hrnet/w32_640_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output
6 | PRINT_FREQ: 100
7 | FP16:
8 | ENABLED: True
9 | DYNAMIC_LOSS_SCALE: True
10 | CUDNN:
11 | BENCHMARK: True
12 | DETERMINISTIC: False
13 | ENABLED: True
14 | DATASET:
15 | SIGMA: 2
16 | DATASET: crowd_pose_kpt
17 | DATASET_TEST: crowd_pose
18 | DATA_FORMAT: jpg
19 | FLIP: 0.5
20 | INPUT_SIZE: 640
21 | OUTPUT_SIZE: [160, 320]
22 | MAX_NUM_PEOPLE: 30
23 | MAX_ROTATION: 30
24 | MAX_SCALE: 1.5
25 | SCALE_TYPE: 'short'
26 | MAX_TRANSLATE: 40
27 | MIN_SCALE: 0.75
28 | NUM_JOINTS: 14
29 | ROOT: 'data/crowd_pose'
30 | TEST: test
31 | TRAIN: trainval
32 | DEBUG:
33 | DEBUG: True
34 | SAVE_BATCH_IMAGES_GT: False
35 | SAVE_BATCH_IMAGES_PRED: False
36 | SAVE_HEATMAPS_GT: True
37 | SAVE_HEATMAPS_PRED: True
38 | SAVE_TAGMAPS_PRED: True
39 | LOSS:
40 | NUM_STAGES: 2
41 | AE_LOSS_TYPE: exp
42 | WITH_AE_LOSS: [True, False]
43 | PUSH_LOSS_FACTOR: [0.001, 0.001]
44 | PULL_LOSS_FACTOR: [0.001, 0.001]
45 | WITH_HEATMAPS_LOSS: [True, True]
46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
47 | MODEL:
48 | EXTRA:
49 | FINAL_CONV_KERNEL: 1
50 | PRETRAINED_LAYERS: ['*']
51 | STEM_INPLANES: 64
52 | STAGE2:
53 | NUM_MODULES: 1
54 | NUM_BRANCHES: 2
55 | BLOCK: BASIC
56 | NUM_BLOCKS:
57 | - 4
58 | - 4
59 | NUM_CHANNELS:
60 | - 32
61 | - 64
62 | FUSE_METHOD: SUM
63 | STAGE3:
64 | NUM_MODULES: 4
65 | NUM_BRANCHES: 3
66 | BLOCK: BASIC
67 | NUM_BLOCKS:
68 | - 4
69 | - 4
70 | - 4
71 | NUM_CHANNELS:
72 | - 32
73 | - 64
74 | - 128
75 | FUSE_METHOD: SUM
76 | STAGE4:
77 | NUM_MODULES: 3
78 | NUM_BRANCHES: 4
79 | BLOCK: BASIC
80 | NUM_BLOCKS:
81 | - 4
82 | - 4
83 | - 4
84 | - 4
85 | NUM_CHANNELS:
86 | - 32
87 | - 64
88 | - 128
89 | - 256
90 | FUSE_METHOD: SUM
91 | DECONV:
92 | NUM_DECONVS: 1
93 | NUM_CHANNELS:
94 | - 32
95 | KERNEL_SIZE:
96 | - 4
97 | NUM_BASIC_BLOCKS: 4
98 | CAT_OUTPUT:
99 | - True
100 | INIT_WEIGHTS: True
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 14
103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, False)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 12
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 4
133 |
--------------------------------------------------------------------------------
/experiments/crowd_pose/higher_hrnet/w48_640_adam_lr1e-3.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: True
2 | DATA_DIR: ''
3 | GPUS: (0,)
4 | LOG_DIR: log
5 | OUTPUT_DIR: output
6 | PRINT_FREQ: 100
7 | FP16:
8 | ENABLED: True
9 | DYNAMIC_LOSS_SCALE: True
10 | CUDNN:
11 | BENCHMARK: True
12 | DETERMINISTIC: False
13 | ENABLED: True
14 | DATASET:
15 | SIGMA: 2
16 | DATASET: crowd_pose_kpt
17 | DATASET_TEST: crowd_pose
18 | DATA_FORMAT: jpg
19 | FLIP: 0.5
20 | INPUT_SIZE: 640
21 | OUTPUT_SIZE: [160, 320]
22 | MAX_NUM_PEOPLE: 30
23 | MAX_ROTATION: 30
24 | MAX_SCALE: 1.5
25 | SCALE_TYPE: 'short'
26 | MAX_TRANSLATE: 40
27 | MIN_SCALE: 0.75
28 | NUM_JOINTS: 14
29 | ROOT: 'data/crowd_pose'
30 | TEST: test
31 | TRAIN: trainval
32 | DEBUG:
33 | DEBUG: True
34 | SAVE_BATCH_IMAGES_GT: False
35 | SAVE_BATCH_IMAGES_PRED: False
36 | SAVE_HEATMAPS_GT: True
37 | SAVE_HEATMAPS_PRED: True
38 | SAVE_TAGMAPS_PRED: True
39 | LOSS:
40 | NUM_STAGES: 2
41 | AE_LOSS_TYPE: exp
42 | WITH_AE_LOSS: [True, False]
43 | PUSH_LOSS_FACTOR: [0.001, 0.001]
44 | PULL_LOSS_FACTOR: [0.001, 0.001]
45 | WITH_HEATMAPS_LOSS: [True, True]
46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0]
47 | MODEL:
48 | EXTRA:
49 | FINAL_CONV_KERNEL: 1
50 | PRETRAINED_LAYERS: ['*']
51 | STEM_INPLANES: 64
52 | STAGE2:
53 | NUM_MODULES: 1
54 | NUM_BRANCHES: 2
55 | BLOCK: BASIC
56 | NUM_BLOCKS:
57 | - 4
58 | - 4
59 | NUM_CHANNELS:
60 | - 48
61 | - 96
62 | FUSE_METHOD: SUM
63 | STAGE3:
64 | NUM_MODULES: 4
65 | NUM_BRANCHES: 3
66 | BLOCK: BASIC
67 | NUM_BLOCKS:
68 | - 4
69 | - 4
70 | - 4
71 | NUM_CHANNELS:
72 | - 48
73 | - 96
74 | - 192
75 | FUSE_METHOD: SUM
76 | STAGE4:
77 | NUM_MODULES: 3
78 | NUM_BRANCHES: 4
79 | BLOCK: BASIC
80 | NUM_BLOCKS:
81 | - 4
82 | - 4
83 | - 4
84 | - 4
85 | NUM_CHANNELS:
86 | - 48
87 | - 96
88 | - 192
89 | - 384
90 | FUSE_METHOD: SUM
91 | DECONV:
92 | NUM_DECONVS: 1
93 | NUM_CHANNELS:
94 | - 48
95 | KERNEL_SIZE:
96 | - 4
97 | NUM_BASIC_BLOCKS: 4
98 | CAT_OUTPUT:
99 | - True
100 | INIT_WEIGHTS: True
101 | NAME: pose_higher_hrnet
102 | NUM_JOINTS: 14
103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth'
104 | TAG_PER_JOINT: True
105 | TEST:
106 | FLIP_TEST: True
107 | IMAGES_PER_GPU: 1
108 | MODEL_FILE: ''
109 | SCALE_FACTOR: [1]
110 | DETECTION_THRESHOLD: 0.1
111 | WITH_HEATMAPS: (True, True)
112 | WITH_AE: (True, False)
113 | PROJECT2IMAGE: True
114 | NMS_KERNEL: 5
115 | NMS_PADDING: 2
116 | TRAIN:
117 | BEGIN_EPOCH: 0
118 | CHECKPOINT: ''
119 | END_EPOCH: 300
120 | GAMMA1: 0.99
121 | GAMMA2: 0.0
122 | IMAGES_PER_GPU: 10
123 | LR: 0.001
124 | LR_FACTOR: 0.1
125 | LR_STEP: [200, 260]
126 | MOMENTUM: 0.9
127 | NESTEROV: False
128 | OPTIMIZER: adam
129 | RESUME: False
130 | SHUFFLE: True
131 | WD: 0.0001
132 | WORKERS: 4
133 |
--------------------------------------------------------------------------------
/lib/config/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from .default import _C as cfg
8 | from .default import update_config
9 | from .default import check_config
10 |
--------------------------------------------------------------------------------
/lib/config/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/config/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/config/__pycache__/default.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/config/__pycache__/default.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/config/__pycache__/models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/config/__pycache__/models.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/config/default.py:
--------------------------------------------------------------------------------
1 |
2 | # ------------------------------------------------------------------------------
3 | # Copyright (c) Microsoft
4 | # Licensed under the MIT License.
5 | # Written by Bin Xiao (leoxiaobin@gmail.com)
6 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
7 | # ------------------------------------------------------------------------------
8 |
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | import os
14 |
15 | from yacs.config import CfgNode as CN
16 |
17 | from .models import MODEL_EXTRAS
18 |
19 |
20 | _C = CN()
21 |
22 | _C.OUTPUT_DIR = ''
23 | _C.LOG_DIR = ''
24 | _C.DATA_DIR = ''
25 | _C.GPUS = (0,)
26 | _C.WORKERS = 4
27 | _C.PRINT_FREQ = 20
28 | _C.AUTO_RESUME = False
29 | _C.PIN_MEMORY = True
30 | _C.RANK = 0
31 | _C.VERBOSE = True
32 | _C.DIST_BACKEND = 'nccl'
33 | _C.MULTIPROCESSING_DISTRIBUTED = True
34 |
35 | # FP16 training params
36 | _C.FP16 = CN()
37 | _C.FP16.ENABLED = False
38 | _C.FP16.STATIC_LOSS_SCALE = 1.0
39 | _C.FP16.DYNAMIC_LOSS_SCALE = False
40 |
41 | # Cudnn related params
42 | _C.CUDNN = CN()
43 | _C.CUDNN.BENCHMARK = True
44 | _C.CUDNN.DETERMINISTIC = False
45 | _C.CUDNN.ENABLED = True
46 |
47 | # common params for NETWORK
48 | _C.MODEL = CN()
49 | _C.MODEL.NAME = 'pose_multi_resolution_net_v16'
50 | _C.MODEL.INIT_WEIGHTS = True
51 | _C.MODEL.SCALE_FACTOR = 0
52 | _C.MODEL.WIDTH_MULT = 1.0
53 | _C.MODEL.DEPTH_MULT = 1.0
54 | _C.MODEL.PRETRAINED = ''
55 | _C.MODEL.NUM_JOINTS = 17
56 | _C.MODEL.TAG_PER_JOINT = True
57 | _C.MODEL.EXTRA = CN(new_allowed=True)
58 | _C.MODEL.SYNC_BN = False
59 |
60 | _C.LOSS = CN()
61 | _C.LOSS.NUM_STAGES = 1
62 | _C.LOSS.WITH_HEATMAPS_LOSS = (True,)
63 | _C.LOSS.HEATMAPS_LOSS_FACTOR = (1.0,)
64 | _C.LOSS.WITH_AE_LOSS = (True,)
65 | _C.LOSS.AE_LOSS_TYPE = 'max'
66 | _C.LOSS.PUSH_LOSS_FACTOR = (0.001,)
67 | _C.LOSS.PULL_LOSS_FACTOR = (0.001,)
68 |
69 | # DATASET related params
70 | _C.DATASET = CN()
71 | _C.DATASET.ROOT = ''
72 | _C.DATASET.DATASET = 'coco_kpt'
73 | _C.DATASET.DATASET_TEST = 'coco'
74 | _C.DATASET.NUM_JOINTS = 17
75 | _C.DATASET.MAX_NUM_PEOPLE = 30
76 | _C.DATASET.TRAIN = 'train2017'
77 | _C.DATASET.TEST = 'val2017'
78 | _C.DATASET.DATA_FORMAT = 'jpg'
79 |
80 | # training data augmentation
81 | _C.DATASET.MAX_ROTATION = 30
82 | _C.DATASET.MIN_SCALE = 0.75
83 | _C.DATASET.MAX_SCALE = 1.25
84 | _C.DATASET.SCALE_TYPE = 'short'
85 | _C.DATASET.MAX_TRANSLATE = 40
86 | _C.DATASET.INPUT_SIZE = 512
87 | _C.DATASET.OUTPUT_SIZE = [128, 256, 512]
88 | _C.DATASET.FLIP = 0.5
89 |
90 | # heatmap generator (default is OUTPUT_SIZE/64)
91 | _C.DATASET.SIGMA = -1
92 | _C.DATASET.SCALE_AWARE_SIGMA = False
93 | _C.DATASET.BASE_SIZE = 256.0
94 | _C.DATASET.BASE_SIGMA = 2.0
95 | _C.DATASET.INT_SIGMA = False
96 |
97 | _C.DATASET.WITH_CENTER = False
98 |
99 | # train
100 | _C.TRAIN = CN()
101 |
102 | _C.TRAIN.LR_FACTOR = 0.1
103 | _C.TRAIN.LR_STEP = [90, 110]
104 | _C.TRAIN.LR = 0.001
105 |
106 | _C.TRAIN.OPTIMIZER = 'adam'
107 | _C.TRAIN.MOMENTUM = 0.9
108 | _C.TRAIN.WD = 0.0001
109 | _C.TRAIN.NESTEROV = False
110 | _C.TRAIN.GAMMA1 = 0.99
111 | _C.TRAIN.GAMMA2 = 0.0
112 |
113 | _C.TRAIN.BEGIN_EPOCH = 0
114 | _C.TRAIN.END_EPOCH = 140
115 |
116 | _C.TRAIN.RESUME = False
117 | _C.TRAIN.CHECKPOINT = ''
118 |
119 | _C.TRAIN.IMAGES_PER_GPU = 1
120 | _C.TRAIN.SHUFFLE = True
121 |
122 | # testing
123 | _C.TEST = CN()
124 |
125 | # size of images for each device
126 | # _C.TEST.BATCH_SIZE = 32
127 | _C.TEST.IMAGES_PER_GPU = 1
128 | # Test Model Epoch
129 | _C.TEST.FLIP_TEST = False
130 | _C.TEST.ADJUST = True
131 | _C.TEST.REFINE = True
132 | _C.TEST.SCALE_FACTOR = [1]
133 | # group
134 | _C.TEST.DETECTION_THRESHOLD = 0.2
135 | _C.TEST.TAG_THRESHOLD = 1.
136 | _C.TEST.USE_DETECTION_VAL = True
137 | _C.TEST.IGNORE_TOO_MUCH = False
138 | _C.TEST.MODEL_FILE = ''
139 | _C.TEST.IGNORE_CENTER = True
140 | _C.TEST.NMS_KERNEL = 3
141 | _C.TEST.NMS_PADDING = 1
142 | _C.TEST.PROJECT2IMAGE = False
143 |
144 | _C.TEST.WITH_HEATMAPS = (True,)
145 | _C.TEST.WITH_AE = (True,)
146 |
147 | _C.TEST.LOG_PROGRESS = False
148 |
149 | # debug
150 | _C.DEBUG = CN()
151 | _C.DEBUG.DEBUG = True
152 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False
153 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False
154 | _C.DEBUG.SAVE_HEATMAPS_GT = True
155 | _C.DEBUG.SAVE_HEATMAPS_PRED = True
156 | _C.DEBUG.SAVE_TAGMAPS_PRED = True
157 |
158 |
159 | def update_config(cfg, args):
160 | cfg.defrost()
161 | cfg.merge_from_file(args.cfg)
162 | cfg.merge_from_list(args.opts)
163 |
164 | if not os.path.exists(cfg.DATASET.ROOT):
165 | cfg.DATASET.ROOT = os.path.join(
166 | cfg.DATA_DIR, cfg.DATASET.ROOT
167 | )
168 |
169 | cfg.MODEL.PRETRAINED = os.path.join(
170 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED
171 | )
172 |
173 | if cfg.TEST.MODEL_FILE:
174 | cfg.TEST.MODEL_FILE = os.path.join(
175 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE
176 | )
177 |
178 | if cfg.DATASET.WITH_CENTER:
179 | cfg.DATASET.NUM_JOINTS += 1
180 | cfg.MODEL.NUM_JOINTS = cfg.DATASET.NUM_JOINTS
181 |
182 | if not isinstance(cfg.DATASET.OUTPUT_SIZE, (list, tuple)):
183 | cfg.DATASET.OUTPUT_SIZE = [cfg.DATASET.OUTPUT_SIZE]
184 | if not isinstance(cfg.LOSS.WITH_HEATMAPS_LOSS, (list, tuple)):
185 | cfg.LOSS.WITH_HEATMAPS_LOSS = (cfg.LOSS.WITH_HEATMAPS_LOSS,)  # trailing comma makes this a one-element tuple
186 |
187 | if not isinstance(cfg.LOSS.HEATMAPS_LOSS_FACTOR, (list, tuple)):
188 | cfg.LOSS.HEATMAPS_LOSS_FACTOR = (cfg.LOSS.HEATMAPS_LOSS_FACTOR,)
189 |
190 | if not isinstance(cfg.LOSS.WITH_AE_LOSS, (list, tuple)):
191 | cfg.LOSS.WITH_AE_LOSS = (cfg.LOSS.WITH_AE_LOSS,)
192 |
193 | if not isinstance(cfg.LOSS.PUSH_LOSS_FACTOR, (list, tuple)):
194 | cfg.LOSS.PUSH_LOSS_FACTOR = (cfg.LOSS.PUSH_LOSS_FACTOR,)
195 |
196 | if not isinstance(cfg.LOSS.PULL_LOSS_FACTOR, (list, tuple)):
197 | cfg.LOSS.PULL_LOSS_FACTOR = (cfg.LOSS.PULL_LOSS_FACTOR,)
198 |
199 | cfg.freeze()
200 |
201 |
202 | def check_config(cfg):
203 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.WITH_HEATMAPS_LOSS), \
204 | 'LOSS.NUM_STAGES should be the same as the length of LOSS.WITH_HEATMAPS_LOSS'
205 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.HEATMAPS_LOSS_FACTOR), \
206 | 'LOSS.NUM_STAGES should be the same as the length of LOSS.HEATMAPS_LOSS_FACTOR'
207 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.WITH_AE_LOSS), \
208 | 'LOSS.NUM_STAGES should be the same as the length of LOSS.WITH_AE_LOSS'
209 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.PUSH_LOSS_FACTOR), \
210 | 'LOSS.NUM_STAGES should be the same as the length of LOSS.PUSH_LOSS_FACTOR'
211 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.PULL_LOSS_FACTOR), \
212 | 'LOSS.NUM_STAGES should be the same as the length of LOSS.PULL_LOSS_FACTOR'
213 | assert cfg.LOSS.NUM_STAGES == len(cfg.TEST.WITH_HEATMAPS), \
214 | 'LOSS.NUM_STAGES should be the same as the length of TEST.WITH_HEATMAPS'
215 | assert cfg.LOSS.NUM_STAGES == len(cfg.TEST.WITH_AE), \
216 | 'LOSS.NUM_STAGES should be the same as the length of TEST.WITH_AE'
217 |
218 |
219 | if __name__ == '__main__':
220 | import sys
221 | with open(sys.argv[1], 'w') as f:
222 | print(_C, file=f)
223 |
--------------------------------------------------------------------------------
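The `cfg` node, `update_config`, and `check_config` defined above are driven from the scripts in `tools/`. A minimal sketch of that usage pattern follows, assuming an argparse namespace that exposes `cfg` and `opts` exactly as `update_config` reads them; it is illustrative only, not a verbatim copy of `tools/dist_train.py` or `tools/valid.py` (and it assumes `lib/` is on `sys.path`, which `tools/_init_paths.py` is presumably responsible for).

```python
# Illustrative driver for lib/config (not taken verbatim from this repository).
import argparse

from config import cfg, check_config, update_config  # exported by lib/config/__init__.py


def parse_args():
    parser = argparse.ArgumentParser(description='EfficientHRNet config example')
    parser.add_argument('--cfg', type=str, required=True,
                        help='path to a YAML file, e.g. Example Configs/H-1.yaml')
    parser.add_argument('opts', nargs=argparse.REMAINDER, default=[],
                        help='KEY VALUE overrides, e.g. TEST.MODEL_FILE /path/to/model.pth')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    update_config(cfg, args)   # merge the YAML file, apply overrides, then freeze
    check_config(cfg)          # verify LOSS.NUM_STAGES matches the per-stage lists
    print(cfg.MODEL.NAME, cfg.DATASET.ROOT, cfg.TRAIN.LR)
```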
/lib/config/models.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from yacs.config import CfgNode as CN
12 |
13 |
14 | # pose_multi_resolution_net related params
15 | POSE_HIGHER_RESOLUTION_NET = CN()
16 | POSE_HIGHER_RESOLUTION_NET.PRETRAINED_LAYERS = ['*']
17 | POSE_HIGHER_RESOLUTION_NET.STEM_INPLANES = 64
18 | POSE_HIGHER_RESOLUTION_NET.FINAL_CONV_KERNEL = 1
19 |
20 | POSE_HIGHER_RESOLUTION_NET.STAGE1 = CN()
21 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_MODULES = 1
22 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BRANCHES = 1
23 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BLOCKS = [4]
24 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_CHANNELS = [64]
25 | POSE_HIGHER_RESOLUTION_NET.STAGE1.BLOCK = 'BOTTLENECK'
26 | POSE_HIGHER_RESOLUTION_NET.STAGE1.FUSE_METHOD = 'SUM'
27 |
28 | POSE_HIGHER_RESOLUTION_NET.STAGE2 = CN()
29 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_MODULES = 1
30 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2
31 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4]
32 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [24, 48]
33 | POSE_HIGHER_RESOLUTION_NET.STAGE2.BLOCK = 'BOTTLENECK'
34 | POSE_HIGHER_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM'
35 |
36 | POSE_HIGHER_RESOLUTION_NET.STAGE3 = CN()
37 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_MODULES = 1
38 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3
39 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4]
40 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [24, 48, 92]
41 | POSE_HIGHER_RESOLUTION_NET.STAGE3.BLOCK = 'BOTTLENECK'
42 | POSE_HIGHER_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM'
43 |
44 | POSE_HIGHER_RESOLUTION_NET.STAGE4 = CN()
45 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_MODULES = 1
46 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4
47 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
48 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [24, 48, 92, 192]
49 | POSE_HIGHER_RESOLUTION_NET.STAGE4.BLOCK = 'BOTTLENECK'
50 | POSE_HIGHER_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM'
51 |
52 | POSE_HIGHER_RESOLUTION_NET.DECONV = CN()
53 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_DECONVS = 2
54 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_CHANNELS = [32, 32]
55 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_BASIC_BLOCKS = 4
56 | POSE_HIGHER_RESOLUTION_NET.DECONV.KERNEL_SIZE = [2, 2]
57 | POSE_HIGHER_RESOLUTION_NET.DECONV.CAT_OUTPUT = [True, True]
58 |
59 |
60 | MODEL_EXTRAS = {
61 | 'pose_multi_resolution_net_v16': POSE_HIGHER_RESOLUTION_NET,
62 | }
63 |
--------------------------------------------------------------------------------
/lib/core/__pycache__/group.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/group.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/core/__pycache__/inference.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/inference.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/core/__pycache__/loss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/loss.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/core/__pycache__/trainer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/trainer.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/core/group.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Some code is from https://github.com/princeton-vl/pose-ae-train/blob/454d4ba113bbb9775d4dc259ef5e6c07c2ceed54/utils/group.py
5 | # Written by Bin Xiao (leoxiaobin@gmail.com)
6 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
7 | # ------------------------------------------------------------------------------
8 |
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | from munkres import Munkres
14 | import numpy as np
15 | import torch
16 |
17 |
18 | def py_max_match(scores):
19 | m = Munkres()
20 | tmp = m.compute(scores)
21 | tmp = np.array(tmp).astype(np.int32)
22 | return tmp
23 |
24 |
25 | def match_by_tag(inp, params):
26 | assert isinstance(params, Params), 'params should be class Params()'
27 |
28 | tag_k, loc_k, val_k = inp
29 | default_ = np.zeros((params.num_joints, 3 + tag_k.shape[2]))
30 |
31 | joint_dict = {}
32 | tag_dict = {}
33 | for i in range(params.num_joints):
34 | idx = params.joint_order[i]
35 |
36 | tags = tag_k[idx]
37 | joints = np.concatenate(
38 | (loc_k[idx], val_k[idx, :, None], tags), 1
39 | )
40 | mask = joints[:, 2] > params.detection_threshold
41 | tags = tags[mask]
42 | joints = joints[mask]
43 |
44 | if joints.shape[0] == 0:
45 | continue
46 |
47 | if i == 0 or len(joint_dict) == 0:
48 | for tag, joint in zip(tags, joints):
49 | key = tag[0]
50 | joint_dict.setdefault(key, np.copy(default_))[idx] = joint
51 | tag_dict[key] = [tag]
52 | else:
53 | grouped_keys = list(joint_dict.keys())[:params.max_num_people]
54 | grouped_tags = [np.mean(tag_dict[i], axis=0) for i in grouped_keys]
55 |
56 | if params.ignore_too_much \
57 | and len(grouped_keys) == params.max_num_people:
58 | continue
59 |
60 | diff = joints[:, None, 3:] - np.array(grouped_tags)[None, :, :]
61 | diff_normed = np.linalg.norm(diff, ord=2, axis=2)
62 | diff_saved = np.copy(diff_normed)
63 |
64 | if params.use_detection_val:
65 | diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3]
66 |
67 | num_added = diff.shape[0]
68 | num_grouped = diff.shape[1]
69 |
70 | if num_added > num_grouped:
71 | diff_normed = np.concatenate(
72 | (
73 | diff_normed,
74 | np.zeros((num_added, num_added-num_grouped))+1e10
75 | ),
76 | axis=1
77 | )
78 |
79 | pairs = py_max_match(diff_normed)
80 | for row, col in pairs:
81 | if (
82 | row < num_added
83 | and col < num_grouped
84 | and diff_saved[row][col] < params.tag_threshold
85 | ):
86 | key = grouped_keys[col]
87 | joint_dict[key][idx] = joints[row]
88 | tag_dict[key].append(tags[row])
89 | else:
90 | key = tags[row][0]
91 | joint_dict.setdefault(key, np.copy(default_))[idx] = \
92 | joints[row]
93 | tag_dict[key] = [tags[row]]
94 |
95 | ans = np.array([joint_dict[i] for i in joint_dict]).astype(np.float32)
96 | return ans
97 |
98 |
99 | class Params(object):
100 | def __init__(self, cfg):
101 | self.num_joints = cfg.DATASET.NUM_JOINTS
102 | self.max_num_people = cfg.DATASET.MAX_NUM_PEOPLE
103 |
104 | self.detection_threshold = cfg.TEST.DETECTION_THRESHOLD
105 | self.tag_threshold = cfg.TEST.TAG_THRESHOLD
106 | self.use_detection_val = cfg.TEST.USE_DETECTION_VAL
107 | self.ignore_too_much = cfg.TEST.IGNORE_TOO_MUCH
108 |
109 | if cfg.DATASET.WITH_CENTER and cfg.TEST.IGNORE_CENTER:
110 | self.num_joints -= 1
111 |
112 | if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER:
113 | self.joint_order = [
114 | i-1 for i in [18, 1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17]
115 | ]
116 | else:
117 | self.joint_order = [
118 | i-1 for i in [1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17]
119 | ]
120 |
121 |
122 | class HeatmapParser(object):
123 | def __init__(self, cfg):
124 | self.params = Params(cfg)
125 | self.tag_per_joint = cfg.MODEL.TAG_PER_JOINT
126 | self.pool = torch.nn.MaxPool2d(
127 | cfg.TEST.NMS_KERNEL, 1, cfg.TEST.NMS_PADDING
128 | )
129 |
130 | def nms(self, det):
131 | maxm = self.pool(det)
132 | maxm = torch.eq(maxm, det).float()
133 | det = det * maxm
134 | return det
135 |
136 | def match(self, tag_k, loc_k, val_k):
137 | match = lambda x: match_by_tag(x, self.params)
138 | return list(map(match, zip(tag_k, loc_k, val_k)))
139 |
140 | def top_k(self, det, tag):
141 | # det = torch.Tensor(det, requires_grad=False)
142 | # tag = torch.Tensor(tag, requires_grad=False)
143 |
144 | det = self.nms(det)
145 | num_images = det.size(0)
146 | num_joints = det.size(1)
147 | h = det.size(2)
148 | w = det.size(3)
149 | det = det.view(num_images, num_joints, -1)
150 | val_k, ind = det.topk(self.params.max_num_people, dim=2)
151 |
152 | tag = tag.view(tag.size(0), tag.size(1), w*h, -1)
153 | if not self.tag_per_joint:
154 | tag = tag.expand(-1, self.params.num_joints, -1, -1)
155 |
156 | tag_k = torch.stack(
157 | [
158 | torch.gather(tag[:, :, :, i], 2, ind)
159 | for i in range(tag.size(3))
160 | ],
161 | dim=3
162 | )
163 |
164 | x = ind % w
165 | y = (ind // w).long()
166 |
167 | ind_k = torch.stack((x, y), dim=3)
168 |
169 | ans = {
170 | 'tag_k': tag_k.cpu().numpy(),
171 | 'loc_k': ind_k.cpu().numpy(),
172 | 'val_k': val_k.cpu().numpy()
173 | }
174 |
175 | return ans
176 |
177 | def adjust(self, ans, det):
178 | for batch_id, people in enumerate(ans):
179 | for people_id, i in enumerate(people):
180 | for joint_id, joint in enumerate(i):
181 | if joint[2] > 0:
182 |                         y, x = joint[0:2]  # note: 'y' here holds the column (x) coordinate and 'x' the row; the write-back below emits (x, y) order again
183 | xx, yy = int(x), int(y)
184 | #print(batch_id, joint_id, det[batch_id].shape)
185 | tmp = det[batch_id][joint_id]
186 | if tmp[xx, min(yy+1, tmp.shape[1]-1)] > tmp[xx, max(yy-1, 0)]:
187 | y += 0.25
188 | else:
189 | y -= 0.25
190 |
191 | if tmp[min(xx+1, tmp.shape[0]-1), yy] > tmp[max(0, xx-1), yy]:
192 | x += 0.25
193 | else:
194 | x -= 0.25
195 | ans[batch_id][people_id, joint_id, 0:2] = (y+0.5, x+0.5)
196 | return ans
197 |
198 | def refine(self, det, tag, keypoints):
199 | """
200 | Given initial keypoint predictions, we identify missing joints
201 | :param det: numpy.ndarray of size (17, 128, 128)
202 | :param tag: numpy.ndarray of size (17, 128, 128) if not flip
203 | :param keypoints: numpy.ndarray of size (17, 4) if not flip, last dim is (x, y, det score, tag score)
204 | :return:
205 | """
206 | if len(tag.shape) == 3:
207 | # tag shape: (17, 128, 128, 1)
208 | tag = tag[:, :, :, None]
209 |
210 | tags = []
211 | for i in range(keypoints.shape[0]):
212 | if keypoints[i, 2] > 0:
213 | # save tag value of detected keypoint
214 | x, y = keypoints[i][:2].astype(np.int32)
215 | tags.append(tag[i, y, x])
216 |
217 |         # mean tag of the currently detected person
218 | prev_tag = np.mean(tags, axis=0)
219 | ans = []
220 |
221 | for i in range(keypoints.shape[0]):
222 |             # score of joint i at every position
223 |             tmp = det[i, :, :]
224 |             # distance of every tag value from the mean tag of the current person
225 | tt = (((tag[i, :, :] - prev_tag[None, None, :]) ** 2).sum(axis=2) ** 0.5)
226 | tmp2 = tmp - np.round(tt)
227 |
228 | # find maximum position
229 | y, x = np.unravel_index(np.argmax(tmp2), tmp.shape)
230 | xx = x
231 | yy = y
232 | # detection score at maximum position
233 | val = tmp[y, x]
234 | # offset by 0.5
235 | x += 0.5
236 | y += 0.5
237 |
238 | # add a quarter offset
239 | if tmp[yy, min(xx + 1, tmp.shape[1] - 1)] > tmp[yy, max(xx - 1, 0)]:
240 | x += 0.25
241 | else:
242 | x -= 0.25
243 |
244 | if tmp[min(yy + 1, tmp.shape[0] - 1), xx] > tmp[max(0, yy - 1), xx]:
245 | y += 0.25
246 | else:
247 | y -= 0.25
248 |
249 | ans.append((x, y, val))
250 | ans = np.array(ans)
251 |
252 | if ans is not None:
253 | for i in range(det.shape[0]):
254 | # add keypoint if it is not detected
255 | if ans[i, 2] > 0 and keypoints[i, 2] == 0:
256 | # if ans[i, 2] > 0.01 and keypoints[i, 2] == 0:
257 | keypoints[i, :2] = ans[i, :2]
258 | keypoints[i, 2] = ans[i, 2]
259 |
260 | return keypoints
261 |
262 | def parse(self, det, tag, adjust=True, refine=True):
263 | ans = self.match(**self.top_k(det, tag))
264 |
265 | if adjust:
266 | ans = self.adjust(ans, det)
267 |
268 | scores = [i[:, 2].mean() for i in ans[0]]
269 |
270 | if refine:
271 | ans = ans[0]
272 | # for every detected person
273 | for i in range(len(ans)):
274 | det_numpy = det[0].cpu().numpy()
275 | tag_numpy = tag[0].cpu().numpy()
276 | if not self.tag_per_joint:
277 | tag_numpy = np.tile(
278 | tag_numpy, (self.params.num_joints, 1, 1, 1)
279 | )
280 | ans[i] = self.refine(det_numpy, tag_numpy, ans[i])
281 | ans = [ans]
282 |
283 | return ans, scores
284 |
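
A minimal usage sketch of the parser above (illustrative only, not part of this file): grouping assigns candidate joints to people by solving a Munkres/Hungarian assignment over tag-distance costs, so callers only touch HeatmapParser.parse. Here cfg is assumed to be a fully loaded experiment config, and heatmaps/tags are detached network outputs for a single image.

    # heatmaps: (1, num_joints, H, W) tensor; tags: (1, num_joints, H, W, L) tensor,
    # where L is the number of aggregated tag maps (e.g. flip / multi-scale copies).
    parser = HeatmapParser(cfg)
    with torch.no_grad():
        grouped, scores = parser.parse(heatmaps, tags, adjust=True, refine=True)
    # grouped[0]: (num_people, num_joints, 3 + L) array of (x, y, det score, tag...)
    # scores: one mean detection score per person, used later for rescoring.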
--------------------------------------------------------------------------------
/lib/core/inference.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 |
13 | import torch
14 |
15 | from dataset.transforms import FLIP_CONFIG
16 |
17 |
18 | def get_outputs(
19 | cfg, model, image, with_flip=False,
20 | project2image=False, size_projected=None
21 | ):
22 | outputs = []
23 | heatmaps = []
24 | tags = []
25 |
26 | outputs.append(model(image))
27 | heatmaps.append(outputs[-1][:, :cfg.DATASET.NUM_JOINTS])
28 | tags.append(outputs[-1][:, cfg.DATASET.NUM_JOINTS:])
29 |
30 | if with_flip:
31 | outputs.append(model(torch.flip(image, [3])))
32 | outputs[-1] = torch.flip(outputs[-1], [3])
33 | heatmaps.append(outputs[-1][:, :cfg.DATASET.NUM_JOINTS])
34 | tags.append(outputs[-1][:, cfg.DATASET.NUM_JOINTS:])
35 | if 'coco' in cfg.DATASET.DATASET:
36 | dataset_name = 'COCO'
37 | elif 'crowd_pose' in cfg.DATASET.DATASET:
38 | dataset_name = 'CROWDPOSE'
39 | else:
40 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET)
41 | flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] \
42 | if cfg.DATASET.WITH_CENTER else FLIP_CONFIG[dataset_name]
43 | heatmaps[-1] = heatmaps[-1][:, flip_index, :, :]
44 | if cfg.MODEL.TAG_PER_JOINT:
45 | tags[-1] = tags[-1][:, flip_index, :, :]
46 |
47 | if cfg.DATASET.WITH_CENTER and cfg.TEST.IGNORE_CENTER:
48 | heatmaps = [hms[:, :-1] for hms in heatmaps]
49 | tags = [tms[:, :-1] for tms in tags]
50 |
51 | if project2image and size_projected:
52 | heatmaps = [
53 | torch.nn.functional.interpolate(
54 | hms,
55 | size=(size_projected[1], size_projected[0]),
56 | mode='bilinear',
57 | align_corners=False
58 | )
59 | for hms in heatmaps
60 | ]
61 |
62 | tags = [
63 | torch.nn.functional.interpolate(
64 | tms,
65 | size=(size_projected[1], size_projected[0]),
66 | mode='bilinear',
67 | align_corners=False
68 | )
69 | for tms in tags
70 | ]
71 |
72 | return outputs, heatmaps, tags
73 |
74 |
75 | def get_multi_stage_outputs(
76 | cfg, model, image, with_flip=False,
77 | project2image=False, size_projected=None
78 | ):
79 | # outputs = []
80 | heatmaps_avg = 0
81 | num_heatmaps = 0
82 | heatmaps = []
83 | tags = []
84 |
85 | outputs = model(image)
86 | for i, output in enumerate(outputs):
87 | if len(outputs) > 1 and i != len(outputs) - 1:
88 | output = torch.nn.functional.interpolate(
89 | output,
90 | size=(outputs[-1].size(2), outputs[-1].size(3)),
91 | mode='bilinear',
92 | align_corners=False
93 | )
94 |
95 | offset_feat = cfg.DATASET.NUM_JOINTS \
96 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] else 0
97 |
98 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] and cfg.TEST.WITH_HEATMAPS[i]:
99 | heatmaps_avg += output[:, :cfg.DATASET.NUM_JOINTS]
100 | num_heatmaps += 1
101 |
102 | if cfg.LOSS.WITH_AE_LOSS[i] and cfg.TEST.WITH_AE[i]:
103 | tags.append(output[:, offset_feat:])
104 |
105 | if num_heatmaps > 0:
106 | heatmaps.append(heatmaps_avg/num_heatmaps)
107 |
108 | if with_flip:
109 | if 'coco' in cfg.DATASET.DATASET:
110 | dataset_name = 'COCO'
111 | elif 'crowd_pose' in cfg.DATASET.DATASET:
112 | dataset_name = 'CROWDPOSE'
113 | else:
114 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET)
115 | flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] \
116 | if cfg.DATASET.WITH_CENTER else FLIP_CONFIG[dataset_name]
117 |
118 | heatmaps_avg = 0
119 | num_heatmaps = 0
120 | outputs_flip = model(torch.flip(image, [3]))
121 | for i in range(len(outputs_flip)):
122 | output = outputs_flip[i]
123 | if len(outputs_flip) > 1 and i != len(outputs_flip) - 1:
124 | output = torch.nn.functional.interpolate(
125 | output,
126 | size=(outputs_flip[-1].size(2), outputs_flip[-1].size(3)),
127 | mode='bilinear',
128 | align_corners=False
129 | )
130 | output = torch.flip(output, [3])
131 | outputs.append(output)
132 |
133 | offset_feat = cfg.DATASET.NUM_JOINTS \
134 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] else 0
135 |
136 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] and cfg.TEST.WITH_HEATMAPS[i]:
137 | heatmaps_avg += \
138 | output[:, :cfg.DATASET.NUM_JOINTS][:, flip_index, :, :]
139 | num_heatmaps += 1
140 |
141 | if cfg.LOSS.WITH_AE_LOSS[i] and cfg.TEST.WITH_AE[i]:
142 | tags.append(output[:, offset_feat:])
143 | if cfg.MODEL.TAG_PER_JOINT:
144 | tags[-1] = tags[-1][:, flip_index, :, :]
145 |
146 | heatmaps.append(heatmaps_avg/num_heatmaps)
147 |
148 | if cfg.DATASET.WITH_CENTER and cfg.TEST.IGNORE_CENTER:
149 | heatmaps = [hms[:, :-1] for hms in heatmaps]
150 | tags = [tms[:, :-1] for tms in tags]
151 |
152 | if project2image and size_projected:
153 | heatmaps = [
154 | torch.nn.functional.interpolate(
155 | hms,
156 | size=(size_projected[1], size_projected[0]),
157 | mode='bilinear',
158 | align_corners=False
159 | )
160 | for hms in heatmaps
161 | ]
162 |
163 | tags = [
164 | torch.nn.functional.interpolate(
165 | tms,
166 | size=(size_projected[1], size_projected[0]),
167 | mode='bilinear',
168 | align_corners=False
169 | )
170 | for tms in tags
171 | ]
172 |
173 | return outputs, heatmaps, tags
174 |
175 |
176 | def aggregate_results(
177 | cfg, scale_factor, final_heatmaps, tags_list, heatmaps, tags
178 | ):
179 | if scale_factor == 1 or len(cfg.TEST.SCALE_FACTOR) == 1:
180 | if final_heatmaps is not None and not cfg.TEST.PROJECT2IMAGE:
181 | tags = [
182 | torch.nn.functional.interpolate(
183 | tms,
184 | size=(final_heatmaps.size(2), final_heatmaps.size(3)),
185 | mode='bilinear',
186 | align_corners=False
187 | )
188 | for tms in tags
189 | ]
190 | for tms in tags:
191 | tags_list.append(torch.unsqueeze(tms, dim=4))
192 |
193 | heatmaps_avg = (heatmaps[0] + heatmaps[1])/2.0 if cfg.TEST.FLIP_TEST \
194 | else heatmaps[0]
195 |
196 | if final_heatmaps is None:
197 | final_heatmaps = heatmaps_avg
198 | elif cfg.TEST.PROJECT2IMAGE:
199 | final_heatmaps += heatmaps_avg
200 | else:
201 | final_heatmaps += torch.nn.functional.interpolate(
202 | heatmaps_avg,
203 | size=(final_heatmaps.size(2), final_heatmaps.size(3)),
204 | mode='bilinear',
205 | align_corners=False
206 | )
207 |
208 | return final_heatmaps, tags_list
209 |
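
For context, a minimal sketch of how the two helpers above are typically combined at test time (illustrative only; the resize helper, image tensor, and base_size are placeholders, not definitions from this file). Each scale yields flip-averaged heatmaps and a list of tag maps, and aggregate_results accumulates them into one averaged heatmap plus a stack of tag maps.

    final_heatmaps, tags_list = None, []
    for s in sorted(cfg.TEST.SCALE_FACTOR, reverse=True):
        image_resized = resize_to_scale(image, s)    # hypothetical resize utility
        outputs, heatmaps, tags = get_multi_stage_outputs(
            cfg, model, image_resized,
            with_flip=cfg.TEST.FLIP_TEST,
            project2image=cfg.TEST.PROJECT2IMAGE,
            size_projected=base_size                 # (w, h) of the scale-1 image
        )
        final_heatmaps, tags_list = aggregate_results(
            cfg, s, final_heatmaps, tags_list, heatmaps, tags
        )
    final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
    tags = torch.cat(tags_list, dim=4)               # (1, joints, H, W, num_tag_maps)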
--------------------------------------------------------------------------------
/lib/core/trainer.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import logging
13 | import os
14 | import time
15 |
16 | from utils.utils import AverageMeter
17 | from utils.vis import save_debug_images
18 | from ptflops import get_model_complexity_info
19 |
20 |
21 | def do_train(cfg, model, data_loader, loss_factory, optimizer, epoch,
22 | output_dir, tb_log_dir, writer_dict, fp16=False):
23 | logger = logging.getLogger("Training")
24 |
25 | batch_time = AverageMeter()
26 | data_time = AverageMeter()
27 |
28 | heatmaps_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]
29 | push_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]
30 | pull_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)]
31 |
32 | # switch to train mode
33 | model.train()
34 | #flops, params = get_model_complexity_info(model.module.features, (3,384,384), as_strings=False)
35 | #print("FLops", flops)
36 | #print("Params", params)
37 |
38 |
39 |     for layer in model.module.features:  # freeze all backbone (feature-extractor) parameters; only the remaining layers are trained
40 | for param in layer.parameters():
41 | param.requires_grad = False
42 |
43 | end = time.time()
44 | for i, (images, heatmaps, masks, joints) in enumerate(data_loader):
45 | # measure data loading time
46 | data_time.update(time.time() - end)
47 |
48 | # compute output
49 | outputs = model(images)
50 |
51 | heatmaps = list(map(lambda x: x.cuda(non_blocking=True), heatmaps))
52 | masks = list(map(lambda x: x.cuda(non_blocking=True), masks))
53 | joints = list(map(lambda x: x.cuda(non_blocking=True), joints))
54 |
55 | # loss = loss_factory(outputs, heatmaps, masks)
56 | heatmaps_losses, push_losses, pull_losses = \
57 | loss_factory(outputs, heatmaps, masks, joints)
58 |
59 | loss = 0
60 | for idx in range(cfg.LOSS.NUM_STAGES):
61 | if heatmaps_losses[idx] is not None:
62 | heatmaps_loss = heatmaps_losses[idx].mean(dim=0)
63 | heatmaps_loss_meter[idx].update(
64 | heatmaps_loss.item(), images.size(0)
65 | )
66 | loss = loss + heatmaps_loss
67 | if push_losses[idx] is not None:
68 | push_loss = push_losses[idx].mean(dim=0)
69 | push_loss_meter[idx].update(
70 | push_loss.item(), images.size(0)
71 | )
72 | loss = loss + push_loss
73 | if pull_losses[idx] is not None:
74 | pull_loss = pull_losses[idx].mean(dim=0)
75 | pull_loss_meter[idx].update(
76 | pull_loss.item(), images.size(0)
77 | )
78 | loss = loss + pull_loss
79 |
80 | # compute gradient and do update step
81 | optimizer.zero_grad()
82 | if fp16:
83 | optimizer.backward(loss)
84 | else:
85 | loss.backward()
86 | optimizer.step()
87 |
88 | # measure elapsed time
89 | batch_time.update(time.time() - end)
90 | end = time.time()
91 |
92 | if i % cfg.PRINT_FREQ == 0 and cfg.RANK == 0:
93 | msg = 'Epoch: [{0}][{1}/{2}]\t' \
94 | 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
95 | 'Speed: {speed:.1f} samples/s\t' \
96 | 'Data: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
97 | '{heatmaps_loss}{push_loss}{pull_loss}'.format(
98 | epoch, i, len(data_loader),
99 | batch_time=batch_time,
100 | speed=images.size(0)/batch_time.val,
101 | data_time=data_time,
102 | heatmaps_loss=_get_loss_info(heatmaps_loss_meter, 'heatmaps'),
103 | push_loss=_get_loss_info(push_loss_meter, 'push'),
104 | pull_loss=_get_loss_info(pull_loss_meter, 'pull')
105 | )
106 | logger.info(msg)
107 |
108 | writer = writer_dict['writer']
109 | global_steps = writer_dict['train_global_steps']
110 | for idx in range(cfg.LOSS.NUM_STAGES):
111 | writer.add_scalar(
112 |                     'train_stage{}_heatmaps_loss'.format(idx),
113 | heatmaps_loss_meter[idx].val,
114 | global_steps
115 | )
116 | writer.add_scalar(
117 | 'train_stage{}_push_loss'.format(idx),
118 | push_loss_meter[idx].val,
119 | global_steps
120 | )
121 | writer.add_scalar(
122 | 'train_stage{}_pull_loss'.format(idx),
123 | pull_loss_meter[idx].val,
124 | global_steps
125 | )
126 | writer_dict['train_global_steps'] = global_steps + 1
127 |
128 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
129 | for scale_idx in range(len(outputs)):
130 | prefix_scale = prefix + '_output_{}'.format(
131 | cfg.DATASET.OUTPUT_SIZE[scale_idx]
132 | )
133 | save_debug_images(
134 | cfg, images, heatmaps[scale_idx], masks[scale_idx],
135 | outputs[scale_idx], prefix_scale
136 | )
137 |
138 |
139 | def _get_loss_info(loss_meters, loss_name):
140 | msg = ''
141 | for i, meter in enumerate(loss_meters):
142 | msg += 'Stage{i}-{name}: {meter.val:.3e} ({meter.avg:.3e})\t'.format(
143 | i=i, name=loss_name, meter=meter
144 | )
145 |
146 | return msg
147 |
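
A hedged sketch of the usual call site for do_train, once per epoch from the training script. The model, loss factory, optimizer, scheduler, data loader and TensorBoard writer are assumed to be built elsewhere (e.g. in tools/dist_train.py), and the FP16 field name is taken from the default config as an assumption:

    writer_dict = {'writer': writer, 'train_global_steps': 0, 'valid_global_steps': 0}
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        do_train(cfg, model, train_loader, loss_factory, optimizer, epoch,
                 final_output_dir, tb_log_dir, writer_dict, fp16=cfg.FP16.ENABLED)
        lr_scheduler.step()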
--------------------------------------------------------------------------------
/lib/dataset/COCODataset.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | from collections import defaultdict
13 | from collections import OrderedDict
14 | import logging
15 | import os
16 | import os.path
17 |
18 | import cv2
19 | import json_tricks as json
20 | import numpy as np
21 | from torch.utils.data import Dataset
22 |
23 | from pycocotools.cocoeval import COCOeval
24 | from utils import zipreader
25 |
26 | logger = logging.getLogger(__name__)
27 |
28 |
29 | class CocoDataset(Dataset):
30 |     """`MS Coco Detection`_ Dataset.
31 | 
32 |     Args:
33 |         root (string): Root directory where the dataset is located.
34 |         dataset (string): Dataset name (train2017, val2017, test2017).
35 |         data_format (string): Data format for reading ('jpg' or 'zip').
36 |         transform (callable, optional): A function/transform that takes in an OpenCV image
37 |             and returns a transformed version, e.g. ``transforms.ToTensor``.
38 | target_transform (callable, optional): A function/transform that takes in the
39 | target and transforms it.
40 | """
41 |
42 | def __init__(self, root, dataset, data_format, transform=None,
43 | target_transform=None):
44 | from pycocotools.coco import COCO
45 | self.name = 'COCO'
46 | self.root = root
47 | self.dataset = dataset
48 | self.data_format = data_format
49 | self.coco = COCO(self._get_anno_file_name())
50 | self.ids = list(self.coco.imgs.keys())
51 | self.transform = transform
52 | self.target_transform = target_transform
53 |
54 | cats = [cat['name']
55 | for cat in self.coco.loadCats(self.coco.getCatIds())]
56 | self.classes = ['__background__'] + cats
57 | logger.info('=> classes: {}'.format(self.classes))
58 | self.num_classes = len(self.classes)
59 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
60 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
61 | self._coco_ind_to_class_ind = dict(
62 | [
63 | (self._class_to_coco_ind[cls], self._class_to_ind[cls])
64 | for cls in self.classes[1:]
65 | ]
66 | )
67 |
68 | def _get_anno_file_name(self):
69 |         # example: root/annotations/person_keypoints_train2017.json
70 | # image_info_test-dev2017.json
71 | if 'test' in self.dataset:
72 | return os.path.join(
73 | self.root,
74 | 'annotations',
75 | 'image_info_{}.json'.format(
76 | self.dataset
77 | )
78 | )
79 | else:
80 | return os.path.join(
81 | self.root,
82 | 'annotations',
83 | 'person_keypoints_{}.json'.format(
84 | self.dataset
85 | )
86 | )
87 |
88 | def _get_image_path(self, file_name):
89 | images_dir = os.path.join(self.root, 'images')
90 | dataset = 'test2017' if 'test' in self.dataset else self.dataset
91 | if self.data_format == 'zip':
92 | return os.path.join(images_dir, dataset) + '.zip@' + file_name
93 | else:
94 | return os.path.join(images_dir, dataset, file_name)
95 |
96 | def __getitem__(self, index):
97 | """
98 | Args:
99 | index (int): Index
100 |
101 | Returns:
102 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
103 | """
104 | coco = self.coco
105 | img_id = self.ids[index]
106 | ann_ids = coco.getAnnIds(imgIds=img_id)
107 | target = coco.loadAnns(ann_ids)
108 |
109 | file_name = coco.loadImgs(img_id)[0]['file_name']
110 |
111 | if self.data_format == 'zip':
112 | img = zipreader.imread(
113 | self._get_image_path(file_name),
114 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
115 | )
116 | else:
117 | img = cv2.imread(
118 | self._get_image_path(file_name),
119 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
120 | )
121 |
122 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
123 |
124 | if self.transform is not None:
125 | img = self.transform(img)
126 |
127 | if self.target_transform is not None:
128 | target = self.target_transform(target)
129 |
130 | return img, target
131 |
132 | def __len__(self):
133 | return len(self.ids)
134 |
135 | def __repr__(self):
136 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
137 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
138 | fmt_str += ' Root Location: {}\n'.format(self.root)
139 | tmp = ' Transforms (if any): '
140 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
141 | tmp = ' Target Transforms (if any): '
142 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
143 | return fmt_str
144 |
145 | def processKeypoints(self, keypoints):
146 | tmp = keypoints.copy()
147 | if keypoints[:, 2].max() > 0:
148 | p = keypoints[keypoints[:, 2] > 0][:, :2].mean(axis=0)
149 | num_keypoints = keypoints.shape[0]
150 | for i in range(num_keypoints):
151 | tmp[i][0:3] = [
152 | float(keypoints[i][0]),
153 | float(keypoints[i][1]),
154 | float(keypoints[i][2])
155 | ]
156 |
157 | return tmp
158 |
159 | def evaluate(self, cfg, preds, scores, output_dir,
160 | *args, **kwargs):
161 | '''
162 | Perform evaluation on COCO keypoint task
163 | :param cfg: cfg dictionary
164 | :param preds: prediction
165 | :param output_dir: output directory
166 | :param args:
167 | :param kwargs:
168 | :return:
169 | '''
170 | res_folder = os.path.join(output_dir, 'results')
171 | if not os.path.exists(res_folder):
172 | os.makedirs(res_folder)
173 | res_file = os.path.join(
174 | res_folder, 'keypoints_%s_results.json' % self.dataset)
175 |
176 | # preds is a list of: image x person x (keypoints)
177 | # keypoints: num_joints * 4 (x, y, score, tag)
178 | kpts = defaultdict(list)
179 | for idx, _kpts in enumerate(preds):
180 | img_id = self.ids[idx]
181 | file_name = self.coco.loadImgs(img_id)[0]['file_name']
182 | for idx_kpt, kpt in enumerate(_kpts):
183 | area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (np.max(kpt[:, 1]) - np.min(kpt[:, 1]))
184 | kpt = self.processKeypoints(kpt)
185 | # if self.with_center:
186 | if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER:
187 | kpt = kpt[:-1]
188 |
189 | kpts[int(file_name[-16:-4])].append(
190 | {
191 | 'keypoints': kpt[:, 0:3],
192 | 'score': scores[idx][idx_kpt],
193 | 'tags': kpt[:, 3],
194 | 'image': int(file_name[-16:-4]),
195 | 'area': area
196 | }
197 | )
198 |
199 | # rescoring and oks nms
200 | oks_nmsed_kpts = []
201 | # image x person x (keypoints)
202 | for img in kpts.keys():
203 | # person x (keypoints)
204 | img_kpts = kpts[img]
205 | # person x (keypoints)
206 | # do not use nms, keep all detections
207 | keep = []
208 | if len(keep) == 0:
209 | oks_nmsed_kpts.append(img_kpts)
210 | else:
211 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
212 |
213 | self._write_coco_keypoint_results(
214 | oks_nmsed_kpts, res_file
215 | )
216 |
217 | if 'test' not in self.dataset:
218 | info_str = self._do_python_keypoint_eval(
219 | res_file, res_folder
220 | )
221 | name_value = OrderedDict(info_str)
222 | return name_value, name_value['AP']
223 | else:
224 | return {'Null': 0}, 0
225 |
226 | def _write_coco_keypoint_results(self, keypoints, res_file):
227 | data_pack = [
228 | {
229 | 'cat_id': self._class_to_coco_ind[cls],
230 | 'cls_ind': cls_ind,
231 | 'cls': cls,
232 | 'ann_type': 'keypoints',
233 | 'keypoints': keypoints
234 | }
235 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'
236 | ]
237 |
238 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
239 | logger.info('=> Writing results json to %s' % res_file)
240 | with open(res_file, 'w') as f:
241 | json.dump(results, f, sort_keys=True, indent=4)
242 | try:
243 | json.load(open(res_file))
244 | except Exception:
245 | content = []
246 | with open(res_file, 'r') as f:
247 | for line in f:
248 | content.append(line)
249 | content[-1] = ']'
250 | with open(res_file, 'w') as f:
251 | for c in content:
252 | f.write(c)
253 |
254 | def _coco_keypoint_results_one_category_kernel(self, data_pack):
255 | cat_id = data_pack['cat_id']
256 | keypoints = data_pack['keypoints']
257 | cat_results = []
258 | num_joints = 17
259 |
260 | for img_kpts in keypoints:
261 | if len(img_kpts) == 0:
262 | continue
263 |
264 | _key_points = np.array(
265 | [img_kpts[k]['keypoints'] for k in range(len(img_kpts))]
266 | )
267 | key_points = np.zeros(
268 | (_key_points.shape[0], num_joints * 3),
269 | dtype=np.float
270 | )
271 |
272 | for ipt in range(num_joints):
273 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
274 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
275 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score.
276 |
277 | for k in range(len(img_kpts)):
278 | kpt = key_points[k].reshape((num_joints, 3))
279 | left_top = np.amin(kpt, axis=0)
280 | right_bottom = np.amax(kpt, axis=0)
281 |
282 | w = right_bottom[0] - left_top[0]
283 | h = right_bottom[1] - left_top[1]
284 |
285 | cat_results.append({
286 | 'image_id': img_kpts[k]['image'],
287 | 'category_id': cat_id,
288 | 'keypoints': list(key_points[k]),
289 | 'score': img_kpts[k]['score'],
290 | 'bbox': list([left_top[0], left_top[1], w, h])
291 | })
292 |
293 | return cat_results
294 |
295 | def _do_python_keypoint_eval(self, res_file, res_folder):
296 | coco_dt = self.coco.loadRes(res_file)
297 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
298 | coco_eval.params.useSegm = None
299 | coco_eval.evaluate()
300 | coco_eval.accumulate()
301 | coco_eval.summarize()
302 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)']
303 |
304 | info_str = []
305 | for ind, name in enumerate(stats_names):
306 | info_str.append((name, coco_eval.stats[ind]))
307 | # info_str.append(coco_eval.stats[ind])
308 |
309 | return info_str
310 |
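
A minimal usage sketch for the raw dataset above (illustrative only, not part of this file); the root path follows the ROOT layout used by the example configs and is an assumption here:

    # 'data/coco' is assumed to contain annotations/ and images/val2017/
    dataset = CocoDataset(root='data/coco', dataset='val2017', data_format='jpg')
    img, anns = dataset[0]       # RGB ndarray and the raw COCO annotation list
    print(len(dataset), img.shape, len(anns))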
--------------------------------------------------------------------------------
/lib/dataset/COCOKeypoints.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import logging
13 |
14 | import numpy as np
15 |
16 | import pycocotools
17 | from .COCODataset import CocoDataset
18 | from .target_generators import HeatmapGenerator
19 |
20 |
21 | logger = logging.getLogger(__name__)
22 |
23 |
24 | class CocoKeypoints(CocoDataset):
25 | def __init__(self,
26 | cfg,
27 | dataset_name,
28 | remove_images_without_annotations,
29 | heatmap_generator,
30 | joints_generator,
31 | transforms=None):
32 | super().__init__(cfg.DATASET.ROOT,
33 | dataset_name,
34 | cfg.DATASET.DATA_FORMAT)
35 |
36 | if cfg.DATASET.WITH_CENTER:
37 |             assert cfg.DATASET.NUM_JOINTS == 18, 'Number of joints with center for COCO is 18'
38 |         else:
39 |             assert cfg.DATASET.NUM_JOINTS == 17, 'Number of joints for COCO is 17'
40 |
41 | self.num_scales = self._init_check(heatmap_generator, joints_generator)
42 |
43 | self.num_joints = cfg.DATASET.NUM_JOINTS
44 | self.with_center = cfg.DATASET.WITH_CENTER
45 | self.num_joints_without_center = self.num_joints - 1 \
46 | if self.with_center else self.num_joints
47 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA
48 | self.base_sigma = cfg.DATASET.BASE_SIGMA
49 | self.base_size = cfg.DATASET.BASE_SIZE
50 | self.int_sigma = cfg.DATASET.INT_SIGMA
51 |
52 | if remove_images_without_annotations:
53 | self.ids = [
54 | img_id
55 | for img_id in self.ids
56 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
57 | ]
58 |
59 | self.transforms = transforms
60 | self.heatmap_generator = heatmap_generator
61 | self.joints_generator = joints_generator
62 |
63 | def __getitem__(self, idx):
64 | img, anno = super().__getitem__(idx)
65 |
66 | mask = self.get_mask(anno, idx)
67 |
68 | anno = [
69 | obj for obj in anno
70 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
71 | ]
72 |
73 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint
74 | joints = self.get_joints(anno)
75 |
76 | mask_list = [mask.copy() for _ in range(self.num_scales)]
77 | joints_list = [joints.copy() for _ in range(self.num_scales)]
78 | target_list = list()
79 |
80 | if self.transforms:
81 | img, mask_list, joints_list = self.transforms(
82 | img, mask_list, joints_list
83 | )
84 |
85 | for scale_id in range(self.num_scales):
86 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id])
87 | joints_t = self.joints_generator[scale_id](joints_list[scale_id])
88 |
89 | target_list.append(target_t.astype(np.float32))
90 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32)
91 | joints_list[scale_id] = joints_t.astype(np.int32)
92 |
93 | return img, target_list, mask_list, joints_list
94 |
95 | def get_joints(self, anno):
96 | num_people = len(anno)
97 |
98 | if self.scale_aware_sigma:
99 | joints = np.zeros((num_people, self.num_joints, 4))
100 | else:
101 | joints = np.zeros((num_people, self.num_joints, 3))
102 |
103 | for i, obj in enumerate(anno):
104 | joints[i, :self.num_joints_without_center, :3] = \
105 | np.array(obj['keypoints']).reshape([-1, 3])
106 | if self.with_center:
107 | joints_sum = np.sum(joints[i, :-1, :2], axis=0)
108 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0])
109 | if num_vis_joints > 0:
110 | joints[i, -1, :2] = joints_sum / num_vis_joints
111 | joints[i, -1, 2] = 1
112 | if self.scale_aware_sigma:
113 | # get person box
114 | box = obj['bbox']
115 | size = max(box[2], box[3])
116 | sigma = size / self.base_size * self.base_sigma
117 | if self.int_sigma:
118 | sigma = int(np.round(sigma + 0.5))
119 | assert sigma > 0, sigma
120 | joints[i, :, 3] = sigma
121 |
122 | return joints
123 |
124 | def get_mask(self, anno, idx):
125 | coco = self.coco
126 | img_info = coco.loadImgs(self.ids[idx])[0]
127 |
128 | m = np.zeros((img_info['height'], img_info['width']))
129 |
130 | for obj in anno:
131 | if obj['iscrowd']:
132 | rle = pycocotools.mask.frPyObjects(
133 | obj['segmentation'], img_info['height'], img_info['width'])
134 | m += pycocotools.mask.decode(rle)
135 | elif obj['num_keypoints'] == 0:
136 | rles = pycocotools.mask.frPyObjects(
137 | obj['segmentation'], img_info['height'], img_info['width'])
138 | for rle in rles:
139 | m += pycocotools.mask.decode(rle)
140 |
141 | return m < 0.5
142 |
143 | def _init_check(self, heatmap_generator, joints_generator):
144 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple'
145 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple'
146 | assert len(heatmap_generator) == len(joints_generator), \
147 |             'heatmap_generator and joints_generator should have the same length, '\
148 | 'got {} vs {}.'.format(
149 | len(heatmap_generator), len(joints_generator)
150 | )
151 | return len(heatmap_generator)
152 |
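
As a concrete anchor for what __getitem__ above returns, a hedged sketch of the per-sample structure (shapes as read from the generators in lib/dataset/target_generators; S is the OUTPUT_SIZE entry of each scale, and the dataset is assumed to be built the same way as in lib/dataset/build.py):

    img, targets, masks, joints = dataset[0]
    # img:     transformed image (tensor or ndarray, depending on the transforms)
    # targets: one (num_joints, S, S) float32 heatmap array per output scale
    # masks:   one (S, S) float32 ignore mask per output scale
    # joints:  one (MAX_NUM_PEOPLE, num_joints, 2) int32 array per scale,
    #          holding (flattened heatmap index, visibility) for the AE loss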
--------------------------------------------------------------------------------
/lib/dataset/CrowdPoseDataset.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from collections import defaultdict
12 | from collections import OrderedDict
13 | import logging
14 | import os
15 | import os.path
16 |
17 | import cv2
18 | import json_tricks as json
19 | import numpy as np
20 | from torch.utils.data import Dataset
21 |
22 | from crowdposetools.cocoeval import COCOeval
23 | from utils import zipreader
24 |
25 | logger = logging.getLogger(__name__)
26 |
27 |
28 | class CrowdPoseDataset(Dataset):
29 | """`CrowdPose`_ Dataset.
30 |
31 | Args:
32 |         root (string): Root directory where the dataset is located.
33 |         dataset (string): Dataset name (e.g. train, val, test, trainval).
34 |         data_format (string): Data format for reading ('jpg' or 'zip').
35 |         transform (callable, optional): A function/transform that takes in an OpenCV image
36 |             and returns a transformed version, e.g. ``transforms.ToTensor``.
37 | target_transform (callable, optional): A function/transform that takes in the
38 | target and transforms it.
39 | """
40 |
41 | def __init__(self, root, dataset, data_format, transform=None,
42 | target_transform=None):
43 | from crowdposetools.coco import COCO
44 | self.name = 'CROWDPOSE'
45 | self.root = root
46 | self.dataset = dataset
47 | self.data_format = data_format
48 | self.coco = COCO(self._get_anno_file_name())
49 | self.ids = list(self.coco.imgs.keys())
50 | self.transform = transform
51 | self.target_transform = target_transform
52 |
53 | cats = [cat['name']
54 | for cat in self.coco.loadCats(self.coco.getCatIds())]
55 | self.classes = ['__background__'] + cats
56 | logger.info('=> classes: {}'.format(self.classes))
57 | self.num_classes = len(self.classes)
58 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
59 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
60 | self._coco_ind_to_class_ind = dict(
61 | [
62 | (self._class_to_coco_ind[cls], self._class_to_ind[cls])
63 | for cls in self.classes[1:]
64 | ]
65 | )
66 |
67 | def _get_anno_file_name(self):
68 | # example: root/json/crowdpose_{train,val,test}.json
69 | return os.path.join(
70 | self.root,
71 | 'json',
72 | 'crowdpose_{}.json'.format(
73 | self.dataset
74 | )
75 | )
76 |
77 | def _get_image_path(self, file_name):
78 | images_dir = os.path.join(self.root, 'images')
79 | if self.data_format == 'zip':
80 | return images_dir + '.zip@' + file_name
81 | else:
82 | return os.path.join(images_dir, file_name)
83 |
84 | def __getitem__(self, index):
85 | """
86 | Args:
87 | index (int): Index
88 |
89 | Returns:
90 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
91 | """
92 | coco = self.coco
93 | img_id = self.ids[index]
94 | ann_ids = coco.getAnnIds(imgIds=img_id)
95 | target = coco.loadAnns(ann_ids)
96 |
97 | file_name = coco.loadImgs(img_id)[0]['file_name']
98 |
99 | if self.data_format == 'zip':
100 | img = zipreader.imread(
101 | self._get_image_path(file_name),
102 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
103 | )
104 | else:
105 | img = cv2.imread(
106 | self._get_image_path(file_name),
107 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
108 | )
109 |
110 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
111 |
112 | if self.transform is not None:
113 | img = self.transform(img)
114 |
115 | if self.target_transform is not None:
116 | target = self.target_transform(target)
117 |
118 | return img, target
119 |
120 | def __len__(self):
121 | return len(self.ids)
122 |
123 | def __repr__(self):
124 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
125 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
126 | fmt_str += ' Root Location: {}\n'.format(self.root)
127 | tmp = ' Transforms (if any): '
128 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
129 | tmp = ' Target Transforms (if any): '
130 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
131 | return fmt_str
132 |
133 | def processKeypoints(self, keypoints):
134 | tmp = keypoints.copy()
135 | if keypoints[:, 2].max() > 0:
136 | p = keypoints[keypoints[:, 2] > 0][:, :2].mean(axis=0)
137 | num_keypoints = keypoints.shape[0]
138 | for i in range(num_keypoints):
139 | tmp[i][0:3] = [
140 | float(keypoints[i][0]),
141 | float(keypoints[i][1]),
142 | float(keypoints[i][2])
143 | ]
144 |
145 | return tmp
146 |
147 | def evaluate(self, cfg, preds, scores, output_dir,
148 | *args, **kwargs):
149 | '''
150 |         Perform evaluation on the CrowdPose keypoint task
151 | :param cfg: cfg dictionary
152 | :param preds: prediction
153 | :param output_dir: output directory
154 | :param args:
155 | :param kwargs:
156 | :return:
157 | '''
158 | res_folder = os.path.join(output_dir, 'results')
159 | if not os.path.exists(res_folder):
160 | os.makedirs(res_folder)
161 | res_file = os.path.join(
162 | res_folder, 'keypoints_%s_results.json' % self.dataset)
163 |
164 | # preds is a list of: image x person x (keypoints)
165 | # keypoints: num_joints * 4 (x, y, score, tag)
166 | kpts = defaultdict(list)
167 | for idx, _kpts in enumerate(preds):
168 | img_id = self.ids[idx]
169 | file_name = self.coco.loadImgs(img_id)[0]['file_name']
170 | for idx_kpt, kpt in enumerate(_kpts):
171 | area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (np.max(kpt[:, 1]) - np.min(kpt[:, 1]))
172 | kpt = self.processKeypoints(kpt)
173 | # if self.with_center:
174 | if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER:
175 | kpt = kpt[:-1]
176 |
177 | kpts[int(file_name.split('.')[0])].append(
178 | {
179 | 'keypoints': kpt[:, 0:3],
180 | 'score': scores[idx][idx_kpt],
181 | 'tags': kpt[:, 3],
182 | 'image': int(file_name.split('.')[0]),
183 | 'area': area
184 | }
185 | )
186 |
187 | # rescoring and oks nms
188 | oks_nmsed_kpts = []
189 | # image x person x (keypoints)
190 | for img in kpts.keys():
191 | # person x (keypoints)
192 | img_kpts = kpts[img]
193 | # person x (keypoints)
194 | # do not use nms, keep all detections
195 | keep = []
196 | if len(keep) == 0:
197 | oks_nmsed_kpts.append(img_kpts)
198 | else:
199 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
200 |
201 | self._write_coco_keypoint_results(
202 | oks_nmsed_kpts, res_file
203 | )
204 |
205 | # CrowdPose `test` set has annotation.
206 | info_str = self._do_python_keypoint_eval(
207 | res_file, res_folder
208 | )
209 | name_value = OrderedDict(info_str)
210 | return name_value, name_value['AP']
211 |
212 | def _write_coco_keypoint_results(self, keypoints, res_file):
213 | data_pack = [
214 | {
215 | 'cat_id': self._class_to_coco_ind[cls],
216 | 'cls_ind': cls_ind,
217 | 'cls': cls,
218 | 'ann_type': 'keypoints',
219 | 'keypoints': keypoints
220 | }
221 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'
222 | ]
223 |
224 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
225 | logger.info('=> Writing results json to %s' % res_file)
226 | with open(res_file, 'w') as f:
227 | json.dump(results, f, sort_keys=True, indent=4)
228 | try:
229 | json.load(open(res_file))
230 | except Exception:
231 | content = []
232 | with open(res_file, 'r') as f:
233 | for line in f:
234 | content.append(line)
235 | content[-1] = ']'
236 | with open(res_file, 'w') as f:
237 | for c in content:
238 | f.write(c)
239 |
240 | def _coco_keypoint_results_one_category_kernel(self, data_pack):
241 | cat_id = data_pack['cat_id']
242 | keypoints = data_pack['keypoints']
243 | cat_results = []
244 | num_joints = 14
245 |
246 | for img_kpts in keypoints:
247 | if len(img_kpts) == 0:
248 | continue
249 |
250 | _key_points = np.array(
251 | [img_kpts[k]['keypoints'] for k in range(len(img_kpts))]
252 | )
253 | key_points = np.zeros(
254 | (_key_points.shape[0], num_joints * 3),
255 | dtype=np.float
256 | )
257 |
258 | for ipt in range(num_joints):
259 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
260 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
261 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score.
262 |
263 | for k in range(len(img_kpts)):
264 | kpt = key_points[k].reshape((num_joints, 3))
265 | left_top = np.amin(kpt, axis=0)
266 | right_bottom = np.amax(kpt, axis=0)
267 |
268 | w = right_bottom[0] - left_top[0]
269 | h = right_bottom[1] - left_top[1]
270 |
271 | cat_results.append({
272 | 'image_id': img_kpts[k]['image'],
273 | 'category_id': cat_id,
274 | 'keypoints': list(key_points[k]),
275 | 'score': img_kpts[k]['score'],
276 | 'bbox': list([left_top[0], left_top[1], w, h])
277 | })
278 |
279 | return cat_results
280 |
281 | def _do_python_keypoint_eval(self, res_file, res_folder):
282 | coco_dt = self.coco.loadRes(res_file)
283 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
284 | coco_eval.params.useSegm = None
285 | coco_eval.evaluate()
286 | coco_eval.accumulate()
287 | coco_eval.summarize()
288 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP (easy)', 'AP (medium)', 'AP (hard)']
289 | stats_index = [0, 1, 2, 5, 6, 7, 8, 9, 10]
290 |
291 | info_str = []
292 | for ind, name in enumerate(stats_names):
293 | info_str.append((name, coco_eval.stats[stats_index[ind]]))
294 | # info_str.append(coco_eval.stats[ind])
295 |
296 | return info_str
297 |
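
One detail worth flagging in _do_python_keypoint_eval above: the crowdposetools COCOeval summary orders its statistics differently from the COCO tool, which is why stats_index skips slots 3-4 and reads the easy/medium/hard APs from the tail. A hedged restatement of the mapping assumed by the code (derived from stats_index, not verified against a particular crowdposetools release):

    crowdpose_stats_slots = {
        'AP': 0, 'Ap .5': 1, 'AP .75': 2,
        'AR': 5, 'AR .5': 6, 'AR .75': 7,
        'AP (easy)': 8, 'AP (medium)': 9, 'AP (hard)': 10,
    }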
--------------------------------------------------------------------------------
/lib/dataset/CrowdPoseKeypoints.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import logging
12 |
13 | import numpy as np
14 |
15 | #import crowdposetools
16 | from .CrowdPoseDataset import CrowdPoseDataset  # needed: CrowdPoseKeypoints subclasses it below
17 | from .target_generators import HeatmapGenerator
18 |
19 |
20 | logger = logging.getLogger(__name__)
21 |
22 |
23 | class CrowdPoseKeypoints(CrowdPoseDataset):
24 | def __init__(self,
25 | cfg,
26 | dataset_name,
27 | remove_images_without_annotations,
28 | heatmap_generator,
29 | joints_generator,
30 | transforms=None):
31 | super().__init__(cfg.DATASET.ROOT,
32 | dataset_name,
33 | cfg.DATASET.DATA_FORMAT)
34 |
35 | if cfg.DATASET.WITH_CENTER:
36 |             assert cfg.DATASET.NUM_JOINTS == 15, 'Number of joints with center for CrowdPose is 15'
37 |         else:
38 |             assert cfg.DATASET.NUM_JOINTS == 14, 'Number of joints for CrowdPose is 14'
39 |
40 | self.num_scales = self._init_check(heatmap_generator, joints_generator)
41 |
42 | self.num_joints = cfg.DATASET.NUM_JOINTS
43 | self.with_center = cfg.DATASET.WITH_CENTER
44 | self.num_joints_without_center = self.num_joints - 1 \
45 | if self.with_center else self.num_joints
46 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA
47 | self.base_sigma = cfg.DATASET.BASE_SIGMA
48 | self.base_size = cfg.DATASET.BASE_SIZE
49 | self.int_sigma = cfg.DATASET.INT_SIGMA
50 |
51 | if remove_images_without_annotations:
52 | self.ids = [
53 | img_id
54 | for img_id in self.ids
55 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
56 | ]
57 |
58 | self.transforms = transforms
59 | self.heatmap_generator = heatmap_generator
60 | self.joints_generator = joints_generator
61 |
62 | def __getitem__(self, idx):
63 | img, anno = super().__getitem__(idx)
64 |
65 | mask = self.get_mask(anno, idx)
66 |
67 | anno = [
68 | obj for obj in anno
69 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
70 | ]
71 |
72 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint
73 | joints = self.get_joints(anno)
74 |
75 | mask_list = [mask.copy() for _ in range(self.num_scales)]
76 | joints_list = [joints.copy() for _ in range(self.num_scales)]
77 | target_list = list()
78 |
79 | if self.transforms:
80 | img, mask_list, joints_list = self.transforms(
81 | img, mask_list, joints_list
82 | )
83 |
84 | for scale_id in range(self.num_scales):
85 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id])
86 | joints_t = self.joints_generator[scale_id](joints_list[scale_id])
87 |
88 | target_list.append(target_t.astype(np.float32))
89 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32)
90 | joints_list[scale_id] = joints_t.astype(np.int32)
91 |
92 | return img, target_list, mask_list, joints_list
93 |
94 | def get_joints(self, anno):
95 | num_people = len(anno)
96 |
97 | if self.scale_aware_sigma:
98 | joints = np.zeros((num_people, self.num_joints, 4))
99 | else:
100 | joints = np.zeros((num_people, self.num_joints, 3))
101 |
102 | for i, obj in enumerate(anno):
103 | joints[i, :self.num_joints_without_center, :3] = \
104 | np.array(obj['keypoints']).reshape([-1, 3])
105 | if self.with_center:
106 | joints_sum = np.sum(joints[i, :-1, :2], axis=0)
107 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0])
108 | if num_vis_joints > 0:
109 | joints[i, -1, :2] = joints_sum / num_vis_joints
110 | joints[i, -1, 2] = 1
111 | if self.scale_aware_sigma:
112 | # get person box
113 | box = obj['bbox']
114 | size = max(box[2], box[3])
115 | sigma = size / self.base_size * self.base_sigma
116 | if self.int_sigma:
117 | sigma = int(np.round(sigma + 0.5))
118 | assert sigma > 0, sigma
119 | joints[i, :, 3] = sigma
120 |
121 | return joints
122 |
123 | def get_mask(self, anno, idx):
124 | coco = self.coco
125 | img_info = coco.loadImgs(self.ids[idx])[0]
126 |
127 | m = np.zeros((img_info['height'], img_info['width']))
128 |
129 | return m < 0.5
130 |
131 | def _init_check(self, heatmap_generator, joints_generator):
132 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple'
133 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple'
134 | assert len(heatmap_generator) == len(joints_generator), \
135 |             'heatmap_generator and joints_generator should have the same length, '\
136 | 'got {} vs {}.'.format(
137 | len(heatmap_generator), len(joints_generator)
138 | )
139 | return len(heatmap_generator)
140 |
--------------------------------------------------------------------------------
/lib/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from .COCOKeypoints import CocoKeypoints as coco
8 | #from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose
9 | from .build import make_dataloader
10 | from .build import make_test_dataloader
11 |
12 | # dataset dependent configuration for visualization
13 | coco_part_labels = [
14 | 'nose', 'eye_l', 'eye_r', 'ear_l', 'ear_r',
15 | 'sho_l', 'sho_r', 'elb_l', 'elb_r', 'wri_l', 'wri_r',
16 | 'hip_l', 'hip_r', 'kne_l', 'kne_r', 'ank_l', 'ank_r'
17 | ]
18 | coco_part_idx = {
19 | b: a for a, b in enumerate(coco_part_labels)
20 | }
21 | coco_part_orders = [
22 | ('nose', 'eye_l'), ('eye_l', 'eye_r'), ('eye_r', 'nose'),
23 | ('eye_l', 'ear_l'), ('eye_r', 'ear_r'), ('ear_l', 'sho_l'),
24 | ('ear_r', 'sho_r'), ('sho_l', 'sho_r'), ('sho_l', 'hip_l'),
25 | ('sho_r', 'hip_r'), ('hip_l', 'hip_r'), ('sho_l', 'elb_l'),
26 | ('elb_l', 'wri_l'), ('sho_r', 'elb_r'), ('elb_r', 'wri_r'),
27 | ('hip_l', 'kne_l'), ('kne_l', 'ank_l'), ('hip_r', 'kne_r'),
28 | ('kne_r', 'ank_r')
29 | ]
30 | '''
31 | crowd_pose_part_labels = [
32 | 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
33 | 'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
34 | 'left_knee', 'right_knee', 'left_ankle', 'right_ankle',
35 | 'head', 'neck'
36 | ]
37 | crowd_pose_part_idx = {
38 | b: a for a, b in enumerate(crowd_pose_part_labels)
39 | }
40 | crowd_pose_part_orders = [
41 | ('head', 'neck'), ('neck', 'left_shoulder'), ('neck', 'right_shoulder'),
42 | ('left_shoulder', 'right_shoulder'), ('left_shoulder', 'left_hip'),
43 | ('right_shoulder', 'right_hip'), ('left_hip', 'right_hip'), ('left_shoulder', 'left_elbow'),
44 | ('left_elbow', 'left_wrist'), ('right_shoulder', 'right_elbow'), ('right_elbow', 'right_wrist'),
45 | ('left_hip', 'left_knee'), ('left_knee', 'left_ankle'), ('right_hip', 'right_knee'),
46 | ('right_knee', 'right_ankle')
47 | ]
48 | '''
49 | VIS_CONFIG = {
50 | 'COCO': {
51 | 'part_labels': coco_part_labels,
52 | 'part_idx': coco_part_idx,
53 | 'part_orders': coco_part_orders
54 | #},
55 | #'CROWDPOSE': {
56 | # 'part_labels': crowd_pose_part_labels,
57 | # 'part_idx': crowd_pose_part_idx,
58 | # 'part_orders': crowd_pose_part_orders
59 | }
60 | }
61 |
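
VIS_CONFIG is consumed by the visualization utilities; a hedged sketch of how the part_orders pairs translate into skeleton line segments (the drawing call is illustrative, not the utils.vis API):

    import cv2

    # joints: (17, 3) array of (x, y, visibility/score) for one person
    vis = VIS_CONFIG['COCO']
    for a, b in vis['part_orders']:
        ia, ib = vis['part_idx'][a], vis['part_idx'][b]
        if joints[ia, 2] > 0 and joints[ib, 2] > 0:
            cv2.line(image,
                     (int(joints[ia, 0]), int(joints[ia, 1])),
                     (int(joints[ib, 0]), int(joints[ib, 1])),
                     (0, 255, 0), 2)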
--------------------------------------------------------------------------------
/lib/dataset/__pycache__/COCODataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/COCODataset.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/__pycache__/COCOKeypoints.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/COCOKeypoints.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/__pycache__/CrowdPoseDataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/CrowdPoseDataset.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/__pycache__/CrowdPoseKeypoints.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/CrowdPoseKeypoints.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/__pycache__/build.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/build.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/build.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import torch.utils.data
13 |
14 | from .COCODataset import CocoDataset as coco
15 | from .COCOKeypoints import CocoKeypoints as coco_kpt
16 | #from .CrowdPoseDataset import CrowdPoseDataset as crowd_pose
17 | #from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose_kpt
18 | from .transforms import build_transforms
19 | from .target_generators import HeatmapGenerator
20 | from .target_generators import ScaleAwareHeatmapGenerator
21 | from .target_generators import JointsGenerator
22 |
23 |
24 | def build_dataset(cfg, is_train):
25 | transforms = build_transforms(cfg, is_train)
26 |
27 | if cfg.DATASET.SCALE_AWARE_SIGMA:
28 | _HeatmapGenerator = ScaleAwareHeatmapGenerator
29 | else:
30 | _HeatmapGenerator = HeatmapGenerator
31 |
32 | heatmap_generator = [
33 | _HeatmapGenerator(
34 | output_size, cfg.DATASET.NUM_JOINTS, cfg.DATASET.SIGMA
35 | ) for output_size in cfg.DATASET.OUTPUT_SIZE
36 | ]
37 | joints_generator = [
38 | JointsGenerator(
39 | cfg.DATASET.MAX_NUM_PEOPLE,
40 | cfg.DATASET.NUM_JOINTS,
41 | output_size,
42 | cfg.MODEL.TAG_PER_JOINT
43 | ) for output_size in cfg.DATASET.OUTPUT_SIZE
44 | ]
45 |
46 | dataset_name = cfg.DATASET.TRAIN if is_train else cfg.DATASET.TEST
47 |
48 |     dataset = eval(cfg.DATASET.DATASET)(  # e.g. 'coco_kpt' resolves to CocoKeypoints imported above
49 | cfg,
50 | dataset_name,
51 | is_train,
52 | heatmap_generator,
53 | joints_generator,
54 | transforms
55 | )
56 |
57 | return dataset
58 |
59 |
60 | def make_dataloader(cfg, is_train=True, distributed=False):
61 | if is_train:
62 | images_per_gpu = cfg.TRAIN.IMAGES_PER_GPU
63 | shuffle = True
64 | else:
65 | images_per_gpu = cfg.TEST.IMAGES_PER_GPU
66 | shuffle = False
67 | images_per_batch = images_per_gpu * len(cfg.GPUS)
68 |
69 | dataset = build_dataset(cfg, is_train)
70 |
71 | if is_train and distributed:
72 | train_sampler = torch.utils.data.distributed.DistributedSampler(
73 | dataset
74 | )
75 | shuffle = False
76 | else:
77 | train_sampler = None
78 |
79 | data_loader = torch.utils.data.DataLoader(
80 | dataset,
81 | batch_size=images_per_batch,
82 | shuffle=shuffle,
83 | num_workers=cfg.WORKERS,
84 | pin_memory=cfg.PIN_MEMORY,
85 | sampler=train_sampler
86 | )
87 |
88 | return data_loader
89 |
90 |
91 | def make_test_dataloader(cfg):
92 | transforms = None
93 | dataset = eval(cfg.DATASET.DATASET_TEST)(
94 | cfg.DATASET.ROOT,
95 | cfg.DATASET.TEST,
96 | cfg.DATASET.DATA_FORMAT,
97 | transforms
98 | )
99 |
100 | data_loader = torch.utils.data.DataLoader(
101 | dataset,
102 | batch_size=1,
103 | shuffle=False,
104 | num_workers=0,
105 | pin_memory=False
106 | )
107 |
108 | return data_loader, dataset
109 |
--------------------------------------------------------------------------------
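Usage sketch for /lib/dataset/build.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A minimal sketch of how make_dataloader above is driven from a training script. It assumes
lib/ is on PYTHONPATH (as tools/_init_paths.py arranges), that lib/config exposes the YACS
cfg object, and that the data pointed to by cfg.DATASET.ROOT is actually present; the batch
layout is an assumption based on the heatmap/joints generators above.

from config import cfg                  # assumption: lib/config exposes the default YACS cfg
from dataset import make_dataloader

# In tools/dist_train.py the cfg is first merged with one of the example YAMLs; here the
# defaults are assumed to already point at a valid DATASET.ROOT. The cfg must carry
# DATASET.*, TRAIN.IMAGES_PER_GPU, TEST.IMAGES_PER_GPU, GPUS, WORKERS and PIN_MEMORY.
train_loader = make_dataloader(cfg, is_train=True, distributed=False)
for images, heatmaps, masks, joints in train_loader:
    # heatmaps, masks and joints are lists with one entry per DATASET.OUTPUT_SIZE scale
    break

--------------------------------------------------------------------------------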
/lib/dataset/target_generators/__init__.py:
--------------------------------------------------------------------------------
1 | from .target_generators import HeatmapGenerator
2 | from .target_generators import ScaleAwareHeatmapGenerator
3 | from .target_generators import JointsGenerator
4 |
5 | __all__ = ['HeatmapGenerator', 'ScaleAwareHeatmapGenerator', 'JointsGenerator']
6 |
--------------------------------------------------------------------------------
/lib/dataset/target_generators/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/target_generators/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/target_generators/__pycache__/target_generators.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/target_generators/__pycache__/target_generators.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/target_generators/target_generators.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import numpy as np
13 |
14 |
15 | class HeatmapGenerator():
16 | def __init__(self, output_res, num_joints, sigma=-1):
17 | self.output_res = output_res
18 | self.num_joints = num_joints
19 | if sigma < 0:
20 | sigma = self.output_res/64
21 | self.sigma = sigma
22 | size = 6*sigma + 3
23 | x = np.arange(0, size, 1, float)
24 | y = x[:, np.newaxis]
25 | x0, y0 = 3*sigma + 1, 3*sigma + 1
26 | self.g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
27 |
28 | def __call__(self, joints):
29 | hms = np.zeros((self.num_joints, self.output_res, self.output_res),
30 | dtype=np.float32)
31 | sigma = self.sigma
32 | for p in joints:
33 | for idx, pt in enumerate(p):
34 | if pt[2] > 0:
35 | x, y = int(pt[0]), int(pt[1])
36 | if x < 0 or y < 0 or \
37 | x >= self.output_res or y >= self.output_res:
38 | continue
39 |
40 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1))
41 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2))
42 |
43 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0]
44 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1]
45 |
46 | cc, dd = max(0, ul[0]), min(br[0], self.output_res)
47 | aa, bb = max(0, ul[1]), min(br[1], self.output_res)
48 | hms[idx, aa:bb, cc:dd] = np.maximum(
49 | hms[idx, aa:bb, cc:dd], self.g[a:b, c:d])
50 | return hms
51 |
52 |
53 | class ScaleAwareHeatmapGenerator():
54 | def __init__(self, output_res, num_joints):
55 | self.output_res = output_res
56 | self.num_joints = num_joints
57 |
58 | def get_gaussian_kernel(self, sigma):
59 | size = 6*sigma + 3
60 | x = np.arange(0, size, 1, float)
61 | y = x[:, np.newaxis]
62 | x0, y0 = 3*sigma + 1, 3*sigma + 1
63 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
64 | return g
65 |
66 | def __call__(self, joints):
67 | hms = np.zeros((self.num_joints, self.output_res, self.output_res),
68 | dtype=np.float32)
69 | for p in joints:
70 | sigma = p[0, 3]
71 | g = self.get_gaussian_kernel(sigma)
72 | for idx, pt in enumerate(p):
73 | if pt[2] > 0:
74 | x, y = int(pt[0]), int(pt[1])
75 | if x < 0 or y < 0 or \
76 | x >= self.output_res or y >= self.output_res:
77 | continue
78 |
79 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1))
80 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2))
81 |
82 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0]
83 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1]
84 |
85 | cc, dd = max(0, ul[0]), min(br[0], self.output_res)
86 | aa, bb = max(0, ul[1]), min(br[1], self.output_res)
87 | hms[idx, aa:bb, cc:dd] = np.maximum(
88 | hms[idx, aa:bb, cc:dd], g[a:b, c:d])
89 | return hms
90 |
91 |
92 | class JointsGenerator():
93 | def __init__(self, max_num_people, num_joints, output_res, tag_per_joint):
94 | self.max_num_people = max_num_people
95 | self.num_joints = num_joints
96 | self.output_res = output_res
97 | self.tag_per_joint = tag_per_joint
98 |
99 | def __call__(self, joints):
100 | visible_nodes = np.zeros((self.max_num_people, self.num_joints, 2))
101 | output_res = self.output_res
102 | for i in range(len(joints)):
103 | tot = 0
104 | for idx, pt in enumerate(joints[i]):
105 | x, y = int(pt[0]), int(pt[1])
106 | if pt[2] > 0 and x >= 0 and y >= 0 \
107 | and x < self.output_res and y < self.output_res:
108 | if self.tag_per_joint:
109 | visible_nodes[i][tot] = \
110 | (idx * output_res**2 + y * output_res + x, 1)
111 | else:
112 | visible_nodes[i][tot] = \
113 | (y * output_res + x, 1)
114 | tot += 1
115 | return visible_nodes
116 |
--------------------------------------------------------------------------------
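Usage sketch for /lib/dataset/target_generators/target_generators.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A small standalone check of what HeatmapGenerator produces: one visible joint placed at the
centre of a 128x128 output gives a 17-channel heatmap volume with a unit-height Gaussian at
that location (lib/ is assumed to be on PYTHONPATH).

import numpy as np
from dataset.target_generators import HeatmapGenerator

gen = HeatmapGenerator(output_res=128, num_joints=17, sigma=2)
joints = np.zeros((1, 17, 3))      # (people, joints, [x, y, visibility])
joints[0, 0] = [64, 64, 1]         # a single visible joint at the centre
hms = gen(joints)
print(hms.shape)                   # (17, 128, 128)
print(hms[0].max())                # 1.0 at (64, 64), falling off with sigma=2

--------------------------------------------------------------------------------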
/lib/dataset/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .transforms import Compose
2 | from .transforms import RandomAffineTransform
3 | from .transforms import ToTensor
4 | from .transforms import Normalize
5 | from .transforms import RandomHorizontalFlip
6 |
7 | from .build import build_transforms
8 | from .build import FLIP_CONFIG
9 |
--------------------------------------------------------------------------------
/lib/dataset/transforms/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/transforms/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/transforms/__pycache__/build.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/transforms/__pycache__/build.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/transforms/__pycache__/transforms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/transforms/__pycache__/transforms.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/dataset/transforms/build.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | from . import transforms as T
13 |
14 |
15 | FLIP_CONFIG = {
16 | 'COCO': [
17 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15
18 | ],
19 | 'COCO_WITH_CENTER': [
20 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 17
21 | ],
22 | 'CROWDPOSE': [
23 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13
24 | ],
25 | 'CROWDPOSE_WITH_CENTER': [
26 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13, 14
27 | ]
28 | }
29 |
30 |
31 | def build_transforms(cfg, is_train=True):
32 | assert is_train is True, 'Please only use build_transforms for training.'
33 | assert isinstance(cfg.DATASET.OUTPUT_SIZE, (list, tuple)), 'DATASET.OUTPUT_SIZE should be list or tuple'
34 | if is_train:
35 | max_rotation = cfg.DATASET.MAX_ROTATION
36 | min_scale = cfg.DATASET.MIN_SCALE
37 | max_scale = cfg.DATASET.MAX_SCALE
38 | max_translate = cfg.DATASET.MAX_TRANSLATE
39 | input_size = cfg.DATASET.INPUT_SIZE
40 | output_size = cfg.DATASET.OUTPUT_SIZE
41 | flip = cfg.DATASET.FLIP
42 | scale_type = cfg.DATASET.SCALE_TYPE
43 | else:
44 | scale_type = cfg.DATASET.SCALE_TYPE
45 | max_rotation = 0
46 | min_scale = 1
47 | max_scale = 1
48 | max_translate = 0
49 | input_size = 512
50 | output_size = [128]
51 | flip = 0
52 |
53 | # coco_flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
54 | # if cfg.DATASET.WITH_CENTER:
55 | # coco_flip_index.append(17)
56 | if 'coco' in cfg.DATASET.DATASET:
57 | dataset_name = 'COCO'
58 | elif 'crowd_pose' in cfg.DATASET.DATASET:
59 | dataset_name = 'CROWDPOSE'
60 | else:
61 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET)
62 | if cfg.DATASET.WITH_CENTER:
63 | coco_flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER']
64 | else:
65 | coco_flip_index = FLIP_CONFIG[dataset_name]
66 |
67 | transforms = T.Compose(
68 | [
69 | T.RandomAffineTransform(
70 | input_size,
71 | output_size,
72 | max_rotation,
73 | min_scale,
74 | max_scale,
75 | scale_type,
76 | max_translate,
77 | scale_aware_sigma=cfg.DATASET.SCALE_AWARE_SIGMA
78 | ),
79 | T.RandomHorizontalFlip(coco_flip_index, output_size, flip),
80 | T.ToTensor(),
81 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
82 | ]
83 | )
84 |
85 | return transforms
86 |
--------------------------------------------------------------------------------
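Usage sketch for /lib/dataset/transforms/build.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A minimal sketch of calling build_transforms from a training script. The cfg is the YACS
config assumed to be exposed by lib/config, and the assertion above restricts the call to
is_train=True.

from config import cfg                                     # assumption: provided by lib/config
from dataset.transforms import build_transforms, FLIP_CONFIG

train_transforms = build_transforms(cfg, is_train=True)    # reads the cfg.DATASET.* fields listed above
coco_flip_index = FLIP_CONFIG['COCO']                      # same table RandomHorizontalFlip is built from

--------------------------------------------------------------------------------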
/lib/dataset/transforms/transforms.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import random
13 |
14 | import cv2
15 | import numpy as np
16 | import torch
17 | import torchvision
18 | from torchvision.transforms import functional as F
19 |
20 |
21 | class Compose(object):
22 | def __init__(self, transforms):
23 | self.transforms = transforms
24 |
25 | def __call__(self, image, mask, joints):
26 | for t in self.transforms:
27 | image, mask, joints = t(image, mask, joints)
28 | return image, mask, joints
29 |
30 | def __repr__(self):
31 | format_string = self.__class__.__name__ + "("
32 | for t in self.transforms:
33 | format_string += "\n"
34 | format_string += " {0}".format(t)
35 | format_string += "\n)"
36 | return format_string
37 |
38 |
39 | class ToTensor(object):
40 | def __call__(self, image, mask, joints):
41 | return F.to_tensor(image), mask, joints
42 |
43 |
44 | class Normalize(object):
45 | def __init__(self, mean, std):
46 | self.mean = mean
47 | self.std = std
48 |
49 | def __call__(self, image, mask, joints):
50 | image = F.normalize(image, mean=self.mean, std=self.std)
51 | return image, mask, joints
52 |
53 |
54 | class RandomHorizontalFlip(object):
55 | def __init__(self, flip_index, output_size, prob=0.5):
56 | self.flip_index = flip_index
57 | self.prob = prob
58 | self.output_size = output_size if isinstance(output_size, list) \
59 | else [output_size]
60 |
61 | def __call__(self, image, mask, joints):
62 | assert isinstance(mask, list)
63 | assert isinstance(joints, list)
64 | assert len(mask) == len(joints)
65 | assert len(mask) == len(self.output_size)
66 |
67 | if random.random() < self.prob:
68 | image = image[:, ::-1] - np.zeros_like(image)
69 | for i, _output_size in enumerate(self.output_size):
70 | mask[i] = mask[i][:, ::-1] - np.zeros_like(mask[i])
71 | joints[i] = joints[i][:, self.flip_index]
72 | joints[i][:, :, 0] = _output_size - joints[i][:, :, 0] - 1
73 |
74 | return image, mask, joints
75 |
76 |
77 | class RandomAffineTransform(object):
78 | def __init__(self,
79 | input_size,
80 | output_size,
81 | max_rotation,
82 | min_scale,
83 | max_scale,
84 | scale_type,
85 | max_translate,
86 | scale_aware_sigma=False):
87 | self.input_size = input_size
88 | self.output_size = output_size if isinstance(output_size, list) \
89 | else [output_size]
90 |
91 | self.max_rotation = max_rotation
92 | self.min_scale = min_scale
93 | self.max_scale = max_scale
94 | self.scale_type = scale_type
95 | self.max_translate = max_translate
96 | self.scale_aware_sigma = scale_aware_sigma
97 |
98 | def _get_affine_matrix(self, center, scale, res, rot=0):
99 | # Generate transformation matrix
100 | h = 200 * scale
101 | t = np.zeros((3, 3))
102 | t[0, 0] = float(res[1]) / h
103 | t[1, 1] = float(res[0]) / h
104 | t[0, 2] = res[1] * (-float(center[0]) / h + .5)
105 | t[1, 2] = res[0] * (-float(center[1]) / h + .5)
106 | t[2, 2] = 1
107 | if not rot == 0:
108 | rot = -rot # To match direction of rotation from cropping
109 | rot_mat = np.zeros((3, 3))
110 | rot_rad = rot * np.pi / 180
111 | sn, cs = np.sin(rot_rad), np.cos(rot_rad)
112 | rot_mat[0, :2] = [cs, -sn]
113 | rot_mat[1, :2] = [sn, cs]
114 | rot_mat[2, 2] = 1
115 | # Need to rotate around center
116 | t_mat = np.eye(3)
117 | t_mat[0, 2] = -res[1]/2
118 | t_mat[1, 2] = -res[0]/2
119 | t_inv = t_mat.copy()
120 | t_inv[:2, 2] *= -1
121 | t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
122 | return t
123 |
124 | def _affine_joints(self, joints, mat):
125 | joints = np.array(joints)
126 | shape = joints.shape
127 | joints = joints.reshape(-1, 2)
128 | return np.dot(np.concatenate(
129 | (joints, joints[:, 0:1]*0+1), axis=1), mat.T).reshape(shape)
130 |
131 | def __call__(self, image, mask, joints):
132 | assert isinstance(mask, list)
133 | assert isinstance(joints, list)
134 | assert len(mask) == len(joints)
135 | assert len(mask) == len(self.output_size)
136 |
137 | height, width = image.shape[:2]
138 |
139 | center = np.array((width/2, height/2))
140 | if self.scale_type == 'long':
141 | scale = max(height, width)/200
142 | elif self.scale_type == 'short':
143 | scale = min(height, width)/200
144 | else:
145 |             raise ValueError('Unknown scale type: {}'.format(self.scale_type))
146 | aug_scale = np.random.random() * (self.max_scale - self.min_scale) \
147 | + self.min_scale
148 | scale *= aug_scale
149 | aug_rot = (np.random.random() * 2 - 1) * self.max_rotation
150 |
151 | if self.max_translate > 0:
152 | dx = np.random.randint(
153 | -self.max_translate*scale, self.max_translate*scale)
154 | dy = np.random.randint(
155 | -self.max_translate*scale, self.max_translate*scale)
156 | center[0] += dx
157 | center[1] += dy
158 |
159 | for i, _output_size in enumerate(self.output_size):
160 | mat_output = self._get_affine_matrix(
161 | center, scale, (_output_size, _output_size), aug_rot
162 | )[:2]
163 | mask[i] = cv2.warpAffine(
164 | (mask[i]*255).astype(np.uint8), mat_output,
165 | (_output_size, _output_size)
166 | ) / 255
167 | mask[i] = (mask[i] > 0.5).astype(np.float32)
168 |
169 | joints[i][:, :, 0:2] = self._affine_joints(
170 | joints[i][:, :, 0:2], mat_output
171 | )
172 | if self.scale_aware_sigma:
173 | joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale
174 |
175 | mat_input = self._get_affine_matrix(
176 | center, scale, (self.input_size, self.input_size), aug_rot
177 | )[:2]
178 | image = cv2.warpAffine(
179 | image, mat_input, (self.input_size, self.input_size)
180 | )
181 |
182 | return image, mask, joints
183 |
--------------------------------------------------------------------------------
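Usage sketch for /lib/dataset/transforms/transforms.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A hand-built version of the augmentation pipeline above, with hypothetical numbers
(512-pixel input, [128, 256] output pyramid, COCO flip order). It shows the
(image, mask, joints) calling convention: one mask and one joint array per output resolution.

import numpy as np
from dataset.transforms import (Compose, RandomAffineTransform,
                                RandomHorizontalFlip, ToTensor, Normalize)

flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
pipeline = Compose([
    RandomAffineTransform(512, [128, 256], 30, 0.75, 1.5, 'short', 40),
    RandomHorizontalFlip(flip_index, [128, 256], 0.5),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

image = np.zeros((480, 640, 3), dtype=np.uint8)
masks = [np.ones((480, 640)), np.ones((480, 640))]      # one mask per output size
joints = [np.zeros((1, 17, 3)), np.zeros((1, 17, 3))]   # one joint array per output size
image, masks, joints = pipeline(image, masks, joints)   # image -> 3x512x512 tensor, masks -> 128/256 px

--------------------------------------------------------------------------------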
/lib/fp16_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__init__.py
--------------------------------------------------------------------------------
/lib/fp16_utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/fp16_utils/__pycache__/fp16_optimizer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/fp16_optimizer.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/fp16_utils/__pycache__/fp16util.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/fp16util.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/fp16_utils/__pycache__/loss_scaler.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/loss_scaler.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/fp16_utils/fp16util.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Based on:
3 | # apex
4 | # Copyright (c) NVIDIA
5 | # Licensed under the BSD 3-Clause "New" or "Revised" License
6 | # https://github.com/NVIDIA/apex
7 | # All rights reserved.
8 | #
9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
10 | # following conditions are met:
11 | #
12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
13 | # disclaimer.
14 | #
15 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
16 | # disclaimer in the documentation and/or other materials provided with the distribution.
17 | #
18 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
19 | # products derived from this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
22 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | # ------------------------------------------------------------------------------
29 | # Copyright (c) Microsoft
30 | # Licensed under the MIT License.
31 | # Modified by Bowen Cheng
32 | # ------------------------------------------------------------------------------
33 |
34 | import torch
35 | import torch.nn as nn
36 | from torch.autograd import Variable
37 | from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
38 |
39 |
40 | class tofp16(nn.Module):
41 | """
42 | Model wrapper that implements::
43 | def forward(self, input):
44 | return input.half()
45 | """
46 |
47 | def __init__(self):
48 | super(tofp16, self).__init__()
49 |
50 | def forward(self, input):
51 | return input.half()
52 |
53 |
54 | class tofp32(nn.Module):
55 | """
56 | Model wrapper that implements::
57 | def forward(self, input):
58 |         return input.float()
59 | """
60 |
61 | def __init__(self):
62 | super(tofp32, self).__init__()
63 |
64 | def forward(self, input):
65 | if isinstance(input, list):
66 | return list(map(lambda x: x.float(), input))
67 | else:
68 | return input.float()
69 |
70 |
71 | def BN_convert_float(module):
72 | '''
73 | Designed to work with network_to_half.
74 | BatchNorm layers need parameters in single precision.
75 | Find all layers and convert them back to float. This can't
76 | be done with built in .apply as that function will apply
77 | fn to all modules, parameters, and buffers. Thus we wouldn't
78 | be able to guard the float conversion based on the module type.
79 | '''
80 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
81 | module.float()
82 | for child in module.children():
83 | BN_convert_float(child)
84 | return module
85 |
86 |
87 | def network_to_half(network):
88 | """
89 | Convert model to half precision in a batchnorm-safe way.
90 | """
91 | return nn.Sequential(tofp16(), BN_convert_float(network.half()), tofp32())
92 |
93 |
94 | def backwards_debug_hook(grad):
95 |     raise RuntimeError("master_params received a gradient in the backward pass!")
96 |
97 |
98 | def prep_param_lists(model, flat_master=False):
99 | """
100 | Creates a list of FP32 master parameters for a given model, as in
101 | `Training Neural Networks with Mixed Precision: Real Examples`_.
102 | Args:
103 | model (torch.nn.Module): Existing Pytorch model
104 | flat_master (bool, optional, default=False): Flatten the master parameters into a single tensor, as a performance optimization.
105 | Returns:
106 |         A tuple (``model_params``, ``master_params``). ``model_params`` is a list of the model's parameters for later use with :func:`model_grads_to_master_grads` and :func:`master_params_to_model_params`. ``master_params`` is a list of FP32 master parameters. If ``flat_master=True``, ``master_params`` will be a list with one element.
107 | Example::
108 | model_params, master_params = prep_param_lists(model)
109 | .. warning::
110 | Currently, if ``flat_master=True``, all the model's parameters must be the same type. If the model has parameters of different types, use ``flat_master=False``, or use :class:`FP16_Optimizer`.
111 | .. _`Training Neural Networks with Mixed Precision: Real Examples`:
112 | http://on-demand.gputechconf.com/gtc/2018/video/S81012/
113 | """
114 | model_params = [param for param in model.parameters() if param.requires_grad]
115 |
116 | if flat_master:
117 | # Give the user some more useful error messages
118 | try:
119 | # flatten_dense_tensors returns a contiguous flat array.
120 | # http://pytorch.org/docs/master/_modules/torch/_utils.html
121 | master_params = _flatten_dense_tensors([param.data for param in model_params]).float()
122 | except:
123 | print("Error in prep_param_lists: model may contain a mixture of parameters "
124 |                   "of different types. Use flat_master=False, or use FP16_Optimizer.")
125 | raise
126 | master_params = torch.nn.Parameter(master_params)
127 | master_params.requires_grad = True
128 | # master_params.register_hook(backwards_debug_hook)
129 | if master_params.grad is None:
130 | master_params.grad = master_params.new(*master_params.size())
131 | return model_params, [master_params]
132 | else:
133 | master_params = [param.clone().float().detach() for param in model_params]
134 | for param in master_params:
135 | param.requires_grad = True
136 | return model_params, master_params
137 |
138 |
139 | def model_grads_to_master_grads(model_params, master_params, flat_master=False):
140 | """
141 | Copy model gradients to master gradients.
142 | Args:
143 | model_params: List of model parameters created by :func:`prep_param_lists`.
144 | master_params: List of FP32 master parameters created by :func:`prep_param_lists`. If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`model_grads_to_master_grads`.
145 | """
146 | if flat_master:
147 | # The flattening may incur one more deep copy than is necessary.
148 | master_params[0].grad.data.copy_(
149 | _flatten_dense_tensors([p.grad.data for p in model_params]))
150 | else:
151 | for model, master in zip(model_params, master_params):
152 | if model.grad is not None:
153 | if master.grad is None:
154 | master.grad = Variable(master.data.new(*master.data.size()))
155 | master.grad.data.copy_(model.grad.data)
156 | else:
157 | master.grad = None
158 |
159 |
160 | def master_params_to_model_params(model_params, master_params, flat_master=False):
161 | """
162 | Copy master parameters to model parameters.
163 | Args:
164 | model_params: List of model parameters created by :func:`prep_param_lists`.
165 | master_params: List of FP32 master parameters created by :func:`prep_param_lists`. If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`master_params_to_model_params`.
166 | """
167 | if flat_master:
168 | for model, master in zip(model_params,
169 | _unflatten_dense_tensors(master_params[0].data, model_params)):
170 | model.data.copy_(master)
171 | else:
172 | for model, master in zip(model_params, master_params):
173 | model.data.copy_(master.data)
174 |
175 |
176 | # Backward compatibility fixes
177 | def to_python_float(t):
178 | if hasattr(t, 'item'):
179 | return t.item()
180 | else:
181 | return t[0]
182 |
183 | TORCH_MAJOR = int(torch.__version__.split('.')[0])
184 | TORCH_MINOR = int(torch.__version__.split('.')[1])
185 | if TORCH_MAJOR == 0 and TORCH_MINOR <= 4:
186 | clip_grad_norm = torch.nn.utils.clip_grad_norm
187 | else:
188 | clip_grad_norm = torch.nn.utils.clip_grad_norm_
189 |
--------------------------------------------------------------------------------
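Usage sketch for /lib/fp16_utils/fp16util.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A compact sketch of how these helpers fit together in one mixed-precision training step with
a static loss scale; FP16_Optimizer in fp16_optimizer.py wraps the same pattern. A CUDA
device is assumed for the fp16 math, and the scale value of 128 is illustrative.

import torch
import torch.nn as nn
from fp16_utils.fp16util import (network_to_half, prep_param_lists,
                                 model_grads_to_master_grads,
                                 master_params_to_model_params)

model = network_to_half(nn.Linear(8, 2).cuda())         # fp16 weights, fp32-safe BatchNorm
model_params, master_params = prep_param_lists(model)   # fp16 model params + fp32 master copies
optimizer = torch.optim.SGD(master_params, lr=0.01)
loss_scale = 128.0

out = model(torch.randn(4, 8).cuda())                   # tofp16 -> fp16 linear -> tofp32
loss = out.pow(2).mean() * loss_scale                   # scale so fp16 grads do not underflow

model.zero_grad()
loss.backward()
model_grads_to_master_grads(model_params, master_params)
for p in master_params:
    p.grad.data.mul_(1.0 / loss_scale)                  # unscale in fp32
optimizer.step()
master_params_to_model_params(model_params, master_params)  # copy updated fp32 weights back

--------------------------------------------------------------------------------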
/lib/fp16_utils/loss_scaler.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Based on:
3 | # apex
4 | # Copyright (c) NVIDIA
5 | # Licensed under the BSD 3-Clause "New" or "Revised" License
6 | # https://github.com/NVIDIA/apex
7 | # All rights reserved.
8 | #
9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
10 | # following conditions are met:
11 | #
12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
13 | # disclaimer.
14 | #
15 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
16 | # disclaimer in the documentation and/or other materials provided with the distribution.
17 | #
18 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
19 | # products derived from this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
22 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | # ------------------------------------------------------------------------------
29 | # Copyright (c) Microsoft
30 | # Licensed under the MIT License.
31 | # Modified by Bowen Cheng
32 | # ------------------------------------------------------------------------------
33 |
34 | import torch
35 |
36 |
37 | # item() is a recent addition, so this helps with backward compatibility.
38 | def to_python_float(t):
39 | if hasattr(t, 'item'):
40 | return t.item()
41 | else:
42 | return t[0]
43 |
44 |
45 | class LossScaler:
46 | """
47 | Class that manages a static loss scale. This class is intended to interact with
48 | :class:`FP16_Optimizer`, and should not be directly manipulated by the user.
49 | Use of :class:`LossScaler` is enabled via the ``static_loss_scale`` argument to
50 | :class:`FP16_Optimizer`'s constructor.
51 | Args:
52 | scale (float, optional, default=1.0): The loss scale.
53 | """
54 |
55 | def __init__(self, scale=1):
56 | self.cur_scale = scale
57 |
58 | # `params` is a list / generator of torch.Variable
59 | def has_overflow(self, params):
60 | return False
61 |
62 | # `x` is a torch.Tensor
63 | def _has_inf_or_nan(x):
64 | return False
65 |
66 | def update_scale(self, overflow):
67 | pass
68 |
69 | @property
70 | def loss_scale(self):
71 | return self.cur_scale
72 |
73 | def scale_gradient(self, module, grad_in, grad_out):
74 | return tuple(self.loss_scale * g for g in grad_in)
75 |
76 | def backward(self, loss):
77 | scaled_loss = loss * self.loss_scale
78 | scaled_loss.backward()
79 |
80 |
81 | class DynamicLossScaler:
82 | """
83 | Class that manages dynamic loss scaling. It is recommended to use :class:`DynamicLossScaler`
84 | indirectly, by supplying ``dynamic_loss_scale=True`` to the constructor of
85 | :class:`FP16_Optimizer`. However, it's important to understand how :class:`DynamicLossScaler`
86 | operates, because the default options can be changed using the
87 |     ``dynamic_loss_args`` argument to :class:`FP16_Optimizer`'s constructor.
88 | Loss scaling is designed to combat the problem of underflowing gradients encountered at long
89 | times when training fp16 networks. Dynamic loss scaling begins by attempting a very high loss
90 | scale. Ironically, this may result in OVERflowing gradients. If overflowing gradients are
91 | encountered, :class:`DynamicLossScaler` informs :class:`FP16_Optimizer` that an overflow has
92 | occurred.
93 | :class:`FP16_Optimizer` then skips the update step for this particular iteration/minibatch,
94 | and :class:`DynamicLossScaler` adjusts the loss scale to a lower value.
95 | If a certain number of iterations occur without overflowing gradients detected,
96 | :class:`DynamicLossScaler` increases the loss scale once more.
97 | In this way :class:`DynamicLossScaler` attempts to "ride the edge" of
98 | always using the highest loss scale possible without incurring overflow.
99 | Args:
100 | init_scale (float, optional, default=2**32): Initial loss scale attempted by :class:`DynamicLossScaler.`
101 | scale_factor (float, optional, default=2.0): Factor used when adjusting the loss scale. If an overflow is encountered, the loss scale is readjusted to loss scale/``scale_factor``. If ``scale_window`` consecutive iterations take place without an overflow, the loss scale is readjusted to loss_scale*``scale_factor``.
102 | scale_window (int, optional, default=1000): Number of consecutive iterations without an overflow to wait before increasing the loss scale.
103 | """
104 |
105 | def __init__(self,
106 | init_scale=2 ** 32,
107 | scale_factor=2.,
108 | scale_window=1000):
109 | self.cur_scale = init_scale
110 | self.cur_iter = 0
111 | self.last_overflow_iter = -1
112 | self.scale_factor = scale_factor
113 | self.scale_window = scale_window
114 |
115 | # `params` is a list / generator of torch.Variable
116 | def has_overflow(self, params):
117 | for p in params:
118 | # if p.grad is not None and DynamicLossScaler._has_inf_or_nan(p.grad.data):
119 | # return True
120 | if p.grad is not None and self._has_inf_or_nan(p.grad.data):
121 | return True
122 |
123 | return False
124 |
125 | # `x` is a torch.Tensor
126 | # def _has_inf_or_nan(x):
127 | def _has_inf_or_nan(self, x):
128 | try:
129 | # if x is half, the .float() incurs an additional deep copy, but it's necessary if
130 | # Pytorch's .sum() creates a one-element tensor of the same type as x
131 | # (which is true for some recent version of pytorch).
132 | cpu_sum = float(x.float().sum())
133 | # More efficient version that can be used if .sum() returns a Python scalar
134 | # cpu_sum = float(x.sum())
135 | except RuntimeError as instance:
136 | # We want to check if inst is actually an overflow exception.
137 | # RuntimeError could come from a different error.
138 | # If so, we still want the exception to propagate.
139 | if "value cannot be converted" not in instance.args[0]:
140 | raise
141 | return True
142 | else:
143 | if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum:
144 | return True
145 | return False
146 |
147 | # `overflow` is boolean indicating whether the gradient overflowed
148 | def update_scale(self, overflow):
149 | if overflow:
150 | # self.cur_scale /= self.scale_factor
151 | self.cur_scale = max(self.cur_scale / self.scale_factor, 1)
152 | self.last_overflow_iter = self.cur_iter
153 | else:
154 | if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0:
155 | self.cur_scale *= self.scale_factor
156 | self.cur_iter += 1
157 |
158 | @property
159 | def loss_scale(self):
160 | return self.cur_scale
161 |
162 | def scale_gradient(self, module, grad_in, grad_out):
163 | return tuple(self.loss_scale * g for g in grad_in)
164 |
165 | def backward(self, loss):
166 | scaled_loss = loss * self.loss_scale
167 | scaled_loss.backward()
168 |
169 |
170 | ##############################################################
171 | # Example usage below here -- assuming it's in a separate file
172 | ##############################################################
173 | """
174 | TO-DO separate out into an example.
175 | if __name__ == "__main__":
176 | import torch
177 | from torch.autograd import Variable
178 | from dynamic_loss_scaler import DynamicLossScaler
179 | # N is batch size; D_in is input dimension;
180 | # H is hidden dimension; D_out is output dimension.
181 | N, D_in, H, D_out = 64, 1000, 100, 10
182 | # Create random Tensors to hold inputs and outputs, and wrap them in Variables.
183 | x = Variable(torch.randn(N, D_in), requires_grad=False)
184 | y = Variable(torch.randn(N, D_out), requires_grad=False)
185 | w1 = Variable(torch.randn(D_in, H), requires_grad=True)
186 | w2 = Variable(torch.randn(H, D_out), requires_grad=True)
187 | parameters = [w1, w2]
188 | learning_rate = 1e-6
189 | optimizer = torch.optim.SGD(parameters, lr=learning_rate)
190 | loss_scaler = DynamicLossScaler()
191 | for t in range(500):
192 | y_pred = x.mm(w1).clamp(min=0).mm(w2)
193 | loss = (y_pred - y).pow(2).sum() * loss_scaler.loss_scale
194 | print('Iter {} loss scale: {}'.format(t, loss_scaler.loss_scale))
195 | print('Iter {} scaled loss: {}'.format(t, loss.data[0]))
196 | print('Iter {} unscaled loss: {}'.format(t, loss.data[0] / loss_scaler.loss_scale))
197 | # Run backprop
198 | optimizer.zero_grad()
199 | loss.backward()
200 | # Check for overflow
201 | has_overflow = DynamicLossScaler.has_overflow(parameters)
202 | # If no overflow, unscale grad and update as usual
203 | if not has_overflow:
204 | for param in parameters:
205 | param.grad.data.mul_(1. / loss_scaler.loss_scale)
206 | optimizer.step()
207 | # Otherwise, don't do anything -- ie, skip iteration
208 | else:
209 | print('OVERFLOW!')
210 | # Update loss scale for next iteration
211 | loss_scaler.update_scale(has_overflow)
212 | """
213 |
--------------------------------------------------------------------------------
/lib/models/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import models.pose_higher_hrnet
12 | #import models.pose_efficientnet
13 | #import models.pose_efficientnet_all
--------------------------------------------------------------------------------
/lib/models/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/efficientnet_blocks.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/efficientnet_blocks.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/pose_efficientnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/pose_efficientnet.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/pose_efficientnet_all.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/pose_efficientnet_all.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/pose_higher_hrnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/pose_higher_hrnet.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/models/efficientnet_blocks.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True):
8 | modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)]
9 | if bn:
10 | modules.append(nn.BatchNorm2d(out_channels))
11 | if relu:
12 | modules.append(nn.ReLU(inplace=True))
13 | return nn.Sequential(*modules)
14 |
15 | def conv_bn(inp, oup, stride, use_batch_norm=True):
16 | #ReLU = nn.ReLU if onnx_compatible else nn.ReLU6
17 |
18 | if use_batch_norm:
19 | return nn.Sequential(
20 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
21 | nn.BatchNorm2d(oup),
22 | nn.ReLU(inplace=True),
23 | )
24 | else:
25 | return nn.Sequential(
26 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
27 | nn.ReLU(inplace=True),
28 | )
29 |
30 | def conv1(in_channels,out_channels,stride):
31 | return nn.Sequential(
32 | nn.Conv2d(in_channels,out_channels,3,stride,1,bias=False),
33 | nn.BatchNorm2d(out_channels),
34 | nn.ReLU6(inplace=True),
35 | )
36 |
37 | def conv_pw(in_channels, out_channels):
38 | return nn.Sequential(
39 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
40 | nn.BatchNorm2d(out_channels),
41 | nn.ReLU6(inplace=True),
42 | )
43 |
44 | def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1):
45 | return nn.Sequential(
46 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False),
47 | nn.ELU(inplace=True),
48 |
49 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
50 | nn.ELU(inplace=True),
51 | )
52 |
53 | def conv_bn_act(in_, out_, kernel_size,
54 | stride=1, groups=1, bias=True,
55 | eps=1e-3, momentum=0.01):
56 | return nn.Sequential(
57 | SamePadConv2d(in_, out_, kernel_size, stride, groups=groups, bias=bias),
58 | nn.BatchNorm2d(out_, eps, momentum),
59 | Swish()
60 | )
61 |
62 |
63 | class SamePadConv2d(nn.Conv2d):
64 | """
65 | Conv with TF padding='same'
66 | https://github.com/pytorch/pytorch/issues/3867#issuecomment-349279036
67 | """
68 |
69 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True, padding_mode="zeros"):
70 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias, padding_mode)
71 |
72 | def get_pad_odd(self, in_, weight, stride, dilation):
73 | effective_filter_size_rows = (weight - 1) * dilation + 1
74 | out_rows = (in_ + stride - 1) // stride
75 | padding_needed = max(0, (out_rows - 1) * stride + effective_filter_size_rows - in_)
76 | padding_rows = max(0, (out_rows - 1) * stride + (weight - 1) * dilation + 1 - in_)
77 | rows_odd = (padding_rows % 2 != 0)
78 | return padding_rows, rows_odd
79 |
80 | def forward(self, x):
81 | padding_rows, rows_odd = self.get_pad_odd(x.shape[2], self.weight.shape[2], self.stride[0], self.dilation[0])
82 | padding_cols, cols_odd = self.get_pad_odd(x.shape[3], self.weight.shape[3], self.stride[1], self.dilation[1])
83 |
84 | if rows_odd or cols_odd:
85 | x = F.pad(x, [0, int(cols_odd), 0, int(rows_odd)])
86 |
87 | return F.conv2d(x, self.weight, self.bias, self.stride,
88 | padding=(padding_rows // 2, padding_cols // 2),
89 | dilation=self.dilation, groups=self.groups)
90 |
91 |
92 | class Swish(nn.Module):
93 | def forward(self, x):
94 | return x * torch.sigmoid(x)
95 |
96 |
97 | class Flatten(nn.Module):
98 | def forward(self, x):
99 | return x.view(x.shape[0], -1)
100 |
101 |
102 | class SEModule(nn.Module):
103 | def __init__(self, in_, squeeze_ch):
104 | super().__init__()
105 | self.se = nn.Sequential(
106 | nn.AdaptiveAvgPool2d(1),
107 | nn.Conv2d(in_, squeeze_ch, kernel_size=1, stride=1, padding=0, bias=True),
108 | Swish(),
109 | nn.Conv2d(squeeze_ch, in_, kernel_size=1, stride=1, padding=0, bias=True),
110 | )
111 |
112 | def forward(self, x):
113 | return x * torch.sigmoid(self.se(x))
114 |
115 |
116 | class DropConnect(nn.Module):
117 | def __init__(self, ratio):
118 | super().__init__()
119 | self.ratio = 1.0 - ratio
120 |
121 | def forward(self, x):
122 | if not self.training:
123 | return x
124 |
125 | random_tensor = self.ratio
126 | random_tensor += torch.rand([x.shape[0], 1, 1, 1], dtype=torch.float, device=x.device)
127 | random_tensor.requires_grad_(False)
128 | return x / self.ratio * random_tensor.floor()
129 |
130 |
--------------------------------------------------------------------------------
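Usage sketch for /lib/models/efficientnet_blocks.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A short check of the TF-style 'same' padding these blocks provide: the output spatial size is
ceil(input / stride) regardless of kernel size (the input size 127 is chosen arbitrarily;
lib/ is assumed to be on PYTHONPATH).

import torch
from models.efficientnet_blocks import SamePadConv2d, conv_bn_act

x = torch.randn(1, 3, 127, 127)
conv = SamePadConv2d(3, 16, kernel_size=3, stride=2)
print(conv(x).shape)                        # torch.Size([1, 16, 64, 64])

block = conv_bn_act(3, 16, kernel_size=5)   # SamePadConv2d + BatchNorm2d + Swish
print(block(x).shape)                       # torch.Size([1, 16, 127, 127])

--------------------------------------------------------------------------------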
/lib/utils/__pycache__/transforms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/transforms.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/utils/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/utils/__pycache__/vis.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/vis.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/utils/__pycache__/zipreader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/zipreader.cpython-36.pyc
--------------------------------------------------------------------------------
/lib/utils/transforms.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import numpy as np
13 | import cv2
14 |
15 |
16 | def flip_back(output_flipped, matched_parts):
17 | '''
18 |     output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
19 | '''
20 | assert output_flipped.ndim == 4,\
21 | 'output_flipped should be [batch_size, num_joints, height, width]'
22 |
23 | output_flipped = output_flipped[:, :, :, ::-1]
24 |
25 | for pair in matched_parts:
26 | tmp = output_flipped[:, pair[0], :, :].copy()
27 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
28 | output_flipped[:, pair[1], :, :] = tmp
29 |
30 | return output_flipped
31 |
32 |
33 | def fliplr_joints(joints, joints_vis, width, matched_parts):
34 | """
35 | flip coords
36 | """
37 | # Flip horizontal
38 | joints[:, 0] = width - joints[:, 0] - 1
39 |
40 | # Change left-right parts
41 | for pair in matched_parts:
42 | joints[pair[0], :], joints[pair[1], :] = \
43 | joints[pair[1], :], joints[pair[0], :].copy()
44 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \
45 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
46 |
47 | return joints*joints_vis, joints_vis
48 |
49 |
50 | def transform_preds(coords, center, scale, output_size):
51 | # target_coords = np.zeros(coords.shape)
52 | target_coords = coords.copy()
53 | trans = get_affine_transform(center, scale, 0, output_size, inv=1)
54 | for p in range(coords.shape[0]):
55 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
56 | return target_coords
57 |
58 |
59 | def get_affine_transform(center,
60 | scale,
61 | rot,
62 | output_size,
63 | shift=np.array([0, 0], dtype=np.float32),
64 | inv=0):
65 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
66 | print(scale)
67 | scale = np.array([scale, scale])
68 |
69 | scale_tmp = scale * 200.0
70 | src_w = scale_tmp[0]
71 | dst_w = output_size[0]
72 | dst_h = output_size[1]
73 |
74 | rot_rad = np.pi * rot / 180
75 | src_dir = get_dir([0, src_w * -0.5], rot_rad)
76 | dst_dir = np.array([0, dst_w * -0.5], np.float32)
77 |
78 | src = np.zeros((3, 2), dtype=np.float32)
79 | dst = np.zeros((3, 2), dtype=np.float32)
80 | src[0, :] = center + scale_tmp * shift
81 | src[1, :] = center + src_dir + scale_tmp * shift
82 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
83 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
84 |
85 | src[2:, :] = get_3rd_point(src[0, :], src[1, :])
86 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
87 |
88 | if inv:
89 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
90 | else:
91 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
92 |
93 | return trans
94 |
95 |
96 | def affine_transform(pt, t):
97 | new_pt = np.array([pt[0], pt[1], 1.]).T
98 | new_pt = np.dot(t, new_pt)
99 | return new_pt[:2]
100 |
101 |
102 | def get_3rd_point(a, b):
103 | direct = a - b
104 | return b + np.array([-direct[1], direct[0]], dtype=np.float32)
105 |
106 |
107 | def get_dir(src_point, rot_rad):
108 | sn, cs = np.sin(rot_rad), np.cos(rot_rad)
109 |
110 | src_result = [0, 0]
111 | src_result[0] = src_point[0] * cs - src_point[1] * sn
112 | src_result[1] = src_point[0] * sn + src_point[1] * cs
113 |
114 | return src_result
115 |
116 |
117 | def crop(img, center, scale, output_size, rot=0):
118 | trans = get_affine_transform(center, scale, rot, output_size)
119 |
120 | dst_img = cv2.warpAffine(img,
121 | trans,
122 | (int(output_size[0]), int(output_size[1])),
123 | flags=cv2.INTER_LINEAR)
124 |
125 | return dst_img
126 |
127 |
128 | def resize(image, input_size):
129 | h, w, _ = image.shape
130 |
131 | center = np.array([int(w/2.0+0.5), int(h/2.0+0.5)])
132 | if w < h:
133 | w_resized = input_size
134 | h_resized = int((input_size / w * h + 63) // 64 * 64)
135 | scale_w = w / 200.0
136 | scale_h = h_resized / w_resized * w / 200.0
137 | else:
138 | h_resized = input_size
139 | w_resized = int((input_size / h * w + 63) // 64 * 64)
140 | scale_h = h / 200.0
141 | scale_w = w_resized / h_resized * h / 200.0
142 |
143 | scale = np.array([scale_w, scale_h])
144 | trans = get_affine_transform(center, scale, 0, (w_resized, h_resized))
145 |
146 | image_resized = cv2.warpAffine(
147 | image,
148 | trans,
149 | (int(w_resized), int(h_resized))
150 | )
151 |
152 | return image_resized, center, scale
153 |
154 |
155 | def get_multi_scale_size(image, input_size, current_scale, min_scale):
156 | h, w, _ = image.shape
157 | center = np.array([int(w / 2.0 + 0.5), int(h / 2.0 + 0.5)])
158 |
159 | # calculate the size for min_scale
160 | min_input_size = int((min_scale * input_size + 63)//64 * 64)
161 | if w < h:
162 | w_resized = int(min_input_size * current_scale / min_scale)
163 | h_resized = int(
164 | int((min_input_size/w*h+63)//64*64)*current_scale/min_scale
165 | )
166 | scale_w = w / 200.0
167 | scale_h = h_resized / w_resized * w / 200.0
168 | else:
169 | h_resized = int(min_input_size * current_scale / min_scale)
170 | w_resized = int(
171 | int((min_input_size/h*w+63)//64*64)*current_scale/min_scale
172 | )
173 | scale_h = h / 200.0
174 | scale_w = w_resized / h_resized * h / 200.0
175 |
176 | return (w_resized, h_resized), center, np.array([scale_w, scale_h])
177 |
178 |
179 | def resize_align_multi_scale(image, input_size, current_scale, min_scale):
180 | size_resized, center, scale = get_multi_scale_size(
181 | image, input_size, current_scale, min_scale
182 | )
183 | trans = get_affine_transform(center, scale, 0, size_resized)
184 |
185 | image_resized = cv2.warpAffine(
186 | image,
187 | trans,
188 | size_resized
189 | # (int(w_resized), int(h_resized))
190 | )
191 |
192 | return image_resized, center, scale
193 |
194 |
195 | def get_final_preds(grouped_joints, center, scale, heatmap_size):
196 | final_results = []
197 | for person in grouped_joints[0]:
198 | joints = np.zeros((person.shape[0], 3))
199 | joints = transform_preds(person, center, scale, heatmap_size)
200 | final_results.append(joints)
201 |
202 | return final_results
203 |
--------------------------------------------------------------------------------
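Usage sketch for /lib/utils/transforms.py (illustrative, not a repository file):
--------------------------------------------------------------------------------
A quick check of the inference-time resize and of the inverse mapping back to the original
frame (a hypothetical 480x640 image and a 512-pixel input side; the longer side is rounded up
to a multiple of 64 as in the code above).

import numpy as np
from utils.transforms import resize, transform_preds

image = np.zeros((480, 640, 3), dtype=np.uint8)
resized, center, scale = resize(image, 512)
print(resized.shape)                               # (512, 704, 3)

# Map heatmap-space joints (here assumed to be at quarter resolution) back to image coordinates.
coords = np.array([[100.0, 50.0]])
heatmap_size = (resized.shape[1] // 4, resized.shape[0] // 4)
print(transform_preds(coords, center, scale, heatmap_size))

--------------------------------------------------------------------------------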
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import os
12 | import logging
13 | import time
14 | from collections import namedtuple
15 | from pathlib import Path
16 |
17 | import torch
18 | import torch.optim as optim
19 | import torch.nn as nn
20 |
21 |
22 | def setup_logger(final_output_dir, rank, phase):
23 | time_str = time.strftime('%Y-%m-%d-%H-%M')
24 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank)
25 | final_log_file = os.path.join(final_output_dir, log_file)
26 | head = '%(asctime)-15s %(message)s'
27 | # logging.basicConfig(format=head)
28 | logging.basicConfig(filename=str(final_log_file),
29 | format=head)
30 | logger = logging.getLogger()
31 | logger.setLevel(logging.INFO)
32 | console = logging.StreamHandler()
33 | logging.getLogger('').addHandler(console)
34 |
35 | return logger, time_str
36 |
37 |
38 | def create_logger(cfg, cfg_name, phase='train'):
39 | root_output_dir = Path(cfg.OUTPUT_DIR)
40 | # set up logger
41 | if not root_output_dir.exists() and cfg.RANK == 0:
42 | print('=> creating {}'.format(root_output_dir))
43 | root_output_dir.mkdir()
44 | else:
45 | while not root_output_dir.exists():
46 | print('=> wait for {} created'.format(root_output_dir))
47 | time.sleep(30)
48 |
49 | dataset = cfg.DATASET.DATASET
50 | dataset = dataset.replace(':', '_')
51 | model = cfg.MODEL.NAME
52 | cfg_name = os.path.basename(cfg_name).split('.')[0]
53 |
54 | final_output_dir = root_output_dir / dataset / model / cfg_name
55 |
56 | if cfg.RANK == 0:
57 | print('=> creating {}'.format(final_output_dir))
58 | final_output_dir.mkdir(parents=True, exist_ok=True)
59 | else:
60 | while not final_output_dir.exists():
61 | print('=> wait for {} created'.format(final_output_dir))
62 | time.sleep(5)
63 |
64 | logger, time_str = setup_logger(final_output_dir, cfg.RANK, phase)
65 |
66 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
67 | (cfg_name + '_' + time_str)
68 |
69 | print('=> creating {}'.format(tensorboard_log_dir))
70 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
71 |
72 | return logger, str(final_output_dir), str(tensorboard_log_dir)
73 |
74 |
75 | def get_optimizer(cfg, model):
76 | optimizer = None
77 | if cfg.TRAIN.OPTIMIZER == 'sgd':
78 | optimizer = optim.SGD(
79 | model.parameters(),
80 | lr=cfg.TRAIN.LR,
81 | momentum=cfg.TRAIN.MOMENTUM,
82 | weight_decay=cfg.TRAIN.WD,
83 | nesterov=cfg.TRAIN.NESTEROV
84 | )
85 | elif cfg.TRAIN.OPTIMIZER == 'adam':
86 | optimizer = optim.Adam(
87 | model.parameters(),
88 | lr=cfg.TRAIN.LR
89 | )
90 |
91 | return optimizer
92 |
93 |
94 | def save_checkpoint(states, is_best, output_dir,
95 | filename='checkpoint.pth.tar'):
96 | torch.save(states, os.path.join(output_dir, filename))
97 |
98 | if is_best and 'state_dict' in states:
99 | torch.save(
100 | states['best_state_dict'],
101 | os.path.join(output_dir, 'model_best.pth.tar')
102 | )
103 |
104 |
105 | def get_model_summary(model, *input_tensors, item_length=26, verbose=True):
106 | """
107 |     :param model: network to run and summarize (evaluated once on the given inputs)
108 |     :param input_tensors: example input tensor(s) forwarded through the model to record per-layer shapes
109 |     :param item_length: column width used when formatting the summary table
110 |     :return: formatted summary string with per-layer output sizes, parameter counts and multiply-adds
111 | """
112 |
113 | summary = []
114 |
115 | ModuleDetails = namedtuple(
116 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"])
117 | hooks = []
118 | layer_instances = {}
119 |
120 | def add_hooks(module):
121 |
122 | def hook(module, input, output):
123 | class_name = str(module.__class__.__name__)
124 |
125 | instance_index = 1
126 | if class_name not in layer_instances:
127 | layer_instances[class_name] = instance_index
128 | else:
129 | instance_index = layer_instances[class_name] + 1
130 | layer_instances[class_name] = instance_index
131 |
132 | layer_name = class_name + "_" + str(instance_index)
133 |
134 | params = 0
135 |
136 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \
137 | class_name.find("Linear") != -1:
138 | for param_ in module.parameters():
139 | params += param_.view(-1).size(0)
140 |
141 | flops = "Not Available"
142 | if class_name.find("Conv") != -1 and hasattr(module, "weight"):
143 | flops = (
144 | torch.prod(
145 | torch.LongTensor(list(module.weight.data.size()))) *
146 | torch.prod(
147 | torch.LongTensor(list(output.size())[2:]))).item()
148 | elif isinstance(module, nn.Linear):
149 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \
150 | * input[0].size(1)).item()
151 |
152 | if isinstance(input[0], list):
153 | input = input[0]
154 | if isinstance(output, list):
155 | output = output[0]
156 |
157 | summary.append(
158 | ModuleDetails(
159 | name=layer_name,
160 | input_size=list(input[0].size()),
161 | output_size=list(output.size()),
162 | num_parameters=params,
163 | multiply_adds=flops)
164 | )
165 |
166 | if not isinstance(module, nn.ModuleList) \
167 | and not isinstance(module, nn.Sequential) \
168 | and module != model:
169 | hooks.append(module.register_forward_hook(hook))
170 |
171 | model.eval()
172 | model.apply(add_hooks)
173 |
174 | space_len = item_length
175 |
176 | model(*input_tensors)
177 | for hook in hooks:
178 | hook.remove()
179 |
180 | details = ''
181 | if verbose:
182 | details = "Model Summary" + \
183 | os.linesep + \
184 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format(
185 | ' ' * (space_len - len("Name")),
186 | ' ' * (space_len - len("Input Size")),
187 | ' ' * (space_len - len("Output Size")),
188 | ' ' * (space_len - len("Parameters")),
189 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \
190 | + os.linesep + '-' * space_len * 5 + os.linesep
191 | params_sum = 0
192 | flops_sum = 0
193 | for layer in summary:
194 | params_sum += layer.num_parameters
195 | if layer.multiply_adds != "Not Available":
196 | flops_sum += layer.multiply_adds
197 | if verbose:
198 | details += "{}{}{}{}{}{}{}{}{}{}".format(
199 | layer.name,
200 | ' ' * (space_len - len(layer.name)),
201 | layer.input_size,
202 | ' ' * (space_len - len(str(layer.input_size))),
203 | layer.output_size,
204 | ' ' * (space_len - len(str(layer.output_size))),
205 | layer.num_parameters,
206 | ' ' * (space_len - len(str(layer.num_parameters))),
207 | layer.multiply_adds,
208 | ' ' * (space_len - len(str(layer.multiply_adds)))) \
209 | + os.linesep + '-' * space_len * 5 + os.linesep
210 |
211 | details += os.linesep \
212 | + "Total Parameters: {:,}".format(params_sum) \
213 | + os.linesep + '-' * space_len * 5 + os.linesep
214 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,}".format(flops_sum) \
215 | + os.linesep + '-' * space_len * 5 + os.linesep
216 | details += "Number of Layers" + os.linesep
217 | for layer in layer_instances:
218 | details += "{} : {} layers ".format(layer, layer_instances[layer])
219 |
220 | return details
221 |
222 |
223 | class AverageMeter(object):
224 | """Computes and stores the average and current value"""
225 | def __init__(self):
226 | self.reset()
227 |
228 | def reset(self):
229 | self.val = 0
230 | self.avg = 0
231 | self.sum = 0
232 | self.count = 0
233 |
234 | def update(self, val, n=1):
235 | self.val = val
236 | self.sum += val * n
237 | self.count += n
238 | self.avg = self.sum / self.count if self.count != 0 else 0
239 |
--------------------------------------------------------------------------------
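A minimal usage sketch for the AverageMeter defined in utils.py above: it keeps a sample-weighted running mean, which the trainer uses for loss logging. The import assumes lib/ is on sys.path (tools/_init_paths.py arranges this), and the loss values are illustrative:

    from utils.utils import AverageMeter

    losses = AverageMeter()
    losses.update(0.9, n=32)        # first batch: 32 samples with mean loss 0.9
    losses.update(0.7, n=32)        # second batch
    print(losses.val, losses.avg)   # val is the last batch mean (0.7); avg is the sample-weighted mean, (0.9*32 + 0.7*32) / 64 = 0.8
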
/lib/utils/vis.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import math
13 |
14 | import cv2
15 | import numpy as np
16 | import torchvision
17 |
18 | from dataset import VIS_CONFIG
19 |
20 |
21 | def add_joints(image, joints, color, dataset='COCO'):
22 | part_idx = VIS_CONFIG[dataset]['part_idx']
23 | part_orders = VIS_CONFIG[dataset]['part_orders']
24 |
25 | def link(a, b, color):
26 | if part_idx[a] < joints.shape[0] and part_idx[b] < joints.shape[0]:
27 | jointa = joints[part_idx[a]]
28 | jointb = joints[part_idx[b]]
29 | if jointa[2] > 0 and jointb[2] > 0:
30 | cv2.line(
31 | image,
32 | (int(jointa[0]), int(jointa[1])),
33 | (int(jointb[0]), int(jointb[1])),
34 | color,
35 | 2
36 | )
37 |
38 | # add joints
39 | for joint in joints:
40 | if joint[2] > 0:
41 | cv2.circle(image, (int(joint[0]), int(joint[1])), 1, color, 2)
42 |
43 | # add link
44 | for pair in part_orders:
45 | link(pair[0], pair[1], color)
46 |
47 | return image
48 |
49 |
50 | def save_valid_image(image, joints, file_name, dataset='COCO'):
51 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
52 |
53 | for person in joints:
54 | color = np.random.randint(0, 255, size=3)
55 | color = [int(i) for i in color]
56 | add_joints(image, person, color, dataset=dataset)
57 |
58 | cv2.imwrite(file_name, image)
59 |
60 |
61 | def make_heatmaps(image, heatmaps):
62 | heatmaps = heatmaps.mul(255)\
63 | .clamp(0, 255)\
64 | .byte()\
65 | .cpu().numpy()
66 |
67 | num_joints, height, width = heatmaps.shape
68 | image_resized = cv2.resize(image, (int(width), int(height)))
69 |
70 | image_grid = np.zeros((height, (num_joints+1)*width, 3), dtype=np.uint8)
71 |
72 | for j in range(num_joints):
73 | # add_joints(image_resized, joints[:, j, :])
74 | heatmap = heatmaps[j, :, :]
75 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
76 | image_fused = colored_heatmap*0.7 + image_resized*0.3
77 |
78 | width_begin = width * (j+1)
79 | width_end = width * (j+2)
80 | image_grid[:, width_begin:width_end, :] = image_fused
81 |
82 | image_grid[:, 0:width, :] = image_resized
83 |
84 | return image_grid
85 |
86 |
87 | def make_tagmaps(image, tagmaps):
88 | num_joints, height, width = tagmaps.shape
89 | image_resized = cv2.resize(image, (int(width), int(height)))
90 |
91 | image_grid = np.zeros((height, (num_joints+1)*width, 3), dtype=np.uint8)
92 |
93 | for j in range(num_joints):
94 | tagmap = tagmaps[j, :, :]
95 | min = float(tagmap.min())
96 | max = float(tagmap.max())
97 | tagmap = tagmap.add(-min)\
98 | .div(max - min + 1e-5)\
99 | .mul(255)\
100 | .clamp(0, 255)\
101 | .byte()\
102 | .cpu()\
103 | .numpy()
104 |
105 | colored_tagmap = cv2.applyColorMap(tagmap, cv2.COLORMAP_JET)
106 | image_fused = colored_tagmap*0.9 + image_resized*0.1
107 |
108 | width_begin = width * (j+1)
109 | width_end = width * (j+2)
110 | image_grid[:, width_begin:width_end, :] = image_fused
111 |
112 | image_grid[:, 0:width, :] = image_resized
113 |
114 | return image_grid
115 |
116 |
117 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis,
118 | file_name, nrow=8, padding=2):
119 | '''
120 | batch_image: [batch_size, channel, height, width]
121 |     batch_joints: [batch_size, num_joints, 3]
122 |     batch_joints_vis: [batch_size, num_joints, 1]
123 |     file_name: path of the image file to write
124 | '''
125 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True)
126 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
127 | ndarr = cv2.cvtColor(ndarr, cv2.COLOR_RGB2BGR)
128 |
129 | nmaps = batch_image.size(0)
130 | xmaps = min(nrow, nmaps)
131 | ymaps = int(math.ceil(float(nmaps) / xmaps))
132 | height = int(batch_image.size(2) + padding)
133 | width = int(batch_image.size(3) + padding)
134 | k = 0
135 | for y in range(ymaps):
136 | for x in range(xmaps):
137 | if k >= nmaps:
138 | break
139 | joints = batch_joints[k]
140 | joints_vis = batch_joints_vis[k]
141 |
142 | for joint, joint_vis in zip(joints, joints_vis):
143 | joint[0] = x * width + padding + joint[0]
144 | joint[1] = y * height + padding + joint[1]
145 | if joint_vis[0]:
146 | cv2.circle(
147 | ndarr,
148 | (int(joint[0]), int(joint[1])),
149 | 2,
150 | [255, 0, 0],
151 | 2
152 | )
153 | k = k + 1
154 | cv2.imwrite(file_name, ndarr)
155 |
156 |
157 | def save_batch_maps(
158 | batch_image,
159 | batch_maps,
160 | batch_mask,
161 | file_name,
162 | map_type='heatmap',
163 | normalize=True
164 | ):
165 | if normalize:
166 | batch_image = batch_image.clone()
167 | min = float(batch_image.min())
168 | max = float(batch_image.max())
169 |
170 | batch_image.add_(-min).div_(max - min + 1e-5)
171 |
172 | batch_size = batch_maps.size(0)
173 | num_joints = batch_maps.size(1)
174 | map_height = batch_maps.size(2)
175 | map_width = batch_maps.size(3)
176 |
177 | grid_image = np.zeros(
178 | (batch_size*map_height, (num_joints+1)*map_width, 3),
179 | dtype=np.uint8
180 | )
181 |
182 | for i in range(batch_size):
183 | image = batch_image[i].mul(255)\
184 | .clamp(0, 255)\
185 | .byte()\
186 | .permute(1, 2, 0)\
187 | .cpu().numpy()
188 |
189 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
190 | maps = batch_maps[i]
191 |
192 | if map_type == 'heatmap':
193 | image_with_hms = make_heatmaps(image, maps)
194 | elif map_type == 'tagmap':
195 | image_with_hms = make_tagmaps(image, maps)
196 |
197 | height_begin = map_height * i
198 | height_end = map_height * (i + 1)
199 |
200 | grid_image[height_begin:height_end, :, :] = image_with_hms
201 | if batch_mask is not None:
202 | mask = np.expand_dims(batch_mask[i].byte().cpu().numpy(), -1)
203 | grid_image[height_begin:height_end, :map_width, :] = \
204 | grid_image[height_begin:height_end, :map_width, :] * mask
205 |
206 | cv2.imwrite(file_name, grid_image)
207 |
208 |
209 | def save_debug_images(
210 | config,
211 | batch_images,
212 | batch_heatmaps,
213 | batch_masks,
214 | batch_outputs,
215 | prefix
216 | ):
217 | if not config.DEBUG.DEBUG:
218 | return
219 |
220 | num_joints = config.DATASET.NUM_JOINTS
221 | batch_pred_heatmaps = batch_outputs[:, :num_joints, :, :]
222 | batch_pred_tagmaps = batch_outputs[:, num_joints:, :, :]
223 |
224 | if config.DEBUG.SAVE_HEATMAPS_GT and batch_heatmaps is not None:
225 | file_name = '{}_hm_gt.jpg'.format(prefix)
226 | save_batch_maps(
227 | batch_images, batch_heatmaps, batch_masks, file_name, 'heatmap'
228 | )
229 | if config.DEBUG.SAVE_HEATMAPS_PRED:
230 | file_name = '{}_hm_pred.jpg'.format(prefix)
231 | save_batch_maps(
232 | batch_images, batch_pred_heatmaps, batch_masks, file_name, 'heatmap'
233 | )
234 | if config.DEBUG.SAVE_TAGMAPS_PRED:
235 | file_name = '{}_tag_pred.jpg'.format(prefix)
236 | save_batch_maps(
237 | batch_images, batch_pred_tagmaps, batch_masks, file_name, 'tagmap'
238 | )
239 |
--------------------------------------------------------------------------------
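A hedged sketch of save_valid_image() from vis.py above, drawing synthetic keypoints; it assumes lib/ is on sys.path and that the dataset package (which provides VIS_CONFIG) and its dependencies are importable. The image, keypoints, and output file name are made up:

    import numpy as np
    from utils.vis import save_valid_image

    image = np.zeros((480, 640, 3), dtype=np.uint8)           # RGB image, H x W x 3
    people = np.random.rand(2, 17, 3) * [640.0, 480.0, 1.0]   # 2 people, 17 COCO joints, (x, y, visibility)
    save_valid_image(image, people, 'debug_pose.jpg', dataset='COCO')
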
/lib/utils/zipreader.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import os
12 | import zipfile
13 | import xml.etree.ElementTree as ET
14 |
15 | import cv2
16 | import numpy as np
17 |
18 | _im_zfile = []
19 | _xml_path_zip = []
20 | _xml_zfile = []
21 |
22 |
23 | def imread(filename, flags=cv2.IMREAD_COLOR):
24 | global _im_zfile
25 | path = filename
26 |     pos_at = path.find('@')  # find() returns -1 when '@' is absent, matching the check below
27 |     if pos_at == -1:
28 |         print("character '@' is not found in the given path '%s'" % (path))
29 | assert 0
30 | path_zip = path[0: pos_at]
31 | path_img = path[pos_at + 1:]
32 | if not os.path.isfile(path_zip):
33 | print("zip file '%s' is not found"%(path_zip))
34 | assert 0
35 | for i in range(len(_im_zfile)):
36 | if _im_zfile[i]['path'] == path_zip:
37 | data = _im_zfile[i]['zipfile'].read(path_img)
38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
39 |
40 | _im_zfile.append({
41 | 'path': path_zip,
42 | 'zipfile': zipfile.ZipFile(path_zip, 'r')
43 | })
44 | data = _im_zfile[-1]['zipfile'].read(path_img)
45 |
46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
47 |
48 |
49 | def xmlread(filename):
50 | global _xml_path_zip
51 | global _xml_zfile
52 | path = filename
53 |     pos_at = path.find('@')  # find() returns -1 when '@' is absent, matching the check below
54 |     if pos_at == -1:
55 |         print("character '@' is not found in the given path '%s'" % (path))
56 | assert 0
57 | path_zip = path[0: pos_at]
58 | path_xml = path[pos_at + 2:]
59 | if not os.path.isfile(path_zip):
60 | print("zip file '%s' is not found"%(path_zip))
61 | assert 0
62 |     for i in range(len(_xml_path_zip)):  # range, not Python 2's xrange
63 | if _xml_path_zip[i] == path_zip:
64 | data = _xml_zfile[i].open(path_xml)
65 | return ET.fromstring(data.read())
66 | _xml_path_zip.append(path_zip)
67 | print("read new xml file '%s'"%(path_zip))
68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r'))
69 | data = _xml_zfile[-1].open(path_xml)
70 | return ET.fromstring(data.read())
71 |
--------------------------------------------------------------------------------
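The imread()/xmlread() helpers above read members directly out of a zip archive using an '<archive>@<member>' path convention. A minimal sketch (the archive and member names are hypothetical, and lib/ is assumed to be on sys.path):

    from utils import zipreader

    # decodes the JPEG straight from the archive, without extracting it to disk
    img = zipreader.imread('data/coco/images/train2017.zip@000000000139.jpg')
    print(img.shape)   # (H, W, 3) BGR array, as with cv2.imread
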
/requirements.txt:
--------------------------------------------------------------------------------
1 | pyyaml
2 | scipy
3 | EasyDict==1.7
4 | opencv-python
5 | Cython
6 | pandas
7 | json_tricks
8 | scikit-image
9 | tensorboardX
10 | yacs
11 | cffi
12 | munkres
13 | tqdm
--------------------------------------------------------------------------------
/tools/__pycache__/_init_paths.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/tools/__pycache__/_init_paths.cpython-36.pyc
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import os.path as osp
12 | import sys
13 |
14 |
15 | def add_path(path):
16 | if path not in sys.path:
17 | sys.path.insert(0, path)
18 |
19 |
20 | this_dir = osp.dirname(__file__)
21 |
22 | lib_path = osp.join(this_dir, '..', 'lib')
23 | add_path(lib_path)
24 |
--------------------------------------------------------------------------------
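The training and validation scripts under tools/ import this module first so that lib/ becomes importable without installing it as a package. A new script placed in tools/ would follow the same pattern (sketch; cfg.OUTPUT_DIR is just one example of the config keys exposed by lib/config):

    import _init_paths  # noqa: F401  (side effect: prepends ../lib to sys.path)
    from config import cfg

    print(cfg.OUTPUT_DIR)
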
/tools/crowdpose_concat_train_val.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bowen Cheng (bcheng9@illinois.edu)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import argparse
12 | import json
13 | import os
14 |
15 |
16 | def parse_args():
17 | parser = argparse.ArgumentParser(description='Concat CrowdPose train and val')
18 |
19 | parser.add_argument('--data_dir',
20 | help='data directory containing json annotation file',
21 | default='data/crowd_pose/json',
22 | type=str)
23 |
24 | args = parser.parse_args()
25 |
26 | return args
27 |
28 |
29 | def main():
30 | args = parse_args()
31 |
32 | train_dataset = json.load(open(os.path.join(args.data_dir, 'crowdpose_train.json')))
33 | val_dataset = json.load(open(os.path.join(args.data_dir, 'crowdpose_val.json')))
34 |
35 | trainval_dataset = {}
36 | trainval_dataset['categories'] = train_dataset['categories']
37 | trainval_dataset['images'] = []
38 | trainval_dataset['images'].extend(train_dataset['images'])
39 | trainval_dataset['images'].extend(val_dataset['images'])
40 | trainval_dataset['annotations'] = []
41 | trainval_dataset['annotations'].extend(train_dataset['annotations'])
42 | trainval_dataset['annotations'].extend(val_dataset['annotations'])
43 |
44 | with open(os.path.join(args.data_dir, 'crowdpose_trainval.json'), 'w') as f:
45 | json.dump(trainval_dataset, f)
46 |
47 |
48 | if __name__ == '__main__':
49 | main()
50 |
--------------------------------------------------------------------------------
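A hedged sanity check after running the concat script above; the path matches the script's default --data_dir, and the 'images'/'annotations' keys come from the merged COCO-style annotation format:

    import json

    with open('data/crowd_pose/json/crowdpose_trainval.json') as f:
        trainval = json.load(f)

    # should equal the combined sizes of the train and val splits
    print(len(trainval['images']), len(trainval['annotations']))
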
/tools/dist_train.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import argparse
13 | import os
14 | import pprint
15 | import shutil
16 | import warnings
17 |
18 | import torch
19 | import torch.backends.cudnn as cudnn
20 | import torch.distributed as dist
21 | import torch.multiprocessing as mp
22 | import torch.nn as nn
23 | import torch.nn.parallel
24 | import torch.optim
25 | import torch.utils.data
26 | import torch.utils.data.distributed
27 | from tensorboardX import SummaryWriter
28 |
29 | import _init_paths
30 | import models
31 |
32 | from config import cfg
33 | from config import update_config
34 | from core.loss import MultiLossFactory
35 | from core.trainer import do_train
36 | from dataset import make_dataloader
37 | from fp16_utils.fp16util import network_to_half
38 | from fp16_utils.fp16_optimizer import FP16_Optimizer
39 | from utils.utils import create_logger
40 | from utils.utils import get_optimizer
41 | from utils.utils import save_checkpoint
42 | from utils.utils import setup_logger
43 | import re
44 | #import hickle as hkl
45 | import torch
46 | import torch.nn.functional as F
47 | from torch.autograd import Variable
48 | #from torchviz import make_dot
49 |
50 | #os.environ["CUDA_VISIBLE_DEVICES"]="0"
51 | #os.environ['MASTER_PORT'] = '2000901'
52 |
53 |
54 | def parse_args():
55 | parser = argparse.ArgumentParser(description='Train keypoints network')
56 | # general
57 | parser.add_argument('--cfg',
58 | help='experiment configure file name',
59 | required=True,
60 | type=str)
61 |
62 | parser.add_argument('opts',
63 | help="Modify config options using the command-line",
64 | default=None,
65 | nargs=argparse.REMAINDER)
66 |
67 | # distributed training
68 | parser.add_argument('--gpu',
69 | help='gpu id for multiprocessing training',
70 | type=str)
71 | parser.add_argument('--world-size',
72 | default=1,
73 | type=int,
74 | help='number of nodes for distributed training')
75 | parser.add_argument('--dist-url',
76 | default='tcp://127.0.0.1:24456',
77 | type=str,
78 | help='url used to set up distributed training')
79 | parser.add_argument('--rank',
80 | default=0,
81 | type=int,
82 | help='node rank for distributed training')
83 |
84 | args = parser.parse_args()
85 |
86 | return args
87 |
88 |
89 | def main():
90 | args = parse_args()
91 | update_config(cfg, args)
92 |
93 | cfg.defrost()
94 | cfg.RANK = args.rank
95 | cfg.freeze()
96 |
97 | logger, final_output_dir, tb_log_dir = create_logger(
98 | cfg, args.cfg, 'train'
99 | )
100 |
101 | logger.info(pprint.pformat(args))
102 | logger.info(cfg)
103 |
104 | if args.gpu is not None:
105 | warnings.warn('You have chosen a specific GPU. This will completely '
106 | 'disable data parallelism.')
107 |
108 | if args.dist_url == "env://" and args.world_size == -1:
109 | args.world_size = int(os.environ["WORLD_SIZE"])
110 |
111 | args.distributed = args.world_size > 1 or cfg.MULTIPROCESSING_DISTRIBUTED
112 |
113 | ngpus_per_node = torch.cuda.device_count()
114 | if cfg.MULTIPROCESSING_DISTRIBUTED:
115 | # Since we have ngpus_per_node processes per node, the total world_size
116 | # needs to be adjusted accordingly
117 | args.world_size = ngpus_per_node * args.world_size
118 | # Use torch.multiprocessing.spawn to launch distributed processes: the
119 | # main_worker process function
120 | mp.spawn(
121 | main_worker,
122 | nprocs=ngpus_per_node,
123 | args=(ngpus_per_node, args, final_output_dir, tb_log_dir)
124 | )
125 | else:
126 | # Simply call main_worker function
127 | main_worker(
128 | ','.join([str(i) for i in cfg.GPUS]),
129 | ngpus_per_node,
130 | args,
131 | final_output_dir,
132 | tb_log_dir
133 | )
134 |
135 |
136 | def main_worker(
137 | gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
138 | ):
139 | # cudnn related setting
140 | cudnn.benchmark = cfg.CUDNN.BENCHMARK
141 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
142 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
143 |
144 | if cfg.FP16.ENABLED:
145 | assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
146 |
147 | if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
148 | if not cfg.FP16.ENABLED:
149 |             print("Warning: FP16.STATIC_LOSS_SCALE is ignored because FP16.ENABLED is False.")
150 |
151 | args.gpu = gpu
152 |
153 | if args.gpu is not None:
154 | print("Use GPU: {} for training".format(args.gpu))
155 |
156 | if args.distributed:
157 | if args.dist_url == "env://" and args.rank == -1:
158 | args.rank = int(os.environ["RANK"])
159 | if cfg.MULTIPROCESSING_DISTRIBUTED:
160 | # For multiprocessing distributed training, rank needs to be the
161 | # global rank among all the processes
162 | args.rank = args.rank * ngpus_per_node + gpu
163 | print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
164 | format(args.dist_url, args.world_size, args.rank))
165 | dist.init_process_group(
166 | backend=cfg.DIST_BACKEND,
167 | init_method=args.dist_url,
168 | world_size=args.world_size,
169 | rank=args.rank,
170 | )
171 |
172 | update_config(cfg, args)
173 |
174 | # setup logger
175 | logger, _ = setup_logger(final_output_dir, args.rank, 'train')
176 |
177 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
178 | cfg, is_train=True
179 | )
180 |
181 | # copy model file
182 | if not cfg.MULTIPROCESSING_DISTRIBUTED or (
183 | cfg.MULTIPROCESSING_DISTRIBUTED
184 | and args.rank % ngpus_per_node == 0
185 | ):
186 | this_dir = os.path.dirname(__file__)
187 | shutil.copy2(
188 | os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
189 | final_output_dir
190 | )
191 |
192 | writer_dict = {
193 | 'writer': SummaryWriter(log_dir=tb_log_dir),
194 | 'train_global_steps': 0,
195 | 'valid_global_steps': 0,
196 | }
197 |
198 | if not cfg.MULTIPROCESSING_DISTRIBUTED or (
199 | cfg.MULTIPROCESSING_DISTRIBUTED
200 | and args.rank % ngpus_per_node == 0
201 | ):
202 | dump_input = torch.rand(
203 | (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
204 | )
205 | #writer_dict['writer'].add_graph(model, (dump_input, ))
206 | # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))
207 |
208 | if cfg.FP16.ENABLED:
209 | model = network_to_half(model)
210 |
211 | if cfg.MODEL.SYNC_BN and not args.distributed:
212 | print('Warning: Sync BatchNorm is only supported in distributed training.')
213 |
214 | if args.distributed:
215 | if cfg.MODEL.SYNC_BN:
216 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
217 | # For multiprocessing distributed, DistributedDataParallel constructor
218 | # should always set the single device scope, otherwise,
219 | # DistributedDataParallel will use all available devices.
220 | if args.gpu is not None:
221 | torch.cuda.set_device(args.gpu)
222 | model.cuda(args.gpu)
223 | # When using a single GPU per process and per
224 | # DistributedDataParallel, we need to divide the batch size
225 | # ourselves based on the total number of GPUs we have
226 | # args.workers = int(args.workers / ngpus_per_node)
227 | model = torch.nn.parallel.DistributedDataParallel(
228 | model, device_ids=[args.gpu], find_unused_parameters=True,
229 | )
230 | else:
231 | model.cuda()
232 | # DistributedDataParallel will divide and allocate batch_size to all
233 | # available GPUs if device_ids are not set
234 | model = torch.nn.parallel.DistributedDataParallel(model)
235 | elif args.gpu is not None:
236 | torch.cuda.set_device(args.gpu)
237 | model = model.cuda(args.gpu)
238 | else:
239 | model = torch.nn.DataParallel(model).cuda()
240 |
241 | # define loss function (criterion) and optimizer
242 | loss_factory = MultiLossFactory(cfg).cuda()
243 |
244 | # Data loading code
245 | train_loader = make_dataloader(
246 | cfg, is_train=True, distributed=args.distributed
247 | )
248 | logger.info(train_loader.dataset)
249 |
250 | best_perf = -1
251 | best_model = False
252 | last_epoch = -1
253 | optimizer = get_optimizer(cfg, model)
254 |
255 | if cfg.FP16.ENABLED:
256 | optimizer = FP16_Optimizer(
257 | optimizer,
258 | static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
259 | dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE
260 | )
261 |
262 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH
263 | checkpoint_file = os.path.join(
264 | final_output_dir, 'checkpoint.pth.tar')
265 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
266 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
267 | checkpoint = torch.load(checkpoint_file, map_location='cpu')
268 | begin_epoch = checkpoint['epoch']
269 | best_perf = checkpoint['perf']
270 | last_epoch = checkpoint['epoch']
271 | model.load_state_dict(checkpoint['state_dict'])
272 |
273 | optimizer.load_state_dict(checkpoint['optimizer'])
274 | logger.info("=> loaded checkpoint '{}' (epoch {})".format(
275 | checkpoint_file, checkpoint['epoch']))
276 |
277 | if cfg.FP16.ENABLED:
278 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
279 | optimizer.optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
280 | last_epoch=last_epoch
281 | )
282 | '''
283 | else:
284 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2, eta_min=0.00001, last_epoch=last_epoch)
285 | '''
286 | else:
287 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
288 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
289 | last_epoch=last_epoch)
290 |
291 |     # inputs = torch.randn(1, 3, 672, 672)
292 | #y = model(Variable(inputs))
293 | #print(y)
294 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
295 | # train one epoch
296 | do_train(cfg, model, train_loader, loss_factory, optimizer, epoch,
297 | final_output_dir, tb_log_dir, writer_dict, fp16=cfg.FP16.ENABLED)
298 |
299 | # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
300 | lr_scheduler.step()
301 |
302 |         perf_indicator = epoch  # no in-training validation: the latest epoch always becomes the "best"
303 | if perf_indicator >= best_perf:
304 | best_perf = perf_indicator
305 | best_model = True
306 | else:
307 | best_model = False
308 |
309 | if not cfg.MULTIPROCESSING_DISTRIBUTED or (
310 | cfg.MULTIPROCESSING_DISTRIBUTED
311 | and args.rank == 0
312 | ):
313 | logger.info('=> saving checkpoint to {}'.format(final_output_dir))
314 | save_checkpoint({
315 | 'epoch': epoch + 1,
316 | 'model': cfg.MODEL.NAME,
317 | 'state_dict': model.state_dict(),
318 | 'best_state_dict': model.module.state_dict(),
319 | 'perf': perf_indicator,
320 | 'optimizer': optimizer.state_dict(),
321 | }, best_model, final_output_dir)
322 |
323 | final_model_state_file = os.path.join(
324 | final_output_dir, 'final_state{}.pth.tar'.format(gpu)
325 | )
326 |
327 | logger.info('saving final model state to {}'.format(
328 | final_model_state_file))
329 | torch.save(model.module.state_dict(), final_model_state_file)
330 | writer_dict['writer'].close()
331 |
332 |
333 | if __name__ == '__main__':
334 | main()
335 |
--------------------------------------------------------------------------------
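A hedged sketch of reading back a checkpoint written by save_checkpoint() in the training loop above. The path is hypothetical (it follows the OUTPUT_DIR/dataset/model/config layout that create_logger() builds); the dictionary keys match the ones saved in dist_train.py:

    import torch

    ckpt = torch.load('output/coco_kpt/pose_higher_hrnet/frozen/checkpoint.pth.tar',
                      map_location='cpu')
    print(ckpt['epoch'], ckpt['model'], ckpt['perf'])

    state_dict = ckpt['state_dict']       # weights of the (Distributed)DataParallel-wrapped model
    best = ckpt['best_state_dict']        # unwrapped weights (model.module.state_dict())
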
/tools/valid.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com)
5 | # Modified by Bowen Cheng (bcheng9@illinois.edu)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import argparse
13 | import os
14 | import pprint
15 |
16 | import torch
17 | import torch.backends.cudnn as cudnn
18 | import torch.nn.parallel
19 | import torch.optim
20 | import torch.utils.data
21 | import torch.utils.data.distributed
22 | import torchvision.transforms
23 | import torch.multiprocessing
24 | from tqdm import tqdm
25 |
26 | import _init_paths
27 | import models
28 |
29 | from config import cfg
30 | from config import check_config
31 | from config import update_config
32 | from core.inference import get_multi_stage_outputs
33 | from core.inference import aggregate_results
34 | from core.group import HeatmapParser
35 | from dataset import make_test_dataloader
36 | from fp16_utils.fp16util import network_to_half
37 | from utils.utils import create_logger
38 | from utils.utils import get_model_summary
39 | from utils.vis import save_debug_images
40 | from utils.vis import save_valid_image
41 | from utils.transforms import resize_align_multi_scale
42 | from utils.transforms import get_final_preds
43 | from utils.transforms import get_multi_scale_size
44 |
45 | torch.multiprocessing.set_sharing_strategy('file_system')
46 |
47 |
48 | def parse_args():
49 | parser = argparse.ArgumentParser(description='Test keypoints network')
50 | # general
51 | parser.add_argument('--cfg',
52 | help='experiment configure file name',
53 | required=True,
54 | type=str)
55 |
56 | parser.add_argument('opts',
57 | help="Modify config options using the command-line",
58 | default=None,
59 | nargs=argparse.REMAINDER)
60 |
61 | args = parser.parse_args()
62 |
63 | return args
64 |
65 |
66 | # markdown format output
67 | def _print_name_value(logger, name_value, full_arch_name):
68 | names = name_value.keys()
69 | values = name_value.values()
70 | num_values = len(name_value)
71 | logger.info(
72 | '| Arch ' +
73 | ' '.join(['| {}'.format(name) for name in names]) +
74 | ' |'
75 | )
76 | logger.info('|---' * (num_values+1) + '|')
77 |
78 | if len(full_arch_name) > 15:
79 | full_arch_name = full_arch_name[:8] + '...'
80 | logger.info(
81 | '| ' + full_arch_name + ' ' +
82 | ' '.join(['| {:.3f}'.format(value) for value in values]) +
83 | ' |'
84 | )
85 |
86 |
87 | def main():
88 | args = parse_args()
89 | update_config(cfg, args)
90 | check_config(cfg)
91 |
92 | logger, final_output_dir, tb_log_dir = create_logger(
93 | cfg, args.cfg, 'valid'
94 | )
95 |
96 | logger.info(pprint.pformat(args))
97 | logger.info(cfg)
98 |
99 | # cudnn related setting
100 | cudnn.benchmark = cfg.CUDNN.BENCHMARK
101 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
102 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
103 |
104 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
105 | cfg, is_train=False
106 | )
107 |
108 | dump_input = torch.rand(
109 | (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
110 | )
111 | logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))
112 |
113 | if cfg.FP16.ENABLED:
114 | model = network_to_half(model)
115 |
116 | if cfg.TEST.MODEL_FILE:
117 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
118 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True)
119 | else:
120 | model_state_file = os.path.join(
121 | final_output_dir, 'model_best.pth.tar'
122 | )
123 | logger.info('=> loading model from {}'.format(model_state_file))
124 | model.load_state_dict(torch.load(model_state_file))
125 |
126 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
127 | model.eval()
128 |
129 | data_loader, test_dataset = make_test_dataloader(cfg)
130 |
131 | if cfg.MODEL.NAME == 'pose_hourglass':
132 | transforms = torchvision.transforms.Compose(
133 | [
134 | torchvision.transforms.ToTensor(),
135 | ]
136 | )
137 | else:
138 | transforms = torchvision.transforms.Compose(
139 | [
140 | torchvision.transforms.ToTensor(),
141 | torchvision.transforms.Normalize(
142 | mean=[0.485, 0.456, 0.406],
143 | std=[0.229, 0.224, 0.225]
144 | )
145 | ]
146 | )
147 |
148 | parser = HeatmapParser(cfg)
149 | all_preds = []
150 | all_scores = []
151 |
152 | pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None
153 | for i, (images, annos) in enumerate(data_loader):
154 | assert 1 == images.size(0), 'Test batch size should be 1'
155 |
156 | image = images[0].cpu().numpy()
157 | # size at scale 1.0
158 | base_size, center, scale = get_multi_scale_size(
159 | image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR)
160 | )
161 |
162 | with torch.no_grad():
163 | final_heatmaps = None
164 | tags_list = []
165 | for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR, reverse=True)):
166 | input_size = cfg.DATASET.INPUT_SIZE
167 | image_resized, center, scale = resize_align_multi_scale(
168 | image, input_size, s, min(cfg.TEST.SCALE_FACTOR)
169 | )
170 | image_resized = transforms(image_resized)
171 | image_resized = image_resized.unsqueeze(0).cuda()
172 |
173 | outputs, heatmaps, tags = get_multi_stage_outputs(
174 | cfg, model, image_resized, cfg.TEST.FLIP_TEST,
175 | cfg.TEST.PROJECT2IMAGE, base_size
176 | )
177 |
178 | final_heatmaps, tags_list = aggregate_results(
179 | cfg, s, final_heatmaps, tags_list, heatmaps, tags
180 | )
181 |
182 | final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR))
183 | tags = torch.cat(tags_list, dim=4)
184 | grouped, scores = parser.parse(
185 | final_heatmaps, tags, cfg.TEST.ADJUST, cfg.TEST.REFINE
186 | )
187 |
188 | final_results = get_final_preds(
189 | grouped, center, scale,
190 | [final_heatmaps.size(3), final_heatmaps.size(2)]
191 | )
192 |
193 | if cfg.TEST.LOG_PROGRESS:
194 | pbar.update()
195 |
196 | if i % cfg.PRINT_FREQ == 0:
197 | prefix = '{}_{}'.format(os.path.join(final_output_dir, 'result_valid'), i)
198 | # logger.info('=> write {}'.format(prefix))
199 | save_valid_image(image, final_results, '{}.jpg'.format(prefix), dataset=test_dataset.name)
200 | # save_debug_images(cfg, image_resized, None, None, outputs, prefix)
201 |
202 | all_preds.append(final_results)
203 | all_scores.append(scores)
204 |
205 | if cfg.TEST.LOG_PROGRESS:
206 | pbar.close()
207 |
208 | name_values, _ = test_dataset.evaluate(
209 | cfg, all_preds, all_scores, final_output_dir
210 | )
211 |
212 | if isinstance(name_values, list):
213 | for name_value in name_values:
214 | _print_name_value(logger, name_value, cfg.MODEL.NAME)
215 | else:
216 | _print_name_value(logger, name_values, cfg.MODEL.NAME)
217 |
218 |
219 | if __name__ == '__main__':
220 | main()
221 |
--------------------------------------------------------------------------------
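A small sketch of the markdown table that _print_name_value() above writes to the log; the metric names and values are made up, and the function is assumed to be in scope (it is defined in tools/valid.py rather than in a library module):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('valid')

    _print_name_value(logger, {'AP': 0.672, 'AP .5': 0.871}, 'pose_higher_hrnet')
    # | Arch | AP | AP .5 |
    # |---|---|---|
    # | pose_hig... | 0.672 | 0.871 |   (arch names longer than 15 characters are truncated)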