├── Example Configs ├── H-1.yaml ├── H-2.yaml ├── H-3.yaml ├── H-4.yaml └── H0.yaml ├── LICENSE ├── README.md ├── experiments ├── coco │ └── higher_hrnet │ │ ├── frozen.yaml │ │ └── unfrozen.yaml └── crowd_pose │ └── higher_hrnet │ ├── w32_512_adam_lr1e-3.yaml │ ├── w32_512_adam_lr1e-3_coco.yaml │ ├── w32_512_adam_lr1e-3_syncbn.yaml │ ├── w32_640_adam_lr1e-3.yaml │ └── w48_640_adam_lr1e-3.yaml ├── lib ├── config │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── default.cpython-36.pyc │ │ └── models.cpython-36.pyc │ ├── default.py │ └── models.py ├── core │ ├── __pycache__ │ │ ├── group.cpython-36.pyc │ │ ├── inference.cpython-36.pyc │ │ ├── loss.cpython-36.pyc │ │ └── trainer.cpython-36.pyc │ ├── group.py │ ├── inference.py │ ├── loss.py │ └── trainer.py ├── dataset │ ├── COCODataset.py │ ├── COCOKeypoints.py │ ├── CrowdPoseDataset.py │ ├── CrowdPoseKeypoints.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── COCODataset.cpython-36.pyc │ │ ├── COCOKeypoints.cpython-36.pyc │ │ ├── CrowdPoseDataset.cpython-36.pyc │ │ ├── CrowdPoseKeypoints.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ └── build.cpython-36.pyc │ ├── build.py │ ├── target_generators │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── target_generators.cpython-36.pyc │ │ └── target_generators.py │ └── transforms │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── build.cpython-36.pyc │ │ └── transforms.cpython-36.pyc │ │ ├── build.py │ │ └── transforms.py ├── fp16_utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── fp16_optimizer.cpython-36.pyc │ │ ├── fp16util.cpython-36.pyc │ │ └── loss_scaler.cpython-36.pyc │ ├── fp16_optimizer.py │ ├── fp16util.py │ └── loss_scaler.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── efficientnet_blocks.cpython-36.pyc │ │ ├── pose_efficientnet.cpython-36.pyc │ │ ├── pose_efficientnet_all.cpython-36.pyc │ │ └── pose_higher_hrnet.cpython-36.pyc │ ├── efficientnet_blocks.py │ └── pose_higher_hrnet.py └── utils │ ├── __pycache__ │ ├── transforms.cpython-36.pyc │ ├── utils.cpython-36.pyc │ ├── vis.cpython-36.pyc │ └── zipreader.cpython-36.pyc │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt └── tools ├── __pycache__ └── _init_paths.cpython-36.pyc ├── _init_paths.py ├── crowdpose_concat_train_val.py ├── dist_train.py └── valid.py /Example Configs/H-1.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-1 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 480 18 | OUTPUT_SIZE: [120, 240] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | 
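# Note: each per-stage list in this LOSS block (WITH_AE_LOSS, PUSH_LOSS_FACTOR,
# PULL_LOSS_FACTOR, WITH_HEATMAPS_LOSS, HEATMAPS_LOSS_FACTOR) must contain exactly
# LOSS.NUM_STAGES entries; check_config() in lib/config/default.py asserts this.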
HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 3 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -1 99 | WIDTH_MULT: 0.909 100 | DEPTH_MULT: 0.833 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 6 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /Example Configs/H-2.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-2 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 448 18 | OUTPUT_SIZE: [112, 224] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 2 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | 
- 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -2 99 | WIDTH_MULT: 0.826 100 | DEPTH_MULT: 0.694 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 8 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /Example Configs/H-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-2 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 416 18 | OUTPUT_SIZE: [104, 208] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -3 99 | WIDTH_MULT: 0.751 100 | DEPTH_MULT: 0.578 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | 
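# check_config() in lib/config/default.py likewise requires TEST.WITH_HEATMAPS and
# TEST.WITH_AE to each have exactly LOSS.NUM_STAGES entries.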
PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /Example Configs/H-4.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 384 18 | OUTPUT_SIZE: [96, 192] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 2 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -4 99 | WIDTH_MULT: 0.684 100 | DEPTH_MULT: 0.483 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 48 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /Example Configs/H0.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: False 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | 
LOG_DIR: log 5 | OUTPUT_DIR: output_H0 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 17 100 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 1 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 0 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EfficientHRNet 2 | 3 | EfficientHRNet is a family of scalable and efficient networks created by unifiying EfficientNet and HigherHRNet for Multi-person human pose estimation. A preprint of our paper can be found [here.](https://arxiv.org/abs/2007.08090) 4 | 5 | Our code is based on the 6 | 7 | 1) Official implementation of [HigherHRNet](https://github.com/HRNet/HigherHRNet-Human-Pose-Estimation) 8 | 9 | 2) PyTorch implementation of [EfficientNet](https://github.com/narumiruna/efficientnet-pytorch) 10 | 11 | 12 | We provide a formulation for jointly scaling our backbone EfficientNet below the baseline B0 and the rest of EfficientHRNet with it. 
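For example, `Example Configs/H-2.yaml` applies this scaling with the following settings (values copied from that config; the comments are explanatory):

```yaml
DATASET:
  INPUT_SIZE: 448            # H0 uses 512; each smaller model also reduces the input resolution
  OUTPUT_SIZE: [112, 224]    # heatmap resolutions, scaled together with the input size
MODEL:
  SCALE_FACTOR: -2           # compound scaling step below the B0/H0 baseline
  WIDTH_MULT: 0.826          # channel-width multiplier for the EfficientNet backbone
  DEPTH_MULT: 0.694          # layer-depth multiplier for the EfficientNet backbone
```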
Ultimately, we are able to create a family 13 | of highly accurate and efficient 2D human pose estimators that is flexible enough to provide lightweight solution for a variety of application and device requirements. 14 | 15 | 16 | ## Environment Setup ## 17 | 18 | * Pytorch >= 1.1.0 19 | 20 | * Dependencies as listed in requirements.txt 21 | 22 | * COCO Keypoints Dataset along with COCOAPI as given in https://cocodataset.org/#download and https://github.com/cocodataset/cocoapi 23 | 24 | The code was developed using python 3.6 and NVIDIA GPUs, both of which are required. 25 | 26 | 27 | ## Configurations for EfficientHRNet models ## 28 | 29 | Config files are found at experiments/coco/higher_hrnet/ 30 | 31 | Varying the following parameters provide different EfficientHRNet models ranging from H0 to H-4 : 32 | 33 | * scale_factor 34 | * input_size 35 | * width_mult 36 | * depth_mult 37 | 38 | More details on scaling can be found in our paper: https://arxiv.org/pdf/2007.08090.pdf 39 | 40 | Examples can be seen in the Example Configs folder. 41 | 42 | 43 | ## EfficientHRNet Training and Evaluation ## 44 | 45 | Distributed training is supported. Config settings can be customized based on user requirements. Training and validation scripts can be found at tools/ 46 | 47 | 48 | ### Training on COCO (Nvidia GPUs) ### 49 | 50 | Single GPU training example: 51 | ``` 52 | CUDA_VISIBLE_DEVICES=0 python3 tools/dist_train.py --cfg experiments/coco/higher-hrnet/config.yaml 53 | ``` 54 | Distributed training example: 55 | ``` 56 | CUDA_VISIBLE_DEVICES=0,1 python3 tools/dist_train.py --cfg experiments/coco/higher-hrnet/config.yaml --dist-url tcp://127.0.0.1:12345 57 | ``` 58 | 59 | ### Testing on COCO (Nvidia GPUs) ### 60 | 61 | Both single-scale and multi-scale testing are supported. 62 | 63 | Single scale testing: 64 | ``` 65 | python3 tools/valid.py --cfg experiments/coco/higher-hrnet/config.yaml TEST.MODEL_FILE /path/to/model.pth 66 | ``` 67 | Multi-scale testing: 68 | ``` 69 | python3 tools/valid.py --cfg experiments/coco/higher-hrnet/config.yaml TEST.MODEL_FILE /path/to/model.pth TEST.SCALE_FACTOR [0.5,1.0,1.5] 70 | ``` 71 | 72 | ## Pretrained Models ## 73 | 74 | COCO17 pretrained models for EfficientHRNet H0 to H-4 can be download [here.](https://drive.google.com/drive/folders/1FcJ1bawqWb1yAkcqb2sJfMsePMwupsWJ?usp=sharing) 75 | 76 | | Method | Input Size | Parameters | FLOPs | AP | APmulti-scale | 77 | |:--------------:|:----------:|:----------:|:-----:|:----:|:----:| 78 | | H0 | 512 | 23.3M | 25.6B | 64.0 | 67.1 | 79 | | H-1 | 480 | 16M | 14.2B | 59.1 | 62.3 | 80 | | H-2 | 448 | 10.3M | 7.7B | 52.8 | 55.0 | 81 | | H-3 | 416 | 6.9M | 4.2B | 44.5 | 45.5 | 82 | | H-4 | 384 | 3.7M | 2.1B | 35.5 | 39.7 | 83 | 84 | 85 | Compact EfficientNet ImageNet trained weights can be downloaded [here.](https://drive.google.com/drive/folders/1AZMYacfDcZv4QePcYONtg2in7oVmmSwV?usp=sharing) 86 | 87 | | | | |ImageNet | | Cifar-100 | | 88 | |:--------------:|:----------:|:------:|:----------:|:-----:|:----------:|:-----:| 89 | | Method | Input Size | FLOPs | Parameters | Top-1 | Parameters | Top-1 | 90 | | B0 | 512 | 0.4B | 5.3M | 75 | 4.1M | 81.9 | 91 | | B-1 | 480 | 0.3B | 4.5M | 73.8 | 3.5M | 81.4 | 92 | | B-2 | 448 | 0.2B | 3.4M | 71.3 | 2.5M | 79.8 | 93 | | B-3 | 416 | 0.1B | 2.8M | 68.5 | 1.9M | 78.2 | 94 | | B-4 | 384 | 0.05B | 1.3M | 65.6 | 1.3M | 74.3 | 95 | 96 | 97 | ## Citation ## 98 | 99 | If you would like use EfficientHRNet in your work, please use the following citation. 
100 | 101 | ``` 102 | @misc{neff2020efficienthrnet, 103 | title={EfficientHRNet: Efficient Scaling for Lightweight High-Resolution Multi-Person Pose Estimation}, 104 | author={Christopher Neff and Aneri Sheth and Steven Furgurson and Hamed Tabkhi}, 105 | year={2020}, 106 | eprint={2007.08090}, 107 | archivePrefix={arXiv}, 108 | primaryClass={cs.CV} 109 | } 110 | ``` 111 | 112 | We also recommend citing EfficientNet and HigherHRNet, which inspired this work. 113 | -------------------------------------------------------------------------------- /experiments/coco/higher_hrnet/frozen.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: False 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: b0_frozen 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '/home/aneri/EfficientHRNet_b0/data/coco/' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, True] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: 0 99 | WIDTH_MULT: 1.0 100 | DEPTH_MULT: 1.0 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: '/home/aneri/efficientnetb5-branch/efficientnet-b0-4cfa50.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, True) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 60 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /experiments/coco/higher_hrnet/unfrozen.yaml: 
-------------------------------------------------------------------------------- 1 | AUTO_RESUME: False 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: b0_unfrozen 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '/home/aneri/EfficientHRNet_b0/data/coco/' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, True] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: 0 99 | WIDTH_MULT: 1.0 100 | DEPTH_MULT: 1.0 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: '/home/aneri/efficientnetb5-branch/efficientnet-b0-4cfa50.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, True) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 29 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/crowd_pose' 27 | 
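# CrowdPose configs use 14 joints and the trainval/test splits;
# tools/crowdpose_concat_train_val.py is provided for building the combined
# trainval annotation file referenced below.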
TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 12 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- /experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_coco.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: 48 | - 'conv1' 49 | - 'bn1' 50 | - 'conv2' 51 | - 'bn2' 52 | - 'layer1' 53 | - 'transition1' 54 | - 'stage2' 55 | - 'transition2' 56 | - 'stage3' 57 | - 'transition3' 58 | - 'stage4' 59 | STEM_INPLANES: 64 
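# Unlike the other configs (which use PRETRAINED_LAYERS: ['*']), this variant enumerates
# specific modules, presumably restricting which weights are loaded from the COCO-pretrained
# pose model given in MODEL.PRETRAINED below.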
60 | STAGE2: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 2 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | NUM_CHANNELS: 68 | - 32 69 | - 64 70 | FUSE_METHOD: SUM 71 | STAGE3: 72 | NUM_MODULES: 4 73 | NUM_BRANCHES: 3 74 | BLOCK: BASIC 75 | NUM_BLOCKS: 76 | - 4 77 | - 4 78 | - 4 79 | NUM_CHANNELS: 80 | - 32 81 | - 64 82 | - 128 83 | FUSE_METHOD: SUM 84 | STAGE4: 85 | NUM_MODULES: 3 86 | NUM_BRANCHES: 4 87 | BLOCK: BASIC 88 | NUM_BLOCKS: 89 | - 4 90 | - 4 91 | - 4 92 | - 4 93 | NUM_CHANNELS: 94 | - 32 95 | - 64 96 | - 128 97 | - 256 98 | FUSE_METHOD: SUM 99 | DECONV: 100 | NUM_DECONVS: 1 101 | NUM_CHANNELS: 102 | - 32 103 | KERNEL_SIZE: 104 | - 4 105 | NUM_BASIC_BLOCKS: 4 106 | CAT_OUTPUT: 107 | - True 108 | INIT_WEIGHTS: True 109 | NAME: pose_higher_hrnet 110 | NUM_JOINTS: 14 111 | PRETRAINED: 'models/pytorch/pose_coco/pose_higher_hrnet_w32_512.pth' 112 | TAG_PER_JOINT: True 113 | TEST: 114 | FLIP_TEST: True 115 | IMAGES_PER_GPU: 1 116 | MODEL_FILE: '' 117 | SCALE_FACTOR: [1] 118 | DETECTION_THRESHOLD: 0.1 119 | WITH_HEATMAPS: (True, True) 120 | WITH_AE: (True, False) 121 | PROJECT2IMAGE: True 122 | NMS_KERNEL: 5 123 | NMS_PADDING: 2 124 | TRAIN: 125 | BEGIN_EPOCH: 0 126 | CHECKPOINT: '' 127 | END_EPOCH: 300 128 | GAMMA1: 0.99 129 | GAMMA2: 0.0 130 | IMAGES_PER_GPU: 12 131 | LR: 0.001 132 | LR_FACTOR: 0.1 133 | LR_STEP: [200, 260] 134 | MOMENTUM: 0.9 135 | NESTEROV: False 136 | OPTIMIZER: adam 137 | RESUME: False 138 | SHUFFLE: True 139 | WD: 0.0001 140 | WORKERS: 4 141 | -------------------------------------------------------------------------------- /experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_syncbn.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | 
INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | SYNC_BN: True 103 | TEST: 104 | FLIP_TEST: True 105 | IMAGES_PER_GPU: 1 106 | MODEL_FILE: '' 107 | SCALE_FACTOR: [1] 108 | DETECTION_THRESHOLD: 0.1 109 | WITH_HEATMAPS: (True, True) 110 | WITH_AE: (True, False) 111 | PROJECT2IMAGE: True 112 | NMS_KERNEL: 5 113 | NMS_PADDING: 2 114 | TRAIN: 115 | BEGIN_EPOCH: 0 116 | CHECKPOINT: '' 117 | END_EPOCH: 300 118 | GAMMA1: 0.99 119 | GAMMA2: 0.0 120 | IMAGES_PER_GPU: 12 121 | LR: 0.001 122 | LR_FACTOR: 0.1 123 | LR_STEP: [200, 260] 124 | MOMENTUM: 0.9 125 | NESTEROV: False 126 | OPTIMIZER: adam 127 | RESUME: False 128 | SHUFFLE: True 129 | WD: 0.0001 130 | WORKERS: 4 131 | -------------------------------------------------------------------------------- /experiments/crowd_pose/higher_hrnet/w32_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: crowd_pose_kpt 17 | DATASET_TEST: crowd_pose 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 14 29 | ROOT: 'data/crowd_pose' 30 | TEST: test 31 | TRAIN: trainval 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 32 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 12 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | 
NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /experiments/crowd_pose/higher_hrnet/w48_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: crowd_pose_kpt 17 | DATASET_TEST: crowd_pose 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 14 29 | ROOT: 'data/crowd_pose' 30 | TEST: test 31 | TRAIN: trainval 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 48 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .default import check_config 10 | -------------------------------------------------------------------------------- /lib/config/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/config/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/config/__pycache__/default.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/config/__pycache__/default.cpython-36.pyc -------------------------------------------------------------------------------- /lib/config/__pycache__/models.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/config/__pycache__/models.cpython-36.pyc -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 5 | # Written by Bin Xiao (leoxiaobin@gmail.com) 6 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 7 | # ------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import os 14 | 15 | from yacs.config import CfgNode as CN 16 | 17 | from .models import MODEL_EXTRAS 18 | 19 | 20 | _C = CN() 21 | 22 | _C.OUTPUT_DIR = '' 23 | _C.LOG_DIR = '' 24 | _C.DATA_DIR = '' 25 | _C.GPUS = (0,) 26 | _C.WORKERS = 4 27 | _C.PRINT_FREQ = 20 28 | _C.AUTO_RESUME = False 29 | _C.PIN_MEMORY = True 30 | _C.RANK = 0 31 | _C.VERBOSE = True 32 | _C.DIST_BACKEND = 'nccl' 33 | _C.MULTIPROCESSING_DISTRIBUTED = True 34 | 35 | # FP16 training params 36 | _C.FP16 = CN() 37 | _C.FP16.ENABLED = False 38 | _C.FP16.STATIC_LOSS_SCALE = 1.0 39 | _C.FP16.DYNAMIC_LOSS_SCALE = False 40 | 41 | # Cudnn related params 42 | _C.CUDNN = CN() 43 | _C.CUDNN.BENCHMARK = True 44 | _C.CUDNN.DETERMINISTIC = False 45 | _C.CUDNN.ENABLED = True 46 | 47 | # common params for NETWORK 48 | _C.MODEL = CN() 49 | _C.MODEL.NAME = 'pose_multi_resolution_net_v16' 50 | _C.MODEL.INIT_WEIGHTS = True 51 | _C.MODEL.SCALE_FACTOR = 0 52 | _C.MODEL.WIDTH_MULT = 1.0 53 | _C.MODEL.DEPTH_MULT = 1.0 54 | _C.MODEL.PRETRAINED = '' 55 | _C.MODEL.NUM_JOINTS = 17 56 | _C.MODEL.TAG_PER_JOINT = True 57 | _C.MODEL.EXTRA = CN(new_allowed=True) 58 | _C.MODEL.SYNC_BN = False 59 | 60 | _C.LOSS = CN() 61 | _C.LOSS.NUM_STAGES = 1 62 | _C.LOSS.WITH_HEATMAPS_LOSS = (True,) 63 | _C.LOSS.HEATMAPS_LOSS_FACTOR = (1.0,) 64 | _C.LOSS.WITH_AE_LOSS = (True,) 65 | _C.LOSS.AE_LOSS_TYPE = 'max' 66 | _C.LOSS.PUSH_LOSS_FACTOR = (0.001,) 67 | _C.LOSS.PULL_LOSS_FACTOR = (0.001,) 68 | 69 | # DATASET related params 70 | _C.DATASET = CN() 71 | _C.DATASET.ROOT = '' 72 | _C.DATASET.DATASET = 'coco_kpt' 
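# DATASET selects the training dataset ('coco_kpt' or 'crowd_pose_kpt'); DATASET_TEST below
# selects the evaluation dataset ('coco' or 'crowd_pose'); see the configs under experiments/
# and lib/dataset/build.py.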
73 | _C.DATASET.DATASET_TEST = 'coco' 74 | _C.DATASET.NUM_JOINTS = 17 75 | _C.DATASET.MAX_NUM_PEOPLE = 30 76 | _C.DATASET.TRAIN = 'train2017' 77 | _C.DATASET.TEST = 'val2017' 78 | _C.DATASET.DATA_FORMAT = 'jpg' 79 | 80 | # training data augmentation 81 | _C.DATASET.MAX_ROTATION = 30 82 | _C.DATASET.MIN_SCALE = 0.75 83 | _C.DATASET.MAX_SCALE = 1.25 84 | _C.DATASET.SCALE_TYPE = 'short' 85 | _C.DATASET.MAX_TRANSLATE = 40 86 | _C.DATASET.INPUT_SIZE = 512 87 | _C.DATASET.OUTPUT_SIZE = [128, 256, 512] 88 | _C.DATASET.FLIP = 0.5 89 | 90 | # heatmap generator (default is OUTPUT_SIZE/64) 91 | _C.DATASET.SIGMA = -1 92 | _C.DATASET.SCALE_AWARE_SIGMA = False 93 | _C.DATASET.BASE_SIZE = 256.0 94 | _C.DATASET.BASE_SIGMA = 2.0 95 | _C.DATASET.INT_SIGMA = False 96 | 97 | _C.DATASET.WITH_CENTER = False 98 | 99 | # train 100 | _C.TRAIN = CN() 101 | 102 | _C.TRAIN.LR_FACTOR = 0.1 103 | _C.TRAIN.LR_STEP = [90, 110] 104 | _C.TRAIN.LR = 0.001 105 | 106 | _C.TRAIN.OPTIMIZER = 'adam' 107 | _C.TRAIN.MOMENTUM = 0.9 108 | _C.TRAIN.WD = 0.0001 109 | _C.TRAIN.NESTEROV = False 110 | _C.TRAIN.GAMMA1 = 0.99 111 | _C.TRAIN.GAMMA2 = 0.0 112 | 113 | _C.TRAIN.BEGIN_EPOCH = 0 114 | _C.TRAIN.END_EPOCH = 140 115 | 116 | _C.TRAIN.RESUME = False 117 | _C.TRAIN.CHECKPOINT = '' 118 | 119 | _C.TRAIN.IMAGES_PER_GPU = 1 120 | _C.TRAIN.SHUFFLE = True 121 | 122 | # testing 123 | _C.TEST = CN() 124 | 125 | # size of images for each device 126 | # _C.TEST.BATCH_SIZE = 32 127 | _C.TEST.IMAGES_PER_GPU = 1 128 | # Test Model Epoch 129 | _C.TEST.FLIP_TEST = False 130 | _C.TEST.ADJUST = True 131 | _C.TEST.REFINE = True 132 | _C.TEST.SCALE_FACTOR = [1] 133 | # group 134 | _C.TEST.DETECTION_THRESHOLD = 0.2 135 | _C.TEST.TAG_THRESHOLD = 1. 136 | _C.TEST.USE_DETECTION_VAL = True 137 | _C.TEST.IGNORE_TOO_MUCH = False 138 | _C.TEST.MODEL_FILE = '' 139 | _C.TEST.IGNORE_CENTER = True 140 | _C.TEST.NMS_KERNEL = 3 141 | _C.TEST.NMS_PADDING = 1 142 | _C.TEST.PROJECT2IMAGE = False 143 | 144 | _C.TEST.WITH_HEATMAPS = (True,) 145 | _C.TEST.WITH_AE = (True,) 146 | 147 | _C.TEST.LOG_PROGRESS = False 148 | 149 | # debug 150 | _C.DEBUG = CN() 151 | _C.DEBUG.DEBUG = True 152 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 153 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 154 | _C.DEBUG.SAVE_HEATMAPS_GT = True 155 | _C.DEBUG.SAVE_HEATMAPS_PRED = True 156 | _C.DEBUG.SAVE_TAGMAPS_PRED = True 157 | 158 | 159 | def update_config(cfg, args): 160 | cfg.defrost() 161 | cfg.merge_from_file(args.cfg) 162 | cfg.merge_from_list(args.opts) 163 | 164 | if not os.path.exists(cfg.DATASET.ROOT): 165 | cfg.DATASET.ROOT = os.path.join( 166 | cfg.DATA_DIR, cfg.DATASET.ROOT 167 | ) 168 | 169 | cfg.MODEL.PRETRAINED = os.path.join( 170 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 171 | ) 172 | 173 | if cfg.TEST.MODEL_FILE: 174 | cfg.TEST.MODEL_FILE = os.path.join( 175 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 176 | ) 177 | 178 | if cfg.DATASET.WITH_CENTER: 179 | cfg.DATASET.NUM_JOINTS += 1 180 | cfg.MODEL.NUM_JOINTS = cfg.DATASET.NUM_JOINTS 181 | 182 | if not isinstance(cfg.DATASET.OUTPUT_SIZE, (list, tuple)): 183 | cfg.DATASET.OUTPUT_SIZE = [cfg.DATASET.OUTPUT_SIZE] 184 | if not isinstance(cfg.LOSS.WITH_HEATMAPS_LOSS, (list, tuple)): 185 | cfg.LOSS.WITH_HEATMAPS_LOSS = (cfg.LOSS.WITH_HEATMAPS_LOSS) 186 | 187 | if not isinstance(cfg.LOSS.HEATMAPS_LOSS_FACTOR, (list, tuple)): 188 | cfg.LOSS.HEATMAPS_LOSS_FACTOR = (cfg.LOSS.HEATMAPS_LOSS_FACTOR) 189 | 190 | if not isinstance(cfg.LOSS.WITH_AE_LOSS, (list, tuple)): 191 | cfg.LOSS.WITH_AE_LOSS = (cfg.LOSS.WITH_AE_LOSS) 192 | 193 | if not 
isinstance(cfg.LOSS.PUSH_LOSS_FACTOR, (list, tuple)): 194 | cfg.LOSS.PUSH_LOSS_FACTOR = (cfg.LOSS.PUSH_LOSS_FACTOR) 195 | 196 | if not isinstance(cfg.LOSS.PULL_LOSS_FACTOR, (list, tuple)): 197 | cfg.LOSS.PULL_LOSS_FACTOR = (cfg.LOSS.PULL_LOSS_FACTOR) 198 | 199 | cfg.freeze() 200 | 201 | 202 | def check_config(cfg): 203 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.WITH_HEATMAPS_LOSS), \ 204 | 'LOSS.NUM_SCALE should be the same as the length of LOSS.WITH_HEATMAPS_LOSS' 205 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.HEATMAPS_LOSS_FACTOR), \ 206 | 'LOSS.NUM_SCALE should be the same as the length of LOSS.HEATMAPS_LOSS_FACTOR' 207 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.WITH_AE_LOSS), \ 208 | 'LOSS.NUM_SCALE should be the same as the length of LOSS.WITH_AE_LOSS' 209 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.PUSH_LOSS_FACTOR), \ 210 | 'LOSS.NUM_SCALE should be the same as the length of LOSS.PUSH_LOSS_FACTOR' 211 | assert cfg.LOSS.NUM_STAGES == len(cfg.LOSS.PULL_LOSS_FACTOR), \ 212 | 'LOSS.NUM_SCALE should be the same as the length of LOSS.PULL_LOSS_FACTOR' 213 | assert cfg.LOSS.NUM_STAGES == len(cfg.TEST.WITH_HEATMAPS), \ 214 | 'LOSS.NUM_SCALE should be the same as the length of TEST.WITH_HEATMAPS' 215 | assert cfg.LOSS.NUM_STAGES == len(cfg.TEST.WITH_AE), \ 216 | 'LOSS.NUM_SCALE should be the same as the length of TEST.WITH_AE' 217 | 218 | 219 | if __name__ == '__main__': 220 | import sys 221 | with open(sys.argv[1], 'w') as f: 222 | print(_C, file=f) 223 | -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_multi_resoluton_net related params 15 | POSE_HIGHER_RESOLUTION_NET = CN() 16 | POSE_HIGHER_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 17 | POSE_HIGHER_RESOLUTION_NET.STEM_INPLANES = 64 18 | POSE_HIGHER_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 19 | 20 | POSE_HIGHER_RESOLUTION_NET.STAGE1 = CN() 21 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_MODULES = 1 22 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BRANCHES = 1 23 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BLOCKS = [4] 24 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_CHANNELS = [64] 25 | POSE_HIGHER_RESOLUTION_NET.STAGE1.BLOCK = 'BOTTLENECK' 26 | POSE_HIGHER_RESOLUTION_NET.STAGE1.FUSE_METHOD = 'SUM' 27 | 28 | POSE_HIGHER_RESOLUTION_NET.STAGE2 = CN() 29 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 30 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 31 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 32 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [24, 48] 33 | POSE_HIGHER_RESOLUTION_NET.STAGE2.BLOCK = 'BOTTLENECK' 34 | POSE_HIGHER_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 35 | 36 | POSE_HIGHER_RESOLUTION_NET.STAGE3 = CN() 37 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 38 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 39 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 40 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [24, 48, 92] 41 | POSE_HIGHER_RESOLUTION_NET.STAGE3.BLOCK = 'BOTTLENECK' 42 | POSE_HIGHER_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 43 | 44 | POSE_HIGHER_RESOLUTION_NET.STAGE4 = CN() 45 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 46 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 47 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 48 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [24, 48, 92, 192] 49 | POSE_HIGHER_RESOLUTION_NET.STAGE4.BLOCK = 'BOTTLENECK' 50 | POSE_HIGHER_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 51 | 52 | POSE_HIGHER_RESOLUTION_NET.DECONV = CN() 53 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_DCONVS = 2 54 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_CHANNELS = [32, 32] 55 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_BASIC_BLOCKS = 4 56 | POSE_HIGHER_RESOLUTION_NET.DECONV.KERNEL_SIZE = [2, 2] 57 | POSE_HIGHER_RESOLUTION_NET.DECONV.CAT_OUTPUT = [True, True] 58 | 59 | 60 | MODEL_EXTRAS = { 61 | 'pose_multi_resolution_net_v16': POSE_HIGHER_RESOLUTION_NET, 62 | } 63 | -------------------------------------------------------------------------------- /lib/core/__pycache__/group.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/group.cpython-36.pyc -------------------------------------------------------------------------------- /lib/core/__pycache__/inference.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/inference.cpython-36.pyc -------------------------------------------------------------------------------- /lib/core/__pycache__/loss.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/loss.cpython-36.pyc -------------------------------------------------------------------------------- /lib/core/__pycache__/trainer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/core/__pycache__/trainer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/core/group.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Some code is from https://github.com/princeton-vl/pose-ae-train/blob/454d4ba113bbb9775d4dc259ef5e6c07c2ceed54/utils/group.py 5 | # Written by Bin Xiao (leoxiaobin@gmail.com) 6 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 7 | # ------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | from munkres import Munkres 14 | import numpy as np 15 | import torch 16 | 17 | 18 | def py_max_match(scores): 19 | m = Munkres() 20 | tmp = m.compute(scores) 21 | tmp = np.array(tmp).astype(np.int32) 22 | return tmp 23 | 24 | 25 | def match_by_tag(inp, params): 26 | assert isinstance(params, Params), 'params should be class Params()' 27 | 28 | tag_k, loc_k, val_k = inp 29 | default_ = np.zeros((params.num_joints, 3 + tag_k.shape[2])) 30 | 31 | joint_dict = {} 32 | tag_dict = {} 33 | for i in range(params.num_joints): 34 | idx = params.joint_order[i] 35 | 36 | tags = tag_k[idx] 37 | joints = np.concatenate( 38 | (loc_k[idx], val_k[idx, :, None], tags), 1 39 | ) 40 | mask = joints[:, 2] > params.detection_threshold 41 | tags = tags[mask] 42 | joints = joints[mask] 43 | 44 | if joints.shape[0] == 0: 45 | continue 46 | 47 | if i == 0 or len(joint_dict) == 0: 48 | for tag, joint in zip(tags, joints): 49 | key = tag[0] 50 | joint_dict.setdefault(key, np.copy(default_))[idx] = joint 51 | tag_dict[key] = [tag] 52 | else: 53 | grouped_keys = list(joint_dict.keys())[:params.max_num_people] 54 | grouped_tags = [np.mean(tag_dict[i], axis=0) for i in grouped_keys] 55 | 56 | if params.ignore_too_much \ 57 | and len(grouped_keys) == params.max_num_people: 58 | continue 59 | 60 | diff = joints[:, None, 3:] - np.array(grouped_tags)[None, :, :] 61 | diff_normed = np.linalg.norm(diff, ord=2, axis=2) 62 | diff_saved = np.copy(diff_normed) 63 | 64 | if params.use_detection_val: 65 | diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3] 66 | 67 | num_added = diff.shape[0] 68 | num_grouped = diff.shape[1] 69 | 70 | if num_added > num_grouped: 71 | diff_normed = np.concatenate( 72 | ( 73 | diff_normed, 74 | np.zeros((num_added, num_added-num_grouped))+1e10 75 | ), 76 | axis=1 77 | ) 78 | 79 | pairs = py_max_match(diff_normed) 80 | for row, col in pairs: 81 | if ( 82 | row < num_added 83 | and col < num_grouped 84 | and diff_saved[row][col] < params.tag_threshold 85 | ): 86 | key = grouped_keys[col] 87 | joint_dict[key][idx] = joints[row] 88 | tag_dict[key].append(tags[row]) 89 | else: 90 | key = tags[row][0] 91 | joint_dict.setdefault(key, np.copy(default_))[idx] = 
\ 92 | joints[row] 93 | tag_dict[key] = [tags[row]] 94 | 95 | ans = np.array([joint_dict[i] for i in joint_dict]).astype(np.float32) 96 | return ans 97 | 98 | 99 | class Params(object): 100 | def __init__(self, cfg): 101 | self.num_joints = cfg.DATASET.NUM_JOINTS 102 | self.max_num_people = cfg.DATASET.MAX_NUM_PEOPLE 103 | 104 | self.detection_threshold = cfg.TEST.DETECTION_THRESHOLD 105 | self.tag_threshold = cfg.TEST.TAG_THRESHOLD 106 | self.use_detection_val = cfg.TEST.USE_DETECTION_VAL 107 | self.ignore_too_much = cfg.TEST.IGNORE_TOO_MUCH 108 | 109 | if cfg.DATASET.WITH_CENTER and cfg.TEST.IGNORE_CENTER: 110 | self.num_joints -= 1 111 | 112 | if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER: 113 | self.joint_order = [ 114 | i-1 for i in [18, 1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17] 115 | ] 116 | else: 117 | self.joint_order = [ 118 | i-1 for i in [1, 2, 3, 4, 5, 6, 7, 12, 13, 8, 9, 10, 11, 14, 15, 16, 17] 119 | ] 120 | 121 | 122 | class HeatmapParser(object): 123 | def __init__(self, cfg): 124 | self.params = Params(cfg) 125 | self.tag_per_joint = cfg.MODEL.TAG_PER_JOINT 126 | self.pool = torch.nn.MaxPool2d( 127 | cfg.TEST.NMS_KERNEL, 1, cfg.TEST.NMS_PADDING 128 | ) 129 | 130 | def nms(self, det): 131 | maxm = self.pool(det) 132 | maxm = torch.eq(maxm, det).float() 133 | det = det * maxm 134 | return det 135 | 136 | def match(self, tag_k, loc_k, val_k): 137 | match = lambda x: match_by_tag(x, self.params) 138 | return list(map(match, zip(tag_k, loc_k, val_k))) 139 | 140 | def top_k(self, det, tag): 141 | # det = torch.Tensor(det, requires_grad=False) 142 | # tag = torch.Tensor(tag, requires_grad=False) 143 | 144 | det = self.nms(det) 145 | num_images = det.size(0) 146 | num_joints = det.size(1) 147 | h = det.size(2) 148 | w = det.size(3) 149 | det = det.view(num_images, num_joints, -1) 150 | val_k, ind = det.topk(self.params.max_num_people, dim=2) 151 | 152 | tag = tag.view(tag.size(0), tag.size(1), w*h, -1) 153 | if not self.tag_per_joint: 154 | tag = tag.expand(-1, self.params.num_joints, -1, -1) 155 | 156 | tag_k = torch.stack( 157 | [ 158 | torch.gather(tag[:, :, :, i], 2, ind) 159 | for i in range(tag.size(3)) 160 | ], 161 | dim=3 162 | ) 163 | 164 | x = ind % w 165 | y = (ind // w).long() 166 | 167 | ind_k = torch.stack((x, y), dim=3) 168 | 169 | ans = { 170 | 'tag_k': tag_k.cpu().numpy(), 171 | 'loc_k': ind_k.cpu().numpy(), 172 | 'val_k': val_k.cpu().numpy() 173 | } 174 | 175 | return ans 176 | 177 | def adjust(self, ans, det): 178 | for batch_id, people in enumerate(ans): 179 | for people_id, i in enumerate(people): 180 | for joint_id, joint in enumerate(i): 181 | if joint[2] > 0: 182 | y, x = joint[0:2] 183 | xx, yy = int(x), int(y) 184 | #print(batch_id, joint_id, det[batch_id].shape) 185 | tmp = det[batch_id][joint_id] 186 | if tmp[xx, min(yy+1, tmp.shape[1]-1)] > tmp[xx, max(yy-1, 0)]: 187 | y += 0.25 188 | else: 189 | y -= 0.25 190 | 191 | if tmp[min(xx+1, tmp.shape[0]-1), yy] > tmp[max(0, xx-1), yy]: 192 | x += 0.25 193 | else: 194 | x -= 0.25 195 | ans[batch_id][people_id, joint_id, 0:2] = (y+0.5, x+0.5) 196 | return ans 197 | 198 | def refine(self, det, tag, keypoints): 199 | """ 200 | Given initial keypoint predictions, we identify missing joints 201 | :param det: numpy.ndarray of size (17, 128, 128) 202 | :param tag: numpy.ndarray of size (17, 128, 128) if not flip 203 | :param keypoints: numpy.ndarray of size (17, 4) if not flip, last dim is (x, y, det score, tag score) 204 | :return: 205 | """ 206 | if len(tag.shape) == 3: 207 | # tag 
shape: (17, 128, 128, 1) 208 | tag = tag[:, :, :, None] 209 | 210 | tags = [] 211 | for i in range(keypoints.shape[0]): 212 | if keypoints[i, 2] > 0: 213 | # save tag value of detected keypoint 214 | x, y = keypoints[i][:2].astype(np.int32) 215 | tags.append(tag[i, y, x]) 216 | 217 | # mean tag of current detected people 218 | prev_tag = np.mean(tags, axis=0) 219 | ans = [] 220 | 221 | for i in range(keypoints.shape[0]): 222 | # score of joints i at all position 223 | tmp = det[i, :, :] 224 | # distance of all tag values with mean tag of current detected people 225 | tt = (((tag[i, :, :] - prev_tag[None, None, :]) ** 2).sum(axis=2) ** 0.5) 226 | tmp2 = tmp - np.round(tt) 227 | 228 | # find maximum position 229 | y, x = np.unravel_index(np.argmax(tmp2), tmp.shape) 230 | xx = x 231 | yy = y 232 | # detection score at maximum position 233 | val = tmp[y, x] 234 | # offset by 0.5 235 | x += 0.5 236 | y += 0.5 237 | 238 | # add a quarter offset 239 | if tmp[yy, min(xx + 1, tmp.shape[1] - 1)] > tmp[yy, max(xx - 1, 0)]: 240 | x += 0.25 241 | else: 242 | x -= 0.25 243 | 244 | if tmp[min(yy + 1, tmp.shape[0] - 1), xx] > tmp[max(0, yy - 1), xx]: 245 | y += 0.25 246 | else: 247 | y -= 0.25 248 | 249 | ans.append((x, y, val)) 250 | ans = np.array(ans) 251 | 252 | if ans is not None: 253 | for i in range(det.shape[0]): 254 | # add keypoint if it is not detected 255 | if ans[i, 2] > 0 and keypoints[i, 2] == 0: 256 | # if ans[i, 2] > 0.01 and keypoints[i, 2] == 0: 257 | keypoints[i, :2] = ans[i, :2] 258 | keypoints[i, 2] = ans[i, 2] 259 | 260 | return keypoints 261 | 262 | def parse(self, det, tag, adjust=True, refine=True): 263 | ans = self.match(**self.top_k(det, tag)) 264 | 265 | if adjust: 266 | ans = self.adjust(ans, det) 267 | 268 | scores = [i[:, 2].mean() for i in ans[0]] 269 | 270 | if refine: 271 | ans = ans[0] 272 | # for every detected person 273 | for i in range(len(ans)): 274 | det_numpy = det[0].cpu().numpy() 275 | tag_numpy = tag[0].cpu().numpy() 276 | if not self.tag_per_joint: 277 | tag_numpy = np.tile( 278 | tag_numpy, (self.params.num_joints, 1, 1, 1) 279 | ) 280 | ans[i] = self.refine(det_numpy, tag_numpy, ans[i]) 281 | ans = [ans] 282 | 283 | return ans, scores 284 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | 13 | import torch 14 | 15 | from dataset.transforms import FLIP_CONFIG 16 | 17 | 18 | def get_outputs( 19 | cfg, model, image, with_flip=False, 20 | project2image=False, size_projected=None 21 | ): 22 | outputs = [] 23 | heatmaps = [] 24 | tags = [] 25 | 26 | outputs.append(model(image)) 27 | heatmaps.append(outputs[-1][:, :cfg.DATASET.NUM_JOINTS]) 28 | tags.append(outputs[-1][:, cfg.DATASET.NUM_JOINTS:]) 29 | 30 | if with_flip: 31 | outputs.append(model(torch.flip(image, [3]))) 32 | outputs[-1] = torch.flip(outputs[-1], [3]) 33 | heatmaps.append(outputs[-1][:, :cfg.DATASET.NUM_JOINTS]) 34 | tags.append(outputs[-1][:, cfg.DATASET.NUM_JOINTS:]) 35 | if 'coco' in cfg.DATASET.DATASET: 36 | dataset_name = 'COCO' 37 | elif 'crowd_pose' in cfg.DATASET.DATASET: 38 | dataset_name = 'CROWDPOSE' 39 | else: 40 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET) 41 | flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] \ 42 | if cfg.DATASET.WITH_CENTER else FLIP_CONFIG[dataset_name] 43 | heatmaps[-1] = heatmaps[-1][:, flip_index, :, :] 44 | if cfg.MODEL.TAG_PER_JOINT: 45 | tags[-1] = tags[-1][:, flip_index, :, :] 46 | 47 | if cfg.DATASET.WITH_CENTER and cfg.TEST.IGNORE_CENTER: 48 | heatmaps = [hms[:, :-1] for hms in heatmaps] 49 | tags = [tms[:, :-1] for tms in tags] 50 | 51 | if project2image and size_projected: 52 | heatmaps = [ 53 | torch.nn.functional.interpolate( 54 | hms, 55 | size=(size_projected[1], size_projected[0]), 56 | mode='bilinear', 57 | align_corners=False 58 | ) 59 | for hms in heatmaps 60 | ] 61 | 62 | tags = [ 63 | torch.nn.functional.interpolate( 64 | tms, 65 | size=(size_projected[1], size_projected[0]), 66 | mode='bilinear', 67 | align_corners=False 68 | ) 69 | for tms in tags 70 | ] 71 | 72 | return outputs, heatmaps, tags 73 | 74 | 75 | def get_multi_stage_outputs( 76 | cfg, model, image, with_flip=False, 77 | project2image=False, size_projected=None 78 | ): 79 | # outputs = [] 80 | heatmaps_avg = 0 81 | num_heatmaps = 0 82 | heatmaps = [] 83 | tags = [] 84 | 85 | outputs = model(image) 86 | for i, output in enumerate(outputs): 87 | if len(outputs) > 1 and i != len(outputs) - 1: 88 | output = torch.nn.functional.interpolate( 89 | output, 90 | size=(outputs[-1].size(2), outputs[-1].size(3)), 91 | mode='bilinear', 92 | align_corners=False 93 | ) 94 | 95 | offset_feat = cfg.DATASET.NUM_JOINTS \ 96 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] else 0 97 | 98 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] and cfg.TEST.WITH_HEATMAPS[i]: 99 | heatmaps_avg += output[:, :cfg.DATASET.NUM_JOINTS] 100 | num_heatmaps += 1 101 | 102 | if cfg.LOSS.WITH_AE_LOSS[i] and cfg.TEST.WITH_AE[i]: 103 | tags.append(output[:, offset_feat:]) 104 | 105 | if num_heatmaps > 0: 106 | heatmaps.append(heatmaps_avg/num_heatmaps) 107 | 108 | if with_flip: 109 | if 'coco' in cfg.DATASET.DATASET: 110 | dataset_name = 'COCO' 111 | elif 'crowd_pose' in cfg.DATASET.DATASET: 112 | dataset_name = 'CROWDPOSE' 113 | else: 114 | raise ValueError('Please implement flip_index for new dataset: %s.' 
% cfg.DATASET.DATASET) 115 | flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] \ 116 | if cfg.DATASET.WITH_CENTER else FLIP_CONFIG[dataset_name] 117 | 118 | heatmaps_avg = 0 119 | num_heatmaps = 0 120 | outputs_flip = model(torch.flip(image, [3])) 121 | for i in range(len(outputs_flip)): 122 | output = outputs_flip[i] 123 | if len(outputs_flip) > 1 and i != len(outputs_flip) - 1: 124 | output = torch.nn.functional.interpolate( 125 | output, 126 | size=(outputs_flip[-1].size(2), outputs_flip[-1].size(3)), 127 | mode='bilinear', 128 | align_corners=False 129 | ) 130 | output = torch.flip(output, [3]) 131 | outputs.append(output) 132 | 133 | offset_feat = cfg.DATASET.NUM_JOINTS \ 134 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] else 0 135 | 136 | if cfg.LOSS.WITH_HEATMAPS_LOSS[i] and cfg.TEST.WITH_HEATMAPS[i]: 137 | heatmaps_avg += \ 138 | output[:, :cfg.DATASET.NUM_JOINTS][:, flip_index, :, :] 139 | num_heatmaps += 1 140 | 141 | if cfg.LOSS.WITH_AE_LOSS[i] and cfg.TEST.WITH_AE[i]: 142 | tags.append(output[:, offset_feat:]) 143 | if cfg.MODEL.TAG_PER_JOINT: 144 | tags[-1] = tags[-1][:, flip_index, :, :] 145 | 146 | heatmaps.append(heatmaps_avg/num_heatmaps) 147 | 148 | if cfg.DATASET.WITH_CENTER and cfg.TEST.IGNORE_CENTER: 149 | heatmaps = [hms[:, :-1] for hms in heatmaps] 150 | tags = [tms[:, :-1] for tms in tags] 151 | 152 | if project2image and size_projected: 153 | heatmaps = [ 154 | torch.nn.functional.interpolate( 155 | hms, 156 | size=(size_projected[1], size_projected[0]), 157 | mode='bilinear', 158 | align_corners=False 159 | ) 160 | for hms in heatmaps 161 | ] 162 | 163 | tags = [ 164 | torch.nn.functional.interpolate( 165 | tms, 166 | size=(size_projected[1], size_projected[0]), 167 | mode='bilinear', 168 | align_corners=False 169 | ) 170 | for tms in tags 171 | ] 172 | 173 | return outputs, heatmaps, tags 174 | 175 | 176 | def aggregate_results( 177 | cfg, scale_factor, final_heatmaps, tags_list, heatmaps, tags 178 | ): 179 | if scale_factor == 1 or len(cfg.TEST.SCALE_FACTOR) == 1: 180 | if final_heatmaps is not None and not cfg.TEST.PROJECT2IMAGE: 181 | tags = [ 182 | torch.nn.functional.interpolate( 183 | tms, 184 | size=(final_heatmaps.size(2), final_heatmaps.size(3)), 185 | mode='bilinear', 186 | align_corners=False 187 | ) 188 | for tms in tags 189 | ] 190 | for tms in tags: 191 | tags_list.append(torch.unsqueeze(tms, dim=4)) 192 | 193 | heatmaps_avg = (heatmaps[0] + heatmaps[1])/2.0 if cfg.TEST.FLIP_TEST \ 194 | else heatmaps[0] 195 | 196 | if final_heatmaps is None: 197 | final_heatmaps = heatmaps_avg 198 | elif cfg.TEST.PROJECT2IMAGE: 199 | final_heatmaps += heatmaps_avg 200 | else: 201 | final_heatmaps += torch.nn.functional.interpolate( 202 | heatmaps_avg, 203 | size=(final_heatmaps.size(2), final_heatmaps.size(3)), 204 | mode='bilinear', 205 | align_corners=False 206 | ) 207 | 208 | return final_heatmaps, tags_list 209 | -------------------------------------------------------------------------------- /lib/core/trainer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import logging 13 | import os 14 | import time 15 | 16 | from utils.utils import AverageMeter 17 | from utils.vis import save_debug_images 18 | from ptflops import get_model_complexity_info 19 | 20 | 21 | def do_train(cfg, model, data_loader, loss_factory, optimizer, epoch, 22 | output_dir, tb_log_dir, writer_dict, fp16=False): 23 | logger = logging.getLogger("Training") 24 | 25 | batch_time = AverageMeter() 26 | data_time = AverageMeter() 27 | 28 | heatmaps_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)] 29 | push_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)] 30 | pull_loss_meter = [AverageMeter() for _ in range(cfg.LOSS.NUM_STAGES)] 31 | 32 | # switch to train mode 33 | model.train() 34 | #flops, params = get_model_complexity_info(model.module.features, (3,384,384), as_strings=False) 35 | #print("FLops", flops) 36 | #print("Params", params) 37 | 38 | 39 | for layer in model.module.features: 40 | for param in layer.parameters(): 41 | param.requires_grad = False 42 | 43 | end = time.time() 44 | for i, (images, heatmaps, masks, joints) in enumerate(data_loader): 45 | # measure data loading time 46 | data_time.update(time.time() - end) 47 | 48 | # compute output 49 | outputs = model(images) 50 | 51 | heatmaps = list(map(lambda x: x.cuda(non_blocking=True), heatmaps)) 52 | masks = list(map(lambda x: x.cuda(non_blocking=True), masks)) 53 | joints = list(map(lambda x: x.cuda(non_blocking=True), joints)) 54 | 55 | # loss = loss_factory(outputs, heatmaps, masks) 56 | heatmaps_losses, push_losses, pull_losses = \ 57 | loss_factory(outputs, heatmaps, masks, joints) 58 | 59 | loss = 0 60 | for idx in range(cfg.LOSS.NUM_STAGES): 61 | if heatmaps_losses[idx] is not None: 62 | heatmaps_loss = heatmaps_losses[idx].mean(dim=0) 63 | heatmaps_loss_meter[idx].update( 64 | heatmaps_loss.item(), images.size(0) 65 | ) 66 | loss = loss + heatmaps_loss 67 | if push_losses[idx] is not None: 68 | push_loss = push_losses[idx].mean(dim=0) 69 | push_loss_meter[idx].update( 70 | push_loss.item(), images.size(0) 71 | ) 72 | loss = loss + push_loss 73 | if pull_losses[idx] is not None: 74 | pull_loss = pull_losses[idx].mean(dim=0) 75 | pull_loss_meter[idx].update( 76 | pull_loss.item(), images.size(0) 77 | ) 78 | loss = loss + pull_loss 79 | 80 | # compute gradient and do update step 81 | optimizer.zero_grad() 82 | if fp16: 83 | optimizer.backward(loss) 84 | else: 85 | loss.backward() 86 | optimizer.step() 87 | 88 | # measure elapsed time 89 | batch_time.update(time.time() - end) 90 | end = time.time() 91 | 92 | if i % cfg.PRINT_FREQ == 0 and cfg.RANK == 0: 93 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 94 | 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 95 | 'Speed: {speed:.1f} samples/s\t' \ 96 | 'Data: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 97 | '{heatmaps_loss}{push_loss}{pull_loss}'.format( 98 | epoch, i, len(data_loader), 99 | batch_time=batch_time, 100 | speed=images.size(0)/batch_time.val, 101 | data_time=data_time, 102 | heatmaps_loss=_get_loss_info(heatmaps_loss_meter, 'heatmaps'), 103 | push_loss=_get_loss_info(push_loss_meter, 'push'), 104 | pull_loss=_get_loss_info(pull_loss_meter, 'pull') 105 | ) 106 | logger.info(msg) 107 | 108 | writer = 
writer_dict['writer'] 109 | global_steps = writer_dict['train_global_steps'] 110 | for idx in range(cfg.LOSS.NUM_STAGES): 111 | writer.add_scalar( 112 | 'train_stage{}_heatmaps_loss'.format(i), 113 | heatmaps_loss_meter[idx].val, 114 | global_steps 115 | ) 116 | writer.add_scalar( 117 | 'train_stage{}_push_loss'.format(idx), 118 | push_loss_meter[idx].val, 119 | global_steps 120 | ) 121 | writer.add_scalar( 122 | 'train_stage{}_pull_loss'.format(idx), 123 | pull_loss_meter[idx].val, 124 | global_steps 125 | ) 126 | writer_dict['train_global_steps'] = global_steps + 1 127 | 128 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 129 | for scale_idx in range(len(outputs)): 130 | prefix_scale = prefix + '_output_{}'.format( 131 | cfg.DATASET.OUTPUT_SIZE[scale_idx] 132 | ) 133 | save_debug_images( 134 | cfg, images, heatmaps[scale_idx], masks[scale_idx], 135 | outputs[scale_idx], prefix_scale 136 | ) 137 | 138 | 139 | def _get_loss_info(loss_meters, loss_name): 140 | msg = '' 141 | for i, meter in enumerate(loss_meters): 142 | msg += 'Stage{i}-{name}: {meter.val:.3e} ({meter.avg:.3e})\t'.format( 143 | i=i, name=loss_name, meter=meter 144 | ) 145 | 146 | return msg 147 | -------------------------------------------------------------------------------- /lib/dataset/COCODataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from collections import defaultdict 13 | from collections import OrderedDict 14 | import logging 15 | import os 16 | import os.path 17 | 18 | import cv2 19 | import json_tricks as json 20 | import numpy as np 21 | from torch.utils.data import Dataset 22 | 23 | from pycocotools.cocoeval import COCOeval 24 | from utils import zipreader 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class CocoDataset(Dataset): 30 | """`MS Coco Detection `_ Dataset. 31 | 32 | Args: 33 | root (string): Root directory where dataset is located to. 34 | dataset (string): Dataset name(train2017, val2017, test2017). 35 | data_format(string): Data format for reading('jpg', 'zip') 36 | transform (callable, optional): A function/transform that takes in an opencv image 37 | and returns a transformed version. E.g, ``transforms.ToTensor`` 38 | target_transform (callable, optional): A function/transform that takes in the 39 | target and transforms it. 
40 | """ 41 | 42 | def __init__(self, root, dataset, data_format, transform=None, 43 | target_transform=None): 44 | from pycocotools.coco import COCO 45 | self.name = 'COCO' 46 | self.root = root 47 | self.dataset = dataset 48 | self.data_format = data_format 49 | self.coco = COCO(self._get_anno_file_name()) 50 | self.ids = list(self.coco.imgs.keys()) 51 | self.transform = transform 52 | self.target_transform = target_transform 53 | 54 | cats = [cat['name'] 55 | for cat in self.coco.loadCats(self.coco.getCatIds())] 56 | self.classes = ['__background__'] + cats 57 | logger.info('=> classes: {}'.format(self.classes)) 58 | self.num_classes = len(self.classes) 59 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 60 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) 61 | self._coco_ind_to_class_ind = dict( 62 | [ 63 | (self._class_to_coco_ind[cls], self._class_to_ind[cls]) 64 | for cls in self.classes[1:] 65 | ] 66 | ) 67 | 68 | def _get_anno_file_name(self): 69 | # example: root/annotations/person_keypoints_tran2017.json 70 | # image_info_test-dev2017.json 71 | if 'test' in self.dataset: 72 | return os.path.join( 73 | self.root, 74 | 'annotations', 75 | 'image_info_{}.json'.format( 76 | self.dataset 77 | ) 78 | ) 79 | else: 80 | return os.path.join( 81 | self.root, 82 | 'annotations', 83 | 'person_keypoints_{}.json'.format( 84 | self.dataset 85 | ) 86 | ) 87 | 88 | def _get_image_path(self, file_name): 89 | images_dir = os.path.join(self.root, 'images') 90 | dataset = 'test2017' if 'test' in self.dataset else self.dataset 91 | if self.data_format == 'zip': 92 | return os.path.join(images_dir, dataset) + '.zip@' + file_name 93 | else: 94 | return os.path.join(images_dir, dataset, file_name) 95 | 96 | def __getitem__(self, index): 97 | """ 98 | Args: 99 | index (int): Index 100 | 101 | Returns: 102 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. 
103 | """ 104 | coco = self.coco 105 | img_id = self.ids[index] 106 | ann_ids = coco.getAnnIds(imgIds=img_id) 107 | target = coco.loadAnns(ann_ids) 108 | 109 | file_name = coco.loadImgs(img_id)[0]['file_name'] 110 | 111 | if self.data_format == 'zip': 112 | img = zipreader.imread( 113 | self._get_image_path(file_name), 114 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 115 | ) 116 | else: 117 | img = cv2.imread( 118 | self._get_image_path(file_name), 119 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 120 | ) 121 | 122 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 123 | 124 | if self.transform is not None: 125 | img = self.transform(img) 126 | 127 | if self.target_transform is not None: 128 | target = self.target_transform(target) 129 | 130 | return img, target 131 | 132 | def __len__(self): 133 | return len(self.ids) 134 | 135 | def __repr__(self): 136 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' 137 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) 138 | fmt_str += ' Root Location: {}\n'.format(self.root) 139 | tmp = ' Transforms (if any): ' 140 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 141 | tmp = ' Target Transforms (if any): ' 142 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 143 | return fmt_str 144 | 145 | def processKeypoints(self, keypoints): 146 | tmp = keypoints.copy() 147 | if keypoints[:, 2].max() > 0: 148 | p = keypoints[keypoints[:, 2] > 0][:, :2].mean(axis=0) 149 | num_keypoints = keypoints.shape[0] 150 | for i in range(num_keypoints): 151 | tmp[i][0:3] = [ 152 | float(keypoints[i][0]), 153 | float(keypoints[i][1]), 154 | float(keypoints[i][2]) 155 | ] 156 | 157 | return tmp 158 | 159 | def evaluate(self, cfg, preds, scores, output_dir, 160 | *args, **kwargs): 161 | ''' 162 | Perform evaluation on COCO keypoint task 163 | :param cfg: cfg dictionary 164 | :param preds: prediction 165 | :param output_dir: output directory 166 | :param args: 167 | :param kwargs: 168 | :return: 169 | ''' 170 | res_folder = os.path.join(output_dir, 'results') 171 | if not os.path.exists(res_folder): 172 | os.makedirs(res_folder) 173 | res_file = os.path.join( 174 | res_folder, 'keypoints_%s_results.json' % self.dataset) 175 | 176 | # preds is a list of: image x person x (keypoints) 177 | # keypoints: num_joints * 4 (x, y, score, tag) 178 | kpts = defaultdict(list) 179 | for idx, _kpts in enumerate(preds): 180 | img_id = self.ids[idx] 181 | file_name = self.coco.loadImgs(img_id)[0]['file_name'] 182 | for idx_kpt, kpt in enumerate(_kpts): 183 | area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (np.max(kpt[:, 1]) - np.min(kpt[:, 1])) 184 | kpt = self.processKeypoints(kpt) 185 | # if self.with_center: 186 | if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER: 187 | kpt = kpt[:-1] 188 | 189 | kpts[int(file_name[-16:-4])].append( 190 | { 191 | 'keypoints': kpt[:, 0:3], 192 | 'score': scores[idx][idx_kpt], 193 | 'tags': kpt[:, 3], 194 | 'image': int(file_name[-16:-4]), 195 | 'area': area 196 | } 197 | ) 198 | 199 | # rescoring and oks nms 200 | oks_nmsed_kpts = [] 201 | # image x person x (keypoints) 202 | for img in kpts.keys(): 203 | # person x (keypoints) 204 | img_kpts = kpts[img] 205 | # person x (keypoints) 206 | # do not use nms, keep all detections 207 | keep = [] 208 | if len(keep) == 0: 209 | oks_nmsed_kpts.append(img_kpts) 210 | else: 211 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) 212 | 213 | 
self._write_coco_keypoint_results( 214 | oks_nmsed_kpts, res_file 215 | ) 216 | 217 | if 'test' not in self.dataset: 218 | info_str = self._do_python_keypoint_eval( 219 | res_file, res_folder 220 | ) 221 | name_value = OrderedDict(info_str) 222 | return name_value, name_value['AP'] 223 | else: 224 | return {'Null': 0}, 0 225 | 226 | def _write_coco_keypoint_results(self, keypoints, res_file): 227 | data_pack = [ 228 | { 229 | 'cat_id': self._class_to_coco_ind[cls], 230 | 'cls_ind': cls_ind, 231 | 'cls': cls, 232 | 'ann_type': 'keypoints', 233 | 'keypoints': keypoints 234 | } 235 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__' 236 | ] 237 | 238 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) 239 | logger.info('=> Writing results json to %s' % res_file) 240 | with open(res_file, 'w') as f: 241 | json.dump(results, f, sort_keys=True, indent=4) 242 | try: 243 | json.load(open(res_file)) 244 | except Exception: 245 | content = [] 246 | with open(res_file, 'r') as f: 247 | for line in f: 248 | content.append(line) 249 | content[-1] = ']' 250 | with open(res_file, 'w') as f: 251 | for c in content: 252 | f.write(c) 253 | 254 | def _coco_keypoint_results_one_category_kernel(self, data_pack): 255 | cat_id = data_pack['cat_id'] 256 | keypoints = data_pack['keypoints'] 257 | cat_results = [] 258 | num_joints = 17 259 | 260 | for img_kpts in keypoints: 261 | if len(img_kpts) == 0: 262 | continue 263 | 264 | _key_points = np.array( 265 | [img_kpts[k]['keypoints'] for k in range(len(img_kpts))] 266 | ) 267 | key_points = np.zeros( 268 | (_key_points.shape[0], num_joints * 3), 269 | dtype=np.float 270 | ) 271 | 272 | for ipt in range(num_joints): 273 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] 274 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] 275 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score. 276 | 277 | for k in range(len(img_kpts)): 278 | kpt = key_points[k].reshape((num_joints, 3)) 279 | left_top = np.amin(kpt, axis=0) 280 | right_bottom = np.amax(kpt, axis=0) 281 | 282 | w = right_bottom[0] - left_top[0] 283 | h = right_bottom[1] - left_top[1] 284 | 285 | cat_results.append({ 286 | 'image_id': img_kpts[k]['image'], 287 | 'category_id': cat_id, 288 | 'keypoints': list(key_points[k]), 289 | 'score': img_kpts[k]['score'], 290 | 'bbox': list([left_top[0], left_top[1], w, h]) 291 | }) 292 | 293 | return cat_results 294 | 295 | def _do_python_keypoint_eval(self, res_file, res_folder): 296 | coco_dt = self.coco.loadRes(res_file) 297 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') 298 | coco_eval.params.useSegm = None 299 | coco_eval.evaluate() 300 | coco_eval.accumulate() 301 | coco_eval.summarize() 302 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] 303 | 304 | info_str = [] 305 | for ind, name in enumerate(stats_names): 306 | info_str.append((name, coco_eval.stats[ind])) 307 | # info_str.append(coco_eval.stats[ind]) 308 | 309 | return info_str 310 | -------------------------------------------------------------------------------- /lib/dataset/COCOKeypoints.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import logging 13 | 14 | import numpy as np 15 | 16 | import pycocotools 17 | from .COCODataset import CocoDataset 18 | from .target_generators import HeatmapGenerator 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class CocoKeypoints(CocoDataset): 25 | def __init__(self, 26 | cfg, 27 | dataset_name, 28 | remove_images_without_annotations, 29 | heatmap_generator, 30 | joints_generator, 31 | transforms=None): 32 | super().__init__(cfg.DATASET.ROOT, 33 | dataset_name, 34 | cfg.DATASET.DATA_FORMAT) 35 | 36 | if cfg.DATASET.WITH_CENTER: 37 | assert cfg.DATASET.NUM_JOINTS == 18, 'Number of joint with center for COCO is 18' 38 | else: 39 | assert cfg.DATASET.NUM_JOINTS == 17, 'Number of joint for COCO is 17' 40 | 41 | self.num_scales = self._init_check(heatmap_generator, joints_generator) 42 | 43 | self.num_joints = cfg.DATASET.NUM_JOINTS 44 | self.with_center = cfg.DATASET.WITH_CENTER 45 | self.num_joints_without_center = self.num_joints - 1 \ 46 | if self.with_center else self.num_joints 47 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA 48 | self.base_sigma = cfg.DATASET.BASE_SIGMA 49 | self.base_size = cfg.DATASET.BASE_SIZE 50 | self.int_sigma = cfg.DATASET.INT_SIGMA 51 | 52 | if remove_images_without_annotations: 53 | self.ids = [ 54 | img_id 55 | for img_id in self.ids 56 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 57 | ] 58 | 59 | self.transforms = transforms 60 | self.heatmap_generator = heatmap_generator 61 | self.joints_generator = joints_generator 62 | 63 | def __getitem__(self, idx): 64 | img, anno = super().__getitem__(idx) 65 | 66 | mask = self.get_mask(anno, idx) 67 | 68 | anno = [ 69 | obj for obj in anno 70 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 71 | ] 72 | 73 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint 74 | joints = self.get_joints(anno) 75 | 76 | mask_list = [mask.copy() for _ in range(self.num_scales)] 77 | joints_list = [joints.copy() for _ in range(self.num_scales)] 78 | target_list = list() 79 | 80 | if self.transforms: 81 | img, mask_list, joints_list = self.transforms( 82 | img, mask_list, joints_list 83 | ) 84 | 85 | for scale_id in range(self.num_scales): 86 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id]) 87 | joints_t = self.joints_generator[scale_id](joints_list[scale_id]) 88 | 89 | target_list.append(target_t.astype(np.float32)) 90 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32) 91 | joints_list[scale_id] = joints_t.astype(np.int32) 92 | 93 | return img, target_list, mask_list, joints_list 94 | 95 | def get_joints(self, anno): 96 | num_people = len(anno) 97 | 98 | if self.scale_aware_sigma: 99 | joints = np.zeros((num_people, self.num_joints, 4)) 100 | else: 101 | joints = np.zeros((num_people, self.num_joints, 3)) 102 | 103 | for i, obj in enumerate(anno): 104 | joints[i, :self.num_joints_without_center, :3] = \ 105 | np.array(obj['keypoints']).reshape([-1, 3]) 106 | if self.with_center: 107 | joints_sum = np.sum(joints[i, :-1, :2], axis=0) 108 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0]) 109 | if num_vis_joints > 0: 110 | joints[i, -1, :2] = joints_sum / num_vis_joints 111 | 
joints[i, -1, 2] = 1 112 | if self.scale_aware_sigma: 113 | # get person box 114 | box = obj['bbox'] 115 | size = max(box[2], box[3]) 116 | sigma = size / self.base_size * self.base_sigma 117 | if self.int_sigma: 118 | sigma = int(np.round(sigma + 0.5)) 119 | assert sigma > 0, sigma 120 | joints[i, :, 3] = sigma 121 | 122 | return joints 123 | 124 | def get_mask(self, anno, idx): 125 | coco = self.coco 126 | img_info = coco.loadImgs(self.ids[idx])[0] 127 | 128 | m = np.zeros((img_info['height'], img_info['width'])) 129 | 130 | for obj in anno: 131 | if obj['iscrowd']: 132 | rle = pycocotools.mask.frPyObjects( 133 | obj['segmentation'], img_info['height'], img_info['width']) 134 | m += pycocotools.mask.decode(rle) 135 | elif obj['num_keypoints'] == 0: 136 | rles = pycocotools.mask.frPyObjects( 137 | obj['segmentation'], img_info['height'], img_info['width']) 138 | for rle in rles: 139 | m += pycocotools.mask.decode(rle) 140 | 141 | return m < 0.5 142 | 143 | def _init_check(self, heatmap_generator, joints_generator): 144 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple' 145 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple' 146 | assert len(heatmap_generator) == len(joints_generator), \ 147 | 'heatmap_generator and joints_generator should have same length,'\ 148 | 'got {} vs {}.'.format( 149 | len(heatmap_generator), len(joints_generator) 150 | ) 151 | return len(heatmap_generator) 152 | -------------------------------------------------------------------------------- /lib/dataset/CrowdPoseDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from collections import defaultdict 12 | from collections import OrderedDict 13 | import logging 14 | import os 15 | import os.path 16 | 17 | import cv2 18 | import json_tricks as json 19 | import numpy as np 20 | from torch.utils.data import Dataset 21 | 22 | from crowdposetools.cocoeval import COCOeval 23 | from utils import zipreader 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class CrowdPoseDataset(Dataset): 29 | """`CrowdPose`_ Dataset. 30 | 31 | Args: 32 | root (string): Root directory where dataset is located to. 33 | dataset (string): Dataset name(train2017, val2017, test2017). 34 | data_format(string): Data format for reading('jpg', 'zip') 35 | transform (callable, optional): A function/transform that takes in an opencv image 36 | and returns a transformed version. E.g, ``transforms.ToTensor`` 37 | target_transform (callable, optional): A function/transform that takes in the 38 | target and transforms it. 
39 | """ 40 | 41 | def __init__(self, root, dataset, data_format, transform=None, 42 | target_transform=None): 43 | from crowdposetools.coco import COCO 44 | self.name = 'CROWDPOSE' 45 | self.root = root 46 | self.dataset = dataset 47 | self.data_format = data_format 48 | self.coco = COCO(self._get_anno_file_name()) 49 | self.ids = list(self.coco.imgs.keys()) 50 | self.transform = transform 51 | self.target_transform = target_transform 52 | 53 | cats = [cat['name'] 54 | for cat in self.coco.loadCats(self.coco.getCatIds())] 55 | self.classes = ['__background__'] + cats 56 | logger.info('=> classes: {}'.format(self.classes)) 57 | self.num_classes = len(self.classes) 58 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 59 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) 60 | self._coco_ind_to_class_ind = dict( 61 | [ 62 | (self._class_to_coco_ind[cls], self._class_to_ind[cls]) 63 | for cls in self.classes[1:] 64 | ] 65 | ) 66 | 67 | def _get_anno_file_name(self): 68 | # example: root/json/crowdpose_{train,val,test}.json 69 | return os.path.join( 70 | self.root, 71 | 'json', 72 | 'crowdpose_{}.json'.format( 73 | self.dataset 74 | ) 75 | ) 76 | 77 | def _get_image_path(self, file_name): 78 | images_dir = os.path.join(self.root, 'images') 79 | if self.data_format == 'zip': 80 | return images_dir + '.zip@' + file_name 81 | else: 82 | return os.path.join(images_dir, file_name) 83 | 84 | def __getitem__(self, index): 85 | """ 86 | Args: 87 | index (int): Index 88 | 89 | Returns: 90 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. 91 | """ 92 | coco = self.coco 93 | img_id = self.ids[index] 94 | ann_ids = coco.getAnnIds(imgIds=img_id) 95 | target = coco.loadAnns(ann_ids) 96 | 97 | file_name = coco.loadImgs(img_id)[0]['file_name'] 98 | 99 | if self.data_format == 'zip': 100 | img = zipreader.imread( 101 | self._get_image_path(file_name), 102 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 103 | ) 104 | else: 105 | img = cv2.imread( 106 | self._get_image_path(file_name), 107 | cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 108 | ) 109 | 110 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 111 | 112 | if self.transform is not None: 113 | img = self.transform(img) 114 | 115 | if self.target_transform is not None: 116 | target = self.target_transform(target) 117 | 118 | return img, target 119 | 120 | def __len__(self): 121 | return len(self.ids) 122 | 123 | def __repr__(self): 124 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' 125 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) 126 | fmt_str += ' Root Location: {}\n'.format(self.root) 127 | tmp = ' Transforms (if any): ' 128 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 129 | tmp = ' Target Transforms (if any): ' 130 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 131 | return fmt_str 132 | 133 | def processKeypoints(self, keypoints): 134 | tmp = keypoints.copy() 135 | if keypoints[:, 2].max() > 0: 136 | p = keypoints[keypoints[:, 2] > 0][:, :2].mean(axis=0) 137 | num_keypoints = keypoints.shape[0] 138 | for i in range(num_keypoints): 139 | tmp[i][0:3] = [ 140 | float(keypoints[i][0]), 141 | float(keypoints[i][1]), 142 | float(keypoints[i][2]) 143 | ] 144 | 145 | return tmp 146 | 147 | def evaluate(self, cfg, preds, scores, output_dir, 148 | *args, **kwargs): 149 | ''' 150 | Perform evaluation on COCO keypoint task 151 | :param cfg: cfg 
dictionary 152 | :param preds: prediction 153 | :param output_dir: output directory 154 | :param args: 155 | :param kwargs: 156 | :return: 157 | ''' 158 | res_folder = os.path.join(output_dir, 'results') 159 | if not os.path.exists(res_folder): 160 | os.makedirs(res_folder) 161 | res_file = os.path.join( 162 | res_folder, 'keypoints_%s_results.json' % self.dataset) 163 | 164 | # preds is a list of: image x person x (keypoints) 165 | # keypoints: num_joints * 4 (x, y, score, tag) 166 | kpts = defaultdict(list) 167 | for idx, _kpts in enumerate(preds): 168 | img_id = self.ids[idx] 169 | file_name = self.coco.loadImgs(img_id)[0]['file_name'] 170 | for idx_kpt, kpt in enumerate(_kpts): 171 | area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (np.max(kpt[:, 1]) - np.min(kpt[:, 1])) 172 | kpt = self.processKeypoints(kpt) 173 | # if self.with_center: 174 | if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER: 175 | kpt = kpt[:-1] 176 | 177 | kpts[int(file_name.split('.')[0])].append( 178 | { 179 | 'keypoints': kpt[:, 0:3], 180 | 'score': scores[idx][idx_kpt], 181 | 'tags': kpt[:, 3], 182 | 'image': int(file_name.split('.')[0]), 183 | 'area': area 184 | } 185 | ) 186 | 187 | # rescoring and oks nms 188 | oks_nmsed_kpts = [] 189 | # image x person x (keypoints) 190 | for img in kpts.keys(): 191 | # person x (keypoints) 192 | img_kpts = kpts[img] 193 | # person x (keypoints) 194 | # do not use nms, keep all detections 195 | keep = [] 196 | if len(keep) == 0: 197 | oks_nmsed_kpts.append(img_kpts) 198 | else: 199 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) 200 | 201 | self._write_coco_keypoint_results( 202 | oks_nmsed_kpts, res_file 203 | ) 204 | 205 | # CrowdPose `test` set has annotation. 206 | info_str = self._do_python_keypoint_eval( 207 | res_file, res_folder 208 | ) 209 | name_value = OrderedDict(info_str) 210 | return name_value, name_value['AP'] 211 | 212 | def _write_coco_keypoint_results(self, keypoints, res_file): 213 | data_pack = [ 214 | { 215 | 'cat_id': self._class_to_coco_ind[cls], 216 | 'cls_ind': cls_ind, 217 | 'cls': cls, 218 | 'ann_type': 'keypoints', 219 | 'keypoints': keypoints 220 | } 221 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__' 222 | ] 223 | 224 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) 225 | logger.info('=> Writing results json to %s' % res_file) 226 | with open(res_file, 'w') as f: 227 | json.dump(results, f, sort_keys=True, indent=4) 228 | try: 229 | json.load(open(res_file)) 230 | except Exception: 231 | content = [] 232 | with open(res_file, 'r') as f: 233 | for line in f: 234 | content.append(line) 235 | content[-1] = ']' 236 | with open(res_file, 'w') as f: 237 | for c in content: 238 | f.write(c) 239 | 240 | def _coco_keypoint_results_one_category_kernel(self, data_pack): 241 | cat_id = data_pack['cat_id'] 242 | keypoints = data_pack['keypoints'] 243 | cat_results = [] 244 | num_joints = 14 245 | 246 | for img_kpts in keypoints: 247 | if len(img_kpts) == 0: 248 | continue 249 | 250 | _key_points = np.array( 251 | [img_kpts[k]['keypoints'] for k in range(len(img_kpts))] 252 | ) 253 | key_points = np.zeros( 254 | (_key_points.shape[0], num_joints * 3), 255 | dtype=np.float 256 | ) 257 | 258 | for ipt in range(num_joints): 259 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] 260 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] 261 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score. 
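            # key_points now holds the flattened (x, y, score) triplets expected by the
            # COCO-style results format; the loop below derives a tight bounding box from
            # each person's keypoint extents and emits one result dict per detection for
            # evaluation with crowdposetools' COCOeval.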
262 | 263 | for k in range(len(img_kpts)): 264 | kpt = key_points[k].reshape((num_joints, 3)) 265 | left_top = np.amin(kpt, axis=0) 266 | right_bottom = np.amax(kpt, axis=0) 267 | 268 | w = right_bottom[0] - left_top[0] 269 | h = right_bottom[1] - left_top[1] 270 | 271 | cat_results.append({ 272 | 'image_id': img_kpts[k]['image'], 273 | 'category_id': cat_id, 274 | 'keypoints': list(key_points[k]), 275 | 'score': img_kpts[k]['score'], 276 | 'bbox': list([left_top[0], left_top[1], w, h]) 277 | }) 278 | 279 | return cat_results 280 | 281 | def _do_python_keypoint_eval(self, res_file, res_folder): 282 | coco_dt = self.coco.loadRes(res_file) 283 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') 284 | coco_eval.params.useSegm = None 285 | coco_eval.evaluate() 286 | coco_eval.accumulate() 287 | coco_eval.summarize() 288 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP (easy)', 'AP (medium)', 'AP (hard)'] 289 | stats_index = [0, 1, 2, 5, 6, 7, 8, 9, 10] 290 | 291 | info_str = [] 292 | for ind, name in enumerate(stats_names): 293 | info_str.append((name, coco_eval.stats[stats_index[ind]])) 294 | # info_str.append(coco_eval.stats[ind]) 295 | 296 | return info_str 297 | -------------------------------------------------------------------------------- /lib/dataset/CrowdPoseKeypoints.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | 13 | import numpy as np 14 | 15 | #import crowdposetools 16 | #from .CrowdPoseDataset import CrowdPoseDataset 17 | from .target_generators import HeatmapGenerator 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class CrowdPoseKeypoints(CrowdPoseDataset): 24 | def __init__(self, 25 | cfg, 26 | dataset_name, 27 | remove_images_without_annotations, 28 | heatmap_generator, 29 | joints_generator, 30 | transforms=None): 31 | super().__init__(cfg.DATASET.ROOT, 32 | dataset_name, 33 | cfg.DATASET.DATA_FORMAT) 34 | 35 | if cfg.DATASET.WITH_CENTER: 36 | assert cfg.DATASET.NUM_JOINTS == 15, 'Number of joint with center for CrowdPose is 15' 37 | else: 38 | assert cfg.DATASET.NUM_JOINTS == 14, 'Number of joint for CrowdPose is 14' 39 | 40 | self.num_scales = self._init_check(heatmap_generator, joints_generator) 41 | 42 | self.num_joints = cfg.DATASET.NUM_JOINTS 43 | self.with_center = cfg.DATASET.WITH_CENTER 44 | self.num_joints_without_center = self.num_joints - 1 \ 45 | if self.with_center else self.num_joints 46 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA 47 | self.base_sigma = cfg.DATASET.BASE_SIGMA 48 | self.base_size = cfg.DATASET.BASE_SIZE 49 | self.int_sigma = cfg.DATASET.INT_SIGMA 50 | 51 | if remove_images_without_annotations: 52 | self.ids = [ 53 | img_id 54 | for img_id in self.ids 55 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 56 | ] 57 | 58 | self.transforms = transforms 59 | self.heatmap_generator = heatmap_generator 60 | self.joints_generator = joints_generator 61 | 62 | def __getitem__(self, idx): 63 | img, anno = super().__getitem__(idx) 64 | 65 | mask = self.get_mask(anno, idx) 66 | 
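        # Keep only annotations that are non-crowd or carry at least one labelled
        # keypoint. Unlike the COCO variant, get_mask() above returns an all-valid
        # mask, since no crowd/segmentation regions are available here to ignore.
        # The lists returned below contain one heatmap target, one mask, and one
        # joints array per output resolution in cfg.DATASET.OUTPUT_SIZE.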
67 | anno = [ 68 | obj for obj in anno 69 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 70 | ] 71 | 72 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint 73 | joints = self.get_joints(anno) 74 | 75 | mask_list = [mask.copy() for _ in range(self.num_scales)] 76 | joints_list = [joints.copy() for _ in range(self.num_scales)] 77 | target_list = list() 78 | 79 | if self.transforms: 80 | img, mask_list, joints_list = self.transforms( 81 | img, mask_list, joints_list 82 | ) 83 | 84 | for scale_id in range(self.num_scales): 85 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id]) 86 | joints_t = self.joints_generator[scale_id](joints_list[scale_id]) 87 | 88 | target_list.append(target_t.astype(np.float32)) 89 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32) 90 | joints_list[scale_id] = joints_t.astype(np.int32) 91 | 92 | return img, target_list, mask_list, joints_list 93 | 94 | def get_joints(self, anno): 95 | num_people = len(anno) 96 | 97 | if self.scale_aware_sigma: 98 | joints = np.zeros((num_people, self.num_joints, 4)) 99 | else: 100 | joints = np.zeros((num_people, self.num_joints, 3)) 101 | 102 | for i, obj in enumerate(anno): 103 | joints[i, :self.num_joints_without_center, :3] = \ 104 | np.array(obj['keypoints']).reshape([-1, 3]) 105 | if self.with_center: 106 | joints_sum = np.sum(joints[i, :-1, :2], axis=0) 107 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0]) 108 | if num_vis_joints > 0: 109 | joints[i, -1, :2] = joints_sum / num_vis_joints 110 | joints[i, -1, 2] = 1 111 | if self.scale_aware_sigma: 112 | # get person box 113 | box = obj['bbox'] 114 | size = max(box[2], box[3]) 115 | sigma = size / self.base_size * self.base_sigma 116 | if self.int_sigma: 117 | sigma = int(np.round(sigma + 0.5)) 118 | assert sigma > 0, sigma 119 | joints[i, :, 3] = sigma 120 | 121 | return joints 122 | 123 | def get_mask(self, anno, idx): 124 | coco = self.coco 125 | img_info = coco.loadImgs(self.ids[idx])[0] 126 | 127 | m = np.zeros((img_info['height'], img_info['width'])) 128 | 129 | return m < 0.5 130 | 131 | def _init_check(self, heatmap_generator, joints_generator): 132 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple' 133 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple' 134 | assert len(heatmap_generator) == len(joints_generator), \ 135 | 'heatmap_generator and joints_generator should have same length,'\ 136 | 'got {} vs {}.'.format( 137 | len(heatmap_generator), len(joints_generator) 138 | ) 139 | return len(heatmap_generator) 140 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .COCOKeypoints import CocoKeypoints as coco 8 | #from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose 9 | from .build import make_dataloader 10 | from .build import make_test_dataloader 11 | 12 | # dataset dependent configuration for visualization 13 | coco_part_labels = [ 14 | 'nose', 'eye_l', 'eye_r', 'ear_l', 'ear_r', 15 | 'sho_l', 'sho_r', 'elb_l', 'elb_r', 'wri_l', 'wri_r', 16 | 'hip_l', 'hip_r', 'kne_l', 'kne_r', 'ank_l', 'ank_r' 17 | ] 18 | coco_part_idx = { 19 | b: a for a, b in enumerate(coco_part_labels) 20 | } 21 | coco_part_orders = [ 22 | ('nose', 'eye_l'), ('eye_l', 'eye_r'), ('eye_r', 'nose'), 23 | ('eye_l', 'ear_l'), ('eye_r', 'ear_r'), ('ear_l', 'sho_l'), 24 | ('ear_r', 'sho_r'), ('sho_l', 'sho_r'), ('sho_l', 'hip_l'), 25 | ('sho_r', 'hip_r'), ('hip_l', 'hip_r'), ('sho_l', 'elb_l'), 26 | ('elb_l', 'wri_l'), ('sho_r', 'elb_r'), ('elb_r', 'wri_r'), 27 | ('hip_l', 'kne_l'), ('kne_l', 'ank_l'), ('hip_r', 'kne_r'), 28 | ('kne_r', 'ank_r') 29 | ] 30 | ''' 31 | crowd_pose_part_labels = [ 32 | 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 33 | 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 34 | 'left_knee', 'right_knee', 'left_ankle', 'right_ankle', 35 | 'head', 'neck' 36 | ] 37 | crowd_pose_part_idx = { 38 | b: a for a, b in enumerate(crowd_pose_part_labels) 39 | } 40 | crowd_pose_part_orders = [ 41 | ('head', 'neck'), ('neck', 'left_shoulder'), ('neck', 'right_shoulder'), 42 | ('left_shoulder', 'right_shoulder'), ('left_shoulder', 'left_hip'), 43 | ('right_shoulder', 'right_hip'), ('left_hip', 'right_hip'), ('left_shoulder', 'left_elbow'), 44 | ('left_elbow', 'left_wrist'), ('right_shoulder', 'right_elbow'), ('right_elbow', 'right_wrist'), 45 | ('left_hip', 'left_knee'), ('left_knee', 'left_ankle'), ('right_hip', 'right_knee'), 46 | ('right_knee', 'right_ankle') 47 | ] 48 | ''' 49 | VIS_CONFIG = { 50 | 'COCO': { 51 | 'part_labels': coco_part_labels, 52 | 'part_idx': coco_part_idx, 53 | 'part_orders': coco_part_orders 54 | #}, 55 | #'CROWDPOSE': { 56 | # 'part_labels': crowd_pose_part_labels, 57 | # 'part_idx': crowd_pose_part_idx, 58 | # 'part_orders': crowd_pose_part_orders 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /lib/dataset/__pycache__/COCODataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/COCODataset.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/__pycache__/COCOKeypoints.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/COCOKeypoints.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/__pycache__/CrowdPoseDataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/CrowdPoseDataset.cpython-36.pyc -------------------------------------------------------------------------------- 
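The dataset factories exported from lib/dataset/__init__.py above are thin wrappers around build.py, which appears below. A minimal usage sketch of how they are typically driven (assumed driver code, not part of the repository; the config import path and the loop body are assumptions based on lib/config and the tools/ scripts):

from config import cfg                                   # assumption: lib/config exposes a yacs-style cfg
from dataset import make_dataloader, make_test_dataloader

# cfg is expected to be populated from one of the YAML files shown earlier
train_loader = make_dataloader(cfg, is_train=True, distributed=False)
test_loader, test_dataset = make_test_dataloader(cfg)

for images, heatmaps, masks, joints in train_loader:
    # heatmaps, masks and joints are per-scale lists, one entry per
    # resolution in cfg.DATASET.OUTPUT_SIZE (see build_dataset below)
    pass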
/lib/dataset/__pycache__/CrowdPoseKeypoints.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/CrowdPoseKeypoints.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/__pycache__/build.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/__pycache__/build.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/build.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import torch.utils.data 13 | 14 | from .COCODataset import CocoDataset as coco 15 | from .COCOKeypoints import CocoKeypoints as coco_kpt 16 | #from .CrowdPoseDataset import CrowdPoseDataset as crowd_pose 17 | #from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose_kpt 18 | from .transforms import build_transforms 19 | from .target_generators import HeatmapGenerator 20 | from .target_generators import ScaleAwareHeatmapGenerator 21 | from .target_generators import JointsGenerator 22 | 23 | 24 | def build_dataset(cfg, is_train): 25 | transforms = build_transforms(cfg, is_train) 26 | 27 | if cfg.DATASET.SCALE_AWARE_SIGMA: 28 | _HeatmapGenerator = ScaleAwareHeatmapGenerator 29 | else: 30 | _HeatmapGenerator = HeatmapGenerator 31 | 32 | heatmap_generator = [ 33 | _HeatmapGenerator( 34 | output_size, cfg.DATASET.NUM_JOINTS, cfg.DATASET.SIGMA 35 | ) for output_size in cfg.DATASET.OUTPUT_SIZE 36 | ] 37 | joints_generator = [ 38 | JointsGenerator( 39 | cfg.DATASET.MAX_NUM_PEOPLE, 40 | cfg.DATASET.NUM_JOINTS, 41 | output_size, 42 | cfg.MODEL.TAG_PER_JOINT 43 | ) for output_size in cfg.DATASET.OUTPUT_SIZE 44 | ] 45 | 46 | dataset_name = cfg.DATASET.TRAIN if is_train else cfg.DATASET.TEST 47 | 48 | dataset = eval(cfg.DATASET.DATASET)( 49 | cfg, 50 | dataset_name, 51 | is_train, 52 | heatmap_generator, 53 | joints_generator, 54 | transforms 55 | ) 56 | 57 | return dataset 58 | 59 | 60 | def make_dataloader(cfg, is_train=True, distributed=False): 61 | if is_train: 62 | images_per_gpu = cfg.TRAIN.IMAGES_PER_GPU 63 | shuffle = True 64 | else: 65 | images_per_gpu = cfg.TEST.IMAGES_PER_GPU 66 | shuffle = False 67 | images_per_batch = images_per_gpu * len(cfg.GPUS) 68 | 69 | dataset = build_dataset(cfg, is_train) 70 | 71 | if is_train and distributed: 72 | train_sampler = torch.utils.data.distributed.DistributedSampler( 73 | dataset 74 | ) 75 | shuffle 
= False 76 | else: 77 | train_sampler = None 78 | 79 | data_loader = torch.utils.data.DataLoader( 80 | dataset, 81 | batch_size=images_per_batch, 82 | shuffle=shuffle, 83 | num_workers=cfg.WORKERS, 84 | pin_memory=cfg.PIN_MEMORY, 85 | sampler=train_sampler 86 | ) 87 | 88 | return data_loader 89 | 90 | 91 | def make_test_dataloader(cfg): 92 | transforms = None 93 | dataset = eval(cfg.DATASET.DATASET_TEST)( 94 | cfg.DATASET.ROOT, 95 | cfg.DATASET.TEST, 96 | cfg.DATASET.DATA_FORMAT, 97 | transforms 98 | ) 99 | 100 | data_loader = torch.utils.data.DataLoader( 101 | dataset, 102 | batch_size=1, 103 | shuffle=False, 104 | num_workers=0, 105 | pin_memory=False 106 | ) 107 | 108 | return data_loader, dataset 109 | -------------------------------------------------------------------------------- /lib/dataset/target_generators/__init__.py: -------------------------------------------------------------------------------- 1 | from .target_generators import HeatmapGenerator 2 | from .target_generators import ScaleAwareHeatmapGenerator 3 | from .target_generators import JointsGenerator 4 | 5 | __all__ = ['HeatmapGenerator', 'ScaleAwareHeatmapGenerator', 'JointsGenerator'] 6 | -------------------------------------------------------------------------------- /lib/dataset/target_generators/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/target_generators/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/target_generators/__pycache__/target_generators.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/target_generators/__pycache__/target_generators.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/target_generators/target_generators.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | 14 | 15 | class HeatmapGenerator(): 16 | def __init__(self, output_res, num_joints, sigma=-1): 17 | self.output_res = output_res 18 | self.num_joints = num_joints 19 | if sigma < 0: 20 | sigma = self.output_res/64 21 | self.sigma = sigma 22 | size = 6*sigma + 3 23 | x = np.arange(0, size, 1, float) 24 | y = x[:, np.newaxis] 25 | x0, y0 = 3*sigma + 1, 3*sigma + 1 26 | self.g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 27 | 28 | def __call__(self, joints): 29 | hms = np.zeros((self.num_joints, self.output_res, self.output_res), 30 | dtype=np.float32) 31 | sigma = self.sigma 32 | for p in joints: 33 | for idx, pt in enumerate(p): 34 | if pt[2] > 0: 35 | x, y = int(pt[0]), int(pt[1]) 36 | if x < 0 or y < 0 or \ 37 | x >= self.output_res or y >= self.output_res: 38 | continue 39 | 40 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) 41 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) 42 | 43 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0] 44 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1] 45 | 46 | cc, dd = max(0, ul[0]), min(br[0], self.output_res) 47 | aa, bb = max(0, ul[1]), min(br[1], self.output_res) 48 | hms[idx, aa:bb, cc:dd] = np.maximum( 49 | hms[idx, aa:bb, cc:dd], self.g[a:b, c:d]) 50 | return hms 51 | 52 | 53 | class ScaleAwareHeatmapGenerator(): 54 | def __init__(self, output_res, num_joints): 55 | self.output_res = output_res 56 | self.num_joints = num_joints 57 | 58 | def get_gaussian_kernel(self, sigma): 59 | size = 6*sigma + 3 60 | x = np.arange(0, size, 1, float) 61 | y = x[:, np.newaxis] 62 | x0, y0 = 3*sigma + 1, 3*sigma + 1 63 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 64 | return g 65 | 66 | def __call__(self, joints): 67 | hms = np.zeros((self.num_joints, self.output_res, self.output_res), 68 | dtype=np.float32) 69 | for p in joints: 70 | sigma = p[0, 3] 71 | g = self.get_gaussian_kernel(sigma) 72 | for idx, pt in enumerate(p): 73 | if pt[2] > 0: 74 | x, y = int(pt[0]), int(pt[1]) 75 | if x < 0 or y < 0 or \ 76 | x >= self.output_res or y >= self.output_res: 77 | continue 78 | 79 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) 80 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) 81 | 82 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0] 83 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1] 84 | 85 | cc, dd = max(0, ul[0]), min(br[0], self.output_res) 86 | aa, bb = max(0, ul[1]), min(br[1], self.output_res) 87 | hms[idx, aa:bb, cc:dd] = np.maximum( 88 | hms[idx, aa:bb, cc:dd], g[a:b, c:d]) 89 | return hms 90 | 91 | 92 | class JointsGenerator(): 93 | def __init__(self, max_num_people, num_joints, output_res, tag_per_joint): 94 | self.max_num_people = max_num_people 95 | self.num_joints = num_joints 96 | self.output_res = output_res 97 | self.tag_per_joint = tag_per_joint 98 | 99 | def __call__(self, joints): 100 | visible_nodes = np.zeros((self.max_num_people, self.num_joints, 2)) 101 | output_res = self.output_res 102 | for i in range(len(joints)): 103 | tot = 0 104 | for idx, pt in enumerate(joints[i]): 105 | x, y = int(pt[0]), int(pt[1]) 106 | if 
pt[2] > 0 and x >= 0 and y >= 0 \ 107 | and x < self.output_res and y < self.output_res: 108 | if self.tag_per_joint: 109 | visible_nodes[i][tot] = \ 110 | (idx * output_res**2 + y * output_res + x, 1) 111 | else: 112 | visible_nodes[i][tot] = \ 113 | (y * output_res + x, 1) 114 | tot += 1 115 | return visible_nodes 116 | -------------------------------------------------------------------------------- /lib/dataset/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | from .transforms import RandomAffineTransform 3 | from .transforms import ToTensor 4 | from .transforms import Normalize 5 | from .transforms import RandomHorizontalFlip 6 | 7 | from .build import build_transforms 8 | from .build import FLIP_CONFIG 9 | -------------------------------------------------------------------------------- /lib/dataset/transforms/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/transforms/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/transforms/__pycache__/build.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/transforms/__pycache__/build.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/transforms/__pycache__/transforms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/dataset/transforms/__pycache__/transforms.cpython-36.pyc -------------------------------------------------------------------------------- /lib/dataset/transforms/build.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from . import transforms as T 13 | 14 | 15 | FLIP_CONFIG = { 16 | 'COCO': [ 17 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 18 | ], 19 | 'COCO_WITH_CENTER': [ 20 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 17 21 | ], 22 | 'CROWDPOSE': [ 23 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13 24 | ], 25 | 'CROWDPOSE_WITH_CENTER': [ 26 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13, 14 27 | ] 28 | } 29 | 30 | 31 | def build_transforms(cfg, is_train=True): 32 | assert is_train is True, 'Please only use build_transforms for training.' 
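    # Only the training pipeline is assembled here; the evaluation path
    # (make_test_dataloader in lib/dataset/build.py) builds its dataset with
    # transforms=None, and the resizing helpers used at inference live in
    # lib/utils/transforms.py (e.g. resize_align_multi_scale).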
33 | assert isinstance(cfg.DATASET.OUTPUT_SIZE, (list, tuple)), 'DATASET.OUTPUT_SIZE should be list or tuple' 34 | if is_train: 35 | max_rotation = cfg.DATASET.MAX_ROTATION 36 | min_scale = cfg.DATASET.MIN_SCALE 37 | max_scale = cfg.DATASET.MAX_SCALE 38 | max_translate = cfg.DATASET.MAX_TRANSLATE 39 | input_size = cfg.DATASET.INPUT_SIZE 40 | output_size = cfg.DATASET.OUTPUT_SIZE 41 | flip = cfg.DATASET.FLIP 42 | scale_type = cfg.DATASET.SCALE_TYPE 43 | else: 44 | scale_type = cfg.DATASET.SCALE_TYPE 45 | max_rotation = 0 46 | min_scale = 1 47 | max_scale = 1 48 | max_translate = 0 49 | input_size = 512 50 | output_size = [128] 51 | flip = 0 52 | 53 | # coco_flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 54 | # if cfg.DATASET.WITH_CENTER: 55 | # coco_flip_index.append(17) 56 | if 'coco' in cfg.DATASET.DATASET: 57 | dataset_name = 'COCO' 58 | elif 'crowd_pose' in cfg.DATASET.DATASET: 59 | dataset_name = 'CROWDPOSE' 60 | else: 61 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET) 62 | if cfg.DATASET.WITH_CENTER: 63 | coco_flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] 64 | else: 65 | coco_flip_index = FLIP_CONFIG[dataset_name] 66 | 67 | transforms = T.Compose( 68 | [ 69 | T.RandomAffineTransform( 70 | input_size, 71 | output_size, 72 | max_rotation, 73 | min_scale, 74 | max_scale, 75 | scale_type, 76 | max_translate, 77 | scale_aware_sigma=cfg.DATASET.SCALE_AWARE_SIGMA 78 | ), 79 | T.RandomHorizontalFlip(coco_flip_index, output_size, flip), 80 | T.ToTensor(), 81 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 82 | ] 83 | ) 84 | 85 | return transforms 86 | -------------------------------------------------------------------------------- /lib/dataset/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import random 13 | 14 | import cv2 15 | import numpy as np 16 | import torch 17 | import torchvision 18 | from torchvision.transforms import functional as F 19 | 20 | 21 | class Compose(object): 22 | def __init__(self, transforms): 23 | self.transforms = transforms 24 | 25 | def __call__(self, image, mask, joints): 26 | for t in self.transforms: 27 | image, mask, joints = t(image, mask, joints) 28 | return image, mask, joints 29 | 30 | def __repr__(self): 31 | format_string = self.__class__.__name__ + "(" 32 | for t in self.transforms: 33 | format_string += "\n" 34 | format_string += " {0}".format(t) 35 | format_string += "\n)" 36 | return format_string 37 | 38 | 39 | class ToTensor(object): 40 | def __call__(self, image, mask, joints): 41 | return F.to_tensor(image), mask, joints 42 | 43 | 44 | class Normalize(object): 45 | def __init__(self, mean, std): 46 | self.mean = mean 47 | self.std = std 48 | 49 | def __call__(self, image, mask, joints): 50 | image = F.normalize(image, mean=self.mean, std=self.std) 51 | return image, mask, joints 52 | 53 | 54 | class RandomHorizontalFlip(object): 55 | def __init__(self, flip_index, output_size, prob=0.5): 56 | self.flip_index = flip_index 57 | self.prob = prob 58 | self.output_size = output_size if isinstance(output_size, list) \ 59 | else [output_size] 60 | 61 | def __call__(self, image, mask, joints): 62 | assert isinstance(mask, list) 63 | assert isinstance(joints, list) 64 | assert len(mask) == len(joints) 65 | assert len(mask) == len(self.output_size) 66 | 67 | if random.random() < self.prob: 68 | image = image[:, ::-1] - np.zeros_like(image) 69 | for i, _output_size in enumerate(self.output_size): 70 | mask[i] = mask[i][:, ::-1] - np.zeros_like(mask[i]) 71 | joints[i] = joints[i][:, self.flip_index] 72 | joints[i][:, :, 0] = _output_size - joints[i][:, :, 0] - 1 73 | 74 | return image, mask, joints 75 | 76 | 77 | class RandomAffineTransform(object): 78 | def __init__(self, 79 | input_size, 80 | output_size, 81 | max_rotation, 82 | min_scale, 83 | max_scale, 84 | scale_type, 85 | max_translate, 86 | scale_aware_sigma=False): 87 | self.input_size = input_size 88 | self.output_size = output_size if isinstance(output_size, list) \ 89 | else [output_size] 90 | 91 | self.max_rotation = max_rotation 92 | self.min_scale = min_scale 93 | self.max_scale = max_scale 94 | self.scale_type = scale_type 95 | self.max_translate = max_translate 96 | self.scale_aware_sigma = scale_aware_sigma 97 | 98 | def _get_affine_matrix(self, center, scale, res, rot=0): 99 | # Generate transformation matrix 100 | h = 200 * scale 101 | t = np.zeros((3, 3)) 102 | t[0, 0] = float(res[1]) / h 103 | t[1, 1] = float(res[0]) / h 104 | t[0, 2] = res[1] * (-float(center[0]) / h + .5) 105 | t[1, 2] = res[0] * (-float(center[1]) / h + .5) 106 | t[2, 2] = 1 107 | if not rot == 0: 108 | rot = -rot # To match direction of rotation from cropping 109 | rot_mat = np.zeros((3, 3)) 110 | rot_rad = rot * np.pi / 180 111 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 112 | rot_mat[0, :2] = [cs, -sn] 113 | rot_mat[1, :2] = [sn, cs] 114 | rot_mat[2, 2] = 1 115 | # Need to rotate around center 116 | t_mat = np.eye(3) 117 | t_mat[0, 2] = -res[1]/2 118 | t_mat[1, 2] = -res[0]/2 119 
| t_inv = t_mat.copy() 120 | t_inv[:2, 2] *= -1 121 | t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) 122 | return t 123 | 124 | def _affine_joints(self, joints, mat): 125 | joints = np.array(joints) 126 | shape = joints.shape 127 | joints = joints.reshape(-1, 2) 128 | return np.dot(np.concatenate( 129 | (joints, joints[:, 0:1]*0+1), axis=1), mat.T).reshape(shape) 130 | 131 | def __call__(self, image, mask, joints): 132 | assert isinstance(mask, list) 133 | assert isinstance(joints, list) 134 | assert len(mask) == len(joints) 135 | assert len(mask) == len(self.output_size) 136 | 137 | height, width = image.shape[:2] 138 | 139 | center = np.array((width/2, height/2)) 140 | if self.scale_type == 'long': 141 | scale = max(height, width)/200 142 | elif self.scale_type == 'short': 143 | scale = min(height, width)/200 144 | else: 145 | raise ValueError('Unknown scale type: {}'.format(self.scale_type)) 146 | aug_scale = np.random.random() * (self.max_scale - self.min_scale) \ 147 | + self.min_scale 148 | scale *= aug_scale 149 | aug_rot = (np.random.random() * 2 - 1) * self.max_rotation 150 | 151 | if self.max_translate > 0: 152 | dx = np.random.randint( 153 | -self.max_translate*scale, self.max_translate*scale) 154 | dy = np.random.randint( 155 | -self.max_translate*scale, self.max_translate*scale) 156 | center[0] += dx 157 | center[1] += dy 158 | 159 | for i, _output_size in enumerate(self.output_size): 160 | mat_output = self._get_affine_matrix( 161 | center, scale, (_output_size, _output_size), aug_rot 162 | )[:2] 163 | mask[i] = cv2.warpAffine( 164 | (mask[i]*255).astype(np.uint8), mat_output, 165 | (_output_size, _output_size) 166 | ) / 255 167 | mask[i] = (mask[i] > 0.5).astype(np.float32) 168 | 169 | joints[i][:, :, 0:2] = self._affine_joints( 170 | joints[i][:, :, 0:2], mat_output 171 | ) 172 | if self.scale_aware_sigma: 173 | joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale 174 | 175 | mat_input = self._get_affine_matrix( 176 | center, scale, (self.input_size, self.input_size), aug_rot 177 | )[:2] 178 | image = cv2.warpAffine( 179 | image, mat_input, (self.input_size, self.input_size) 180 | ) 181 | 182 | return image, mask, joints 183 | -------------------------------------------------------------------------------- /lib/fp16_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__init__.py -------------------------------------------------------------------------------- /lib/fp16_utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/fp16_utils/__pycache__/fp16_optimizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/fp16_optimizer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/fp16_utils/__pycache__/fp16util.cpython-36.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/fp16util.cpython-36.pyc -------------------------------------------------------------------------------- /lib/fp16_utils/__pycache__/loss_scaler.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/fp16_utils/__pycache__/loss_scaler.cpython-36.pyc -------------------------------------------------------------------------------- /lib/fp16_utils/fp16util.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Based on: 3 | # apex 4 | # Copyright (c) NVIDIA 5 | # Licence under The BSD 3-Clause "New" or "Revised" License 6 | # https://github.com/NVIDIA/apex 7 | # All rights reserved. 8 | # 9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 10 | # following conditions are met: 11 | # 12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 13 | # disclaimer. 14 | # 15 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 16 | # disclaimer in the documentation and/or other materials provided with the distribution. 17 | # 18 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 19 | # products derived from this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 22 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # ------------------------------------------------------------------------------ 29 | # Copyright (c) Microsoft 30 | # Licensed under the MIT License. 
31 | # Modified by Bowen Cheng 32 | # ------------------------------------------------------------------------------ 33 | 34 | import torch 35 | import torch.nn as nn 36 | from torch.autograd import Variable 37 | from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors 38 | 39 | 40 | class tofp16(nn.Module): 41 | """ 42 | Model wrapper that implements:: 43 | def forward(self, input): 44 | return input.half() 45 | """ 46 | 47 | def __init__(self): 48 | super(tofp16, self).__init__() 49 | 50 | def forward(self, input): 51 | return input.half() 52 | 53 | 54 | class tofp32(nn.Module): 55 | """ 56 | Model wrapper that implements:: 57 | def forward(self, input): 58 | return input.float() 59 | """ 60 | 61 | def __init__(self): 62 | super(tofp32, self).__init__() 63 | 64 | def forward(self, input): 65 | if isinstance(input, list): 66 | return list(map(lambda x: x.float(), input)) 67 | else: 68 | return input.float() 69 | 70 | 71 | def BN_convert_float(module): 72 | ''' 73 | Designed to work with network_to_half. 74 | BatchNorm layers need parameters in single precision. 75 | Find all layers and convert them back to float. This can't 76 | be done with built in .apply as that function will apply 77 | fn to all modules, parameters, and buffers. Thus we wouldn't 78 | be able to guard the float conversion based on the module type. 79 | ''' 80 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 81 | module.float() 82 | for child in module.children(): 83 | BN_convert_float(child) 84 | return module 85 | 86 | 87 | def network_to_half(network): 88 | """ 89 | Convert model to half precision in a batchnorm-safe way. 90 | """ 91 | return nn.Sequential(tofp16(), BN_convert_float(network.half()), tofp32()) 92 | 93 | 94 | def backwards_debug_hook(grad): 95 | raise RuntimeError("master_params received a gradient in the backward pass!") 96 | 97 | 98 | def prep_param_lists(model, flat_master=False): 99 | """ 100 | Creates a list of FP32 master parameters for a given model, as in 101 | `Training Neural Networks with Mixed Precision: Real Examples`_. 102 | Args: 103 | model (torch.nn.Module): Existing Pytorch model 104 | flat_master (bool, optional, default=False): Flatten the master parameters into a single tensor, as a performance optimization. 105 | Returns: 106 | A tuple (``model_params``, ``master_params``). ``model_params`` is a list of the model's parameters for later use with :func:`model_grads_to_master_grads` and :func:`master_params_to_model_params`. ``master_params`` is a list of FP32 master parameters. If ``flat_master=True``, ``master_params`` will be a list with one element. 107 | Example:: 108 | model_params, master_params = prep_param_lists(model) 109 | .. warning:: 110 | Currently, if ``flat_master=True``, all the model's parameters must be the same type. If the model has parameters of different types, use ``flat_master=False``, or use :class:`FP16_Optimizer`. 111 | .. _`Training Neural Networks with Mixed Precision: Real Examples`: 112 | http://on-demand.gputechconf.com/gtc/2018/video/S81012/ 113 | """ 114 | model_params = [param for param in model.parameters() if param.requires_grad] 115 | 116 | if flat_master: 117 | # Give the user some more useful error messages 118 | try: 119 | # flatten_dense_tensors returns a contiguous flat array.
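            # (all trainable parameters are concatenated into one contiguous
            # FP32 tensor, which then serves as the single master copy)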
120 | # http://pytorch.org/docs/master/_modules/torch/_utils.html 121 | master_params = _flatten_dense_tensors([param.data for param in model_params]).float() 122 | except: 123 | print("Error in prep_param_lists: model may contain a mixture of parameters " 124 | "of different types. Use flat_master=False, or use F16_Optimizer.") 125 | raise 126 | master_params = torch.nn.Parameter(master_params) 127 | master_params.requires_grad = True 128 | # master_params.register_hook(backwards_debug_hook) 129 | if master_params.grad is None: 130 | master_params.grad = master_params.new(*master_params.size()) 131 | return model_params, [master_params] 132 | else: 133 | master_params = [param.clone().float().detach() for param in model_params] 134 | for param in master_params: 135 | param.requires_grad = True 136 | return model_params, master_params 137 | 138 | 139 | def model_grads_to_master_grads(model_params, master_params, flat_master=False): 140 | """ 141 | Copy model gradients to master gradients. 142 | Args: 143 | model_params: List of model parameters created by :func:`prep_param_lists`. 144 | master_params: List of FP32 master parameters created by :func:`prep_param_lists`. If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`model_grads_to_master_grads`. 145 | """ 146 | if flat_master: 147 | # The flattening may incur one more deep copy than is necessary. 148 | master_params[0].grad.data.copy_( 149 | _flatten_dense_tensors([p.grad.data for p in model_params])) 150 | else: 151 | for model, master in zip(model_params, master_params): 152 | if model.grad is not None: 153 | if master.grad is None: 154 | master.grad = Variable(master.data.new(*master.data.size())) 155 | master.grad.data.copy_(model.grad.data) 156 | else: 157 | master.grad = None 158 | 159 | 160 | def master_params_to_model_params(model_params, master_params, flat_master=False): 161 | """ 162 | Copy master parameters to model parameters. 163 | Args: 164 | model_params: List of model parameters created by :func:`prep_param_lists`. 165 | master_params: List of FP32 master parameters created by :func:`prep_param_lists`. If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`master_params_to_model_params`. 166 | """ 167 | if flat_master: 168 | for model, master in zip(model_params, 169 | _unflatten_dense_tensors(master_params[0].data, model_params)): 170 | model.data.copy_(master) 171 | else: 172 | for model, master in zip(model_params, master_params): 173 | model.data.copy_(master.data) 174 | 175 | 176 | # Backward compatibility fixes 177 | def to_python_float(t): 178 | if hasattr(t, 'item'): 179 | return t.item() 180 | else: 181 | return t[0] 182 | 183 | TORCH_MAJOR = int(torch.__version__.split('.')[0]) 184 | TORCH_MINOR = int(torch.__version__.split('.')[1]) 185 | if TORCH_MAJOR == 0 and TORCH_MINOR <= 4: 186 | clip_grad_norm = torch.nn.utils.clip_grad_norm 187 | else: 188 | clip_grad_norm = torch.nn.utils.clip_grad_norm_ 189 | -------------------------------------------------------------------------------- /lib/fp16_utils/loss_scaler.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Based on: 3 | # apex 4 | # Copyright (c) NVIDIA 5 | # Licence under The BSD 3-Clause "New" or "Revised" License 6 | # https://github.com/NVIDIA/apex 7 | # All rights reserved. 
8 | # 9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 10 | # following conditions are met: 11 | # 12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 13 | # disclaimer. 14 | # 15 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 16 | # disclaimer in the documentation and/or other materials provided with the distribution. 17 | # 18 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 19 | # products derived from this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 22 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | # ------------------------------------------------------------------------------ 29 | # Copyright (c) Microsoft 30 | # Licensed under the MIT License. 31 | # Modified by Bowen Cheng 32 | # ------------------------------------------------------------------------------ 33 | 34 | import torch 35 | 36 | 37 | # item() is a recent addition, so this helps with backward compatibility. 38 | def to_python_float(t): 39 | if hasattr(t, 'item'): 40 | return t.item() 41 | else: 42 | return t[0] 43 | 44 | 45 | class LossScaler: 46 | """ 47 | Class that manages a static loss scale. This class is intended to interact with 48 | :class:`FP16_Optimizer`, and should not be directly manipulated by the user. 49 | Use of :class:`LossScaler` is enabled via the ``static_loss_scale`` argument to 50 | :class:`FP16_Optimizer`'s constructor. 51 | Args: 52 | scale (float, optional, default=1.0): The loss scale. 53 | """ 54 | 55 | def __init__(self, scale=1): 56 | self.cur_scale = scale 57 | 58 | # `params` is a list / generator of torch.Variable 59 | def has_overflow(self, params): 60 | return False 61 | 62 | # `x` is a torch.Tensor 63 | def _has_inf_or_nan(x): 64 | return False 65 | 66 | def update_scale(self, overflow): 67 | pass 68 | 69 | @property 70 | def loss_scale(self): 71 | return self.cur_scale 72 | 73 | def scale_gradient(self, module, grad_in, grad_out): 74 | return tuple(self.loss_scale * g for g in grad_in) 75 | 76 | def backward(self, loss): 77 | scaled_loss = loss * self.loss_scale 78 | scaled_loss.backward() 79 | 80 | 81 | class DynamicLossScaler: 82 | """ 83 | Class that manages dynamic loss scaling. It is recommended to use :class:`DynamicLossScaler` 84 | indirectly, by supplying ``dynamic_loss_scale=True`` to the constructor of 85 | :class:`FP16_Optimizer`. 
However, it's important to understand how :class:`DynamicLossScaler` 86 | operates, because the default options can be changed using the 87 | the ``dynamic_loss_args`` argument to :class:`FP16_Optimizer`'s constructor. 88 | Loss scaling is designed to combat the problem of underflowing gradients encountered at long 89 | times when training fp16 networks. Dynamic loss scaling begins by attempting a very high loss 90 | scale. Ironically, this may result in OVERflowing gradients. If overflowing gradients are 91 | encountered, :class:`DynamicLossScaler` informs :class:`FP16_Optimizer` that an overflow has 92 | occurred. 93 | :class:`FP16_Optimizer` then skips the update step for this particular iteration/minibatch, 94 | and :class:`DynamicLossScaler` adjusts the loss scale to a lower value. 95 | If a certain number of iterations occur without overflowing gradients detected, 96 | :class:`DynamicLossScaler` increases the loss scale once more. 97 | In this way :class:`DynamicLossScaler` attempts to "ride the edge" of 98 | always using the highest loss scale possible without incurring overflow. 99 | Args: 100 | init_scale (float, optional, default=2**32): Initial loss scale attempted by :class:`DynamicLossScaler.` 101 | scale_factor (float, optional, default=2.0): Factor used when adjusting the loss scale. If an overflow is encountered, the loss scale is readjusted to loss scale/``scale_factor``. If ``scale_window`` consecutive iterations take place without an overflow, the loss scale is readjusted to loss_scale*``scale_factor``. 102 | scale_window (int, optional, default=1000): Number of consecutive iterations without an overflow to wait before increasing the loss scale. 103 | """ 104 | 105 | def __init__(self, 106 | init_scale=2 ** 32, 107 | scale_factor=2., 108 | scale_window=1000): 109 | self.cur_scale = init_scale 110 | self.cur_iter = 0 111 | self.last_overflow_iter = -1 112 | self.scale_factor = scale_factor 113 | self.scale_window = scale_window 114 | 115 | # `params` is a list / generator of torch.Variable 116 | def has_overflow(self, params): 117 | for p in params: 118 | # if p.grad is not None and DynamicLossScaler._has_inf_or_nan(p.grad.data): 119 | # return True 120 | if p.grad is not None and self._has_inf_or_nan(p.grad.data): 121 | return True 122 | 123 | return False 124 | 125 | # `x` is a torch.Tensor 126 | # def _has_inf_or_nan(x): 127 | def _has_inf_or_nan(self, x): 128 | try: 129 | # if x is half, the .float() incurs an additional deep copy, but it's necessary if 130 | # Pytorch's .sum() creates a one-element tensor of the same type as x 131 | # (which is true for some recent version of pytorch). 132 | cpu_sum = float(x.float().sum()) 133 | # More efficient version that can be used if .sum() returns a Python scalar 134 | # cpu_sum = float(x.sum()) 135 | except RuntimeError as instance: 136 | # We want to check if inst is actually an overflow exception. 137 | # RuntimeError could come from a different error. 138 | # If so, we still want the exception to propagate. 
139 | if "value cannot be converted" not in instance.args[0]: 140 | raise 141 | return True 142 | else: 143 | if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum: 144 | return True 145 | return False 146 | 147 | # `overflow` is boolean indicating whether the gradient overflowed 148 | def update_scale(self, overflow): 149 | if overflow: 150 | # self.cur_scale /= self.scale_factor 151 | self.cur_scale = max(self.cur_scale / self.scale_factor, 1) 152 | self.last_overflow_iter = self.cur_iter 153 | else: 154 | if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0: 155 | self.cur_scale *= self.scale_factor 156 | self.cur_iter += 1 157 | 158 | @property 159 | def loss_scale(self): 160 | return self.cur_scale 161 | 162 | def scale_gradient(self, module, grad_in, grad_out): 163 | return tuple(self.loss_scale * g for g in grad_in) 164 | 165 | def backward(self, loss): 166 | scaled_loss = loss * self.loss_scale 167 | scaled_loss.backward() 168 | 169 | 170 | ############################################################## 171 | # Example usage below here -- assuming it's in a separate file 172 | ############################################################## 173 | """ 174 | TO-DO separate out into an example. 175 | if __name__ == "__main__": 176 | import torch 177 | from torch.autograd import Variable 178 | from dynamic_loss_scaler import DynamicLossScaler 179 | # N is batch size; D_in is input dimension; 180 | # H is hidden dimension; D_out is output dimension. 181 | N, D_in, H, D_out = 64, 1000, 100, 10 182 | # Create random Tensors to hold inputs and outputs, and wrap them in Variables. 183 | x = Variable(torch.randn(N, D_in), requires_grad=False) 184 | y = Variable(torch.randn(N, D_out), requires_grad=False) 185 | w1 = Variable(torch.randn(D_in, H), requires_grad=True) 186 | w2 = Variable(torch.randn(H, D_out), requires_grad=True) 187 | parameters = [w1, w2] 188 | learning_rate = 1e-6 189 | optimizer = torch.optim.SGD(parameters, lr=learning_rate) 190 | loss_scaler = DynamicLossScaler() 191 | for t in range(500): 192 | y_pred = x.mm(w1).clamp(min=0).mm(w2) 193 | loss = (y_pred - y).pow(2).sum() * loss_scaler.loss_scale 194 | print('Iter {} loss scale: {}'.format(t, loss_scaler.loss_scale)) 195 | print('Iter {} scaled loss: {}'.format(t, loss.data[0])) 196 | print('Iter {} unscaled loss: {}'.format(t, loss.data[0] / loss_scaler.loss_scale)) 197 | # Run backprop 198 | optimizer.zero_grad() 199 | loss.backward() 200 | # Check for overflow 201 | has_overflow = DynamicLossScaler.has_overflow(parameters) 202 | # If no overflow, unscale grad and update as usual 203 | if not has_overflow: 204 | for param in parameters: 205 | param.grad.data.mul_(1. / loss_scaler.loss_scale) 206 | optimizer.step() 207 | # Otherwise, don't do anything -- ie, skip iteration 208 | else: 209 | print('OVERFLOW!') 210 | # Update loss scale for next iteration 211 | loss_scaler.update_scale(has_overflow) 212 | """ 213 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.pose_higher_hrnet 12 | #import models.pose_efficientnet 13 | #import models.pose_efficientnet_all -------------------------------------------------------------------------------- /lib/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/efficientnet_blocks.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/efficientnet_blocks.cpython-36.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/pose_efficientnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/pose_efficientnet.cpython-36.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/pose_efficientnet_all.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/pose_efficientnet_all.cpython-36.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/pose_higher_hrnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/models/__pycache__/pose_higher_hrnet.cpython-36.pyc -------------------------------------------------------------------------------- /lib/models/efficientnet_blocks.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True): 8 | modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)] 9 | if bn: 10 | modules.append(nn.BatchNorm2d(out_channels)) 11 | if relu: 12 | modules.append(nn.ReLU(inplace=True)) 13 | return nn.Sequential(*modules) 14 | 15 | def conv_bn(inp, oup, stride, use_batch_norm=True): 16 | #ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 17 | 18 | if use_batch_norm: 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU(inplace=True), 23 | ) 24 | else: 25 | return nn.Sequential( 26 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 27 | nn.ReLU(inplace=True), 28 | ) 29 | 30 | def conv1(in_channels,out_channels,stride): 31 | return nn.Sequential( 32 | nn.Conv2d(in_channels,out_channels,3,stride,1,bias=False), 33 | nn.BatchNorm2d(out_channels), 34 | 
nn.ReLU6(inplace=True), 35 | ) 36 | 37 | def conv_pw(in_channels, out_channels): 38 | return nn.Sequential( 39 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(out_channels), 41 | nn.ReLU6(inplace=True), 42 | ) 43 | 44 | def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 45 | return nn.Sequential( 46 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 47 | nn.ELU(inplace=True), 48 | 49 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 50 | nn.ELU(inplace=True), 51 | ) 52 | 53 | def conv_bn_act(in_, out_, kernel_size, 54 | stride=1, groups=1, bias=True, 55 | eps=1e-3, momentum=0.01): 56 | return nn.Sequential( 57 | SamePadConv2d(in_, out_, kernel_size, stride, groups=groups, bias=bias), 58 | nn.BatchNorm2d(out_, eps, momentum), 59 | Swish() 60 | ) 61 | 62 | 63 | class SamePadConv2d(nn.Conv2d): 64 | """ 65 | Conv with TF padding='same' 66 | https://github.com/pytorch/pytorch/issues/3867#issuecomment-349279036 67 | """ 68 | 69 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True, padding_mode="zeros"): 70 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias, padding_mode) 71 | 72 | def get_pad_odd(self, in_, weight, stride, dilation): 73 | effective_filter_size_rows = (weight - 1) * dilation + 1 74 | out_rows = (in_ + stride - 1) // stride 75 | padding_needed = max(0, (out_rows - 1) * stride + effective_filter_size_rows - in_) 76 | padding_rows = max(0, (out_rows - 1) * stride + (weight - 1) * dilation + 1 - in_) 77 | rows_odd = (padding_rows % 2 != 0) 78 | return padding_rows, rows_odd 79 | 80 | def forward(self, x): 81 | padding_rows, rows_odd = self.get_pad_odd(x.shape[2], self.weight.shape[2], self.stride[0], self.dilation[0]) 82 | padding_cols, cols_odd = self.get_pad_odd(x.shape[3], self.weight.shape[3], self.stride[1], self.dilation[1]) 83 | 84 | if rows_odd or cols_odd: 85 | x = F.pad(x, [0, int(cols_odd), 0, int(rows_odd)]) 86 | 87 | return F.conv2d(x, self.weight, self.bias, self.stride, 88 | padding=(padding_rows // 2, padding_cols // 2), 89 | dilation=self.dilation, groups=self.groups) 90 | 91 | 92 | class Swish(nn.Module): 93 | def forward(self, x): 94 | return x * torch.sigmoid(x) 95 | 96 | 97 | class Flatten(nn.Module): 98 | def forward(self, x): 99 | return x.view(x.shape[0], -1) 100 | 101 | 102 | class SEModule(nn.Module): 103 | def __init__(self, in_, squeeze_ch): 104 | super().__init__() 105 | self.se = nn.Sequential( 106 | nn.AdaptiveAvgPool2d(1), 107 | nn.Conv2d(in_, squeeze_ch, kernel_size=1, stride=1, padding=0, bias=True), 108 | Swish(), 109 | nn.Conv2d(squeeze_ch, in_, kernel_size=1, stride=1, padding=0, bias=True), 110 | ) 111 | 112 | def forward(self, x): 113 | return x * torch.sigmoid(self.se(x)) 114 | 115 | 116 | class DropConnect(nn.Module): 117 | def __init__(self, ratio): 118 | super().__init__() 119 | self.ratio = 1.0 - ratio 120 | 121 | def forward(self, x): 122 | if not self.training: 123 | return x 124 | 125 | random_tensor = self.ratio 126 | random_tensor += torch.rand([x.shape[0], 1, 1, 1], dtype=torch.float, device=x.device) 127 | random_tensor.requires_grad_(False) 128 | return x / self.ratio * random_tensor.floor() 129 | 130 | -------------------------------------------------------------------------------- /lib/utils/__pycache__/transforms.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/transforms.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/vis.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/vis.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/zipreader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/lib/utils/__pycache__/zipreader.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | 15 | 16 | def flip_back(output_flipped, matched_parts): 17 | ''' 18 | output_flipped: numpy.ndarray(batch_size, num_joints, height, width) 19 | ''' 20 | assert output_flipped.ndim == 4,\ 21 | 'output_flipped should be [batch_size, num_joints, height, width]' 22 | 23 | output_flipped = output_flipped[:, :, :, ::-1] 24 | 25 | for pair in matched_parts: 26 | tmp = output_flipped[:, pair[0], :, :].copy() 27 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 28 | output_flipped[:, pair[1], :, :] = tmp 29 | 30 | return output_flipped 31 | 32 | 33 | def fliplr_joints(joints, joints_vis, width, matched_parts): 34 | """ 35 | flip coords 36 | """ 37 | # Flip horizontal 38 | joints[:, 0] = width - joints[:, 0] - 1 39 | 40 | # Change left-right parts 41 | for pair in matched_parts: 42 | joints[pair[0], :], joints[pair[1], :] = \ 43 | joints[pair[1], :], joints[pair[0], :].copy() 44 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 45 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 46 | 47 | return joints*joints_vis, joints_vis 48 | 49 | 50 | def transform_preds(coords, center, scale, output_size): 51 | # target_coords = np.zeros(coords.shape) 52 | target_coords = coords.copy() 53 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 54 | for p in range(coords.shape[0]): 55 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 56 | return target_coords 57 | 58 | 59 | def get_affine_transform(center, 60 | scale, 61 | rot, 62 | output_size, 63 | shift=np.array([0, 0], dtype=np.float32), 64 | 
inv=0): 65 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 66 | print(scale) 67 | scale = np.array([scale, scale]) 68 | 69 | scale_tmp = scale * 200.0 70 | src_w = scale_tmp[0] 71 | dst_w = output_size[0] 72 | dst_h = output_size[1] 73 | 74 | rot_rad = np.pi * rot / 180 75 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 76 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 77 | 78 | src = np.zeros((3, 2), dtype=np.float32) 79 | dst = np.zeros((3, 2), dtype=np.float32) 80 | src[0, :] = center + scale_tmp * shift 81 | src[1, :] = center + src_dir + scale_tmp * shift 82 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 83 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 84 | 85 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 86 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 87 | 88 | if inv: 89 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 90 | else: 91 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 92 | 93 | return trans 94 | 95 | 96 | def affine_transform(pt, t): 97 | new_pt = np.array([pt[0], pt[1], 1.]).T 98 | new_pt = np.dot(t, new_pt) 99 | return new_pt[:2] 100 | 101 | 102 | def get_3rd_point(a, b): 103 | direct = a - b 104 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 105 | 106 | 107 | def get_dir(src_point, rot_rad): 108 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 109 | 110 | src_result = [0, 0] 111 | src_result[0] = src_point[0] * cs - src_point[1] * sn 112 | src_result[1] = src_point[0] * sn + src_point[1] * cs 113 | 114 | return src_result 115 | 116 | 117 | def crop(img, center, scale, output_size, rot=0): 118 | trans = get_affine_transform(center, scale, rot, output_size) 119 | 120 | dst_img = cv2.warpAffine(img, 121 | trans, 122 | (int(output_size[0]), int(output_size[1])), 123 | flags=cv2.INTER_LINEAR) 124 | 125 | return dst_img 126 | 127 | 128 | def resize(image, input_size): 129 | h, w, _ = image.shape 130 | 131 | center = np.array([int(w/2.0+0.5), int(h/2.0+0.5)]) 132 | if w < h: 133 | w_resized = input_size 134 | h_resized = int((input_size / w * h + 63) // 64 * 64) 135 | scale_w = w / 200.0 136 | scale_h = h_resized / w_resized * w / 200.0 137 | else: 138 | h_resized = input_size 139 | w_resized = int((input_size / h * w + 63) // 64 * 64) 140 | scale_h = h / 200.0 141 | scale_w = w_resized / h_resized * h / 200.0 142 | 143 | scale = np.array([scale_w, scale_h]) 144 | trans = get_affine_transform(center, scale, 0, (w_resized, h_resized)) 145 | 146 | image_resized = cv2.warpAffine( 147 | image, 148 | trans, 149 | (int(w_resized), int(h_resized)) 150 | ) 151 | 152 | return image_resized, center, scale 153 | 154 | 155 | def get_multi_scale_size(image, input_size, current_scale, min_scale): 156 | h, w, _ = image.shape 157 | center = np.array([int(w / 2.0 + 0.5), int(h / 2.0 + 0.5)]) 158 | 159 | # calculate the size for min_scale 160 | min_input_size = int((min_scale * input_size + 63)//64 * 64) 161 | if w < h: 162 | w_resized = int(min_input_size * current_scale / min_scale) 163 | h_resized = int( 164 | int((min_input_size/w*h+63)//64*64)*current_scale/min_scale 165 | ) 166 | scale_w = w / 200.0 167 | scale_h = h_resized / w_resized * w / 200.0 168 | else: 169 | h_resized = int(min_input_size * current_scale / min_scale) 170 | w_resized = int( 171 | int((min_input_size/h*w+63)//64*64)*current_scale/min_scale 172 | ) 173 | scale_h = h / 200.0 174 | scale_w = w_resized / h_resized * h / 200.0 175 | 176 | return (w_resized, h_resized), center, np.array([scale_w, scale_h]) 177 | 178 | 179 
| def resize_align_multi_scale(image, input_size, current_scale, min_scale): 180 | size_resized, center, scale = get_multi_scale_size( 181 | image, input_size, current_scale, min_scale 182 | ) 183 | trans = get_affine_transform(center, scale, 0, size_resized) 184 | 185 | image_resized = cv2.warpAffine( 186 | image, 187 | trans, 188 | size_resized 189 | # (int(w_resized), int(h_resized)) 190 | ) 191 | 192 | return image_resized, center, scale 193 | 194 | 195 | def get_final_preds(grouped_joints, center, scale, heatmap_size): 196 | final_results = [] 197 | for person in grouped_joints[0]: 198 | joints = np.zeros((person.shape[0], 3)) 199 | joints = transform_preds(person, center, scale, heatmap_size) 200 | final_results.append(joints) 201 | 202 | return final_results 203 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | 17 | import torch 18 | import torch.optim as optim 19 | import torch.nn as nn 20 | 21 | 22 | def setup_logger(final_output_dir, rank, phase): 23 | time_str = time.strftime('%Y-%m-%d-%H-%M') 24 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank) 25 | final_log_file = os.path.join(final_output_dir, log_file) 26 | head = '%(asctime)-15s %(message)s' 27 | # logging.basicConfig(format=head) 28 | logging.basicConfig(filename=str(final_log_file), 29 | format=head) 30 | logger = logging.getLogger() 31 | logger.setLevel(logging.INFO) 32 | console = logging.StreamHandler() 33 | logging.getLogger('').addHandler(console) 34 | 35 | return logger, time_str 36 | 37 | 38 | def create_logger(cfg, cfg_name, phase='train'): 39 | root_output_dir = Path(cfg.OUTPUT_DIR) 40 | # set up logger 41 | if not root_output_dir.exists() and cfg.RANK == 0: 42 | print('=> creating {}'.format(root_output_dir)) 43 | root_output_dir.mkdir() 44 | else: 45 | while not root_output_dir.exists(): 46 | print('=> wait for {} created'.format(root_output_dir)) 47 | time.sleep(30) 48 | 49 | dataset = cfg.DATASET.DATASET 50 | dataset = dataset.replace(':', '_') 51 | model = cfg.MODEL.NAME 52 | cfg_name = os.path.basename(cfg_name).split('.')[0] 53 | 54 | final_output_dir = root_output_dir / dataset / model / cfg_name 55 | 56 | if cfg.RANK == 0: 57 | print('=> creating {}'.format(final_output_dir)) 58 | final_output_dir.mkdir(parents=True, exist_ok=True) 59 | else: 60 | while not final_output_dir.exists(): 61 | print('=> wait for {} created'.format(final_output_dir)) 62 | time.sleep(5) 63 | 64 | logger, time_str = setup_logger(final_output_dir, cfg.RANK, phase) 65 | 66 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 67 | (cfg_name + '_' + time_str) 68 | 69 | print('=> creating {}'.format(tensorboard_log_dir)) 70 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 71 | 72 | return logger, str(final_output_dir), str(tensorboard_log_dir) 73 | 74 | 75 | def get_optimizer(cfg, model): 76 | optimizer = None 77 | if 
cfg.TRAIN.OPTIMIZER == 'sgd': 78 | optimizer = optim.SGD( 79 | model.parameters(), 80 | lr=cfg.TRAIN.LR, 81 | momentum=cfg.TRAIN.MOMENTUM, 82 | weight_decay=cfg.TRAIN.WD, 83 | nesterov=cfg.TRAIN.NESTEROV 84 | ) 85 | elif cfg.TRAIN.OPTIMIZER == 'adam': 86 | optimizer = optim.Adam( 87 | model.parameters(), 88 | lr=cfg.TRAIN.LR 89 | ) 90 | 91 | return optimizer 92 | 93 | 94 | def save_checkpoint(states, is_best, output_dir, 95 | filename='checkpoint.pth.tar'): 96 | torch.save(states, os.path.join(output_dir, filename)) 97 | 98 | if is_best and 'state_dict' in states: 99 | torch.save( 100 | states['best_state_dict'], 101 | os.path.join(output_dir, 'model_best.pth.tar') 102 | ) 103 | 104 | 105 | def get_model_summary(model, *input_tensors, item_length=26, verbose=True): 106 | """ 107 | :param model: 108 | :param input_tensors: 109 | :param item_length: 110 | :return: 111 | """ 112 | 113 | summary = [] 114 | 115 | ModuleDetails = namedtuple( 116 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 117 | hooks = [] 118 | layer_instances = {} 119 | 120 | def add_hooks(module): 121 | 122 | def hook(module, input, output): 123 | class_name = str(module.__class__.__name__) 124 | 125 | instance_index = 1 126 | if class_name not in layer_instances: 127 | layer_instances[class_name] = instance_index 128 | else: 129 | instance_index = layer_instances[class_name] + 1 130 | layer_instances[class_name] = instance_index 131 | 132 | layer_name = class_name + "_" + str(instance_index) 133 | 134 | params = 0 135 | 136 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 137 | class_name.find("Linear") != -1: 138 | for param_ in module.parameters(): 139 | params += param_.view(-1).size(0) 140 | 141 | flops = "Not Available" 142 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 143 | flops = ( 144 | torch.prod( 145 | torch.LongTensor(list(module.weight.data.size()))) * 146 | torch.prod( 147 | torch.LongTensor(list(output.size())[2:]))).item() 148 | elif isinstance(module, nn.Linear): 149 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 150 | * input[0].size(1)).item() 151 | 152 | if isinstance(input[0], list): 153 | input = input[0] 154 | if isinstance(output, list): 155 | output = output[0] 156 | 157 | summary.append( 158 | ModuleDetails( 159 | name=layer_name, 160 | input_size=list(input[0].size()), 161 | output_size=list(output.size()), 162 | num_parameters=params, 163 | multiply_adds=flops) 164 | ) 165 | 166 | if not isinstance(module, nn.ModuleList) \ 167 | and not isinstance(module, nn.Sequential) \ 168 | and module != model: 169 | hooks.append(module.register_forward_hook(hook)) 170 | 171 | model.eval() 172 | model.apply(add_hooks) 173 | 174 | space_len = item_length 175 | 176 | model(*input_tensors) 177 | for hook in hooks: 178 | hook.remove() 179 | 180 | details = '' 181 | if verbose: 182 | details = "Model Summary" + \ 183 | os.linesep + \ 184 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 185 | ' ' * (space_len - len("Name")), 186 | ' ' * (space_len - len("Input Size")), 187 | ' ' * (space_len - len("Output Size")), 188 | ' ' * (space_len - len("Parameters")), 189 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 190 | + os.linesep + '-' * space_len * 5 + os.linesep 191 | params_sum = 0 192 | flops_sum = 0 193 | for layer in summary: 194 | params_sum += layer.num_parameters 195 | if layer.multiply_adds != "Not Available": 196 | flops_sum += layer.multiply_adds 197 | if verbose: 198 
| details += "{}{}{}{}{}{}{}{}{}{}".format( 199 | layer.name, 200 | ' ' * (space_len - len(layer.name)), 201 | layer.input_size, 202 | ' ' * (space_len - len(str(layer.input_size))), 203 | layer.output_size, 204 | ' ' * (space_len - len(str(layer.output_size))), 205 | layer.num_parameters, 206 | ' ' * (space_len - len(str(layer.num_parameters))), 207 | layer.multiply_adds, 208 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 209 | + os.linesep + '-' * space_len * 5 + os.linesep 210 | 211 | details += os.linesep \ 212 | + "Total Parameters: {:,}".format(params_sum) \ 213 | + os.linesep + '-' * space_len * 5 + os.linesep 214 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,}".format(flops_sum) \ 215 | + os.linesep + '-' * space_len * 5 + os.linesep 216 | details += "Number of Layers" + os.linesep 217 | for layer in layer_instances: 218 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 219 | 220 | return details 221 | 222 | 223 | class AverageMeter(object): 224 | """Computes and stores the average and current value""" 225 | def __init__(self): 226 | self.reset() 227 | 228 | def reset(self): 229 | self.val = 0 230 | self.avg = 0 231 | self.sum = 0 232 | self.count = 0 233 | 234 | def update(self, val, n=1): 235 | self.val = val 236 | self.sum += val * n 237 | self.count += n 238 | self.avg = self.sum / self.count if self.count != 0 else 0 239 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import math 13 | 14 | import cv2 15 | import numpy as np 16 | import torchvision 17 | 18 | from dataset import VIS_CONFIG 19 | 20 | 21 | def add_joints(image, joints, color, dataset='COCO'): 22 | part_idx = VIS_CONFIG[dataset]['part_idx'] 23 | part_orders = VIS_CONFIG[dataset]['part_orders'] 24 | 25 | def link(a, b, color): 26 | if part_idx[a] < joints.shape[0] and part_idx[b] < joints.shape[0]: 27 | jointa = joints[part_idx[a]] 28 | jointb = joints[part_idx[b]] 29 | if jointa[2] > 0 and jointb[2] > 0: 30 | cv2.line( 31 | image, 32 | (int(jointa[0]), int(jointa[1])), 33 | (int(jointb[0]), int(jointb[1])), 34 | color, 35 | 2 36 | ) 37 | 38 | # add joints 39 | for joint in joints: 40 | if joint[2] > 0: 41 | cv2.circle(image, (int(joint[0]), int(joint[1])), 1, color, 2) 42 | 43 | # add link 44 | for pair in part_orders: 45 | link(pair[0], pair[1], color) 46 | 47 | return image 48 | 49 | 50 | def save_valid_image(image, joints, file_name, dataset='COCO'): 51 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 52 | 53 | for person in joints: 54 | color = np.random.randint(0, 255, size=3) 55 | color = [int(i) for i in color] 56 | add_joints(image, person, color, dataset=dataset) 57 | 58 | cv2.imwrite(file_name, image) 59 | 60 | 61 | def make_heatmaps(image, heatmaps): 62 | heatmaps = heatmaps.mul(255)\ 63 | .clamp(0, 255)\ 64 | .byte()\ 65 | .cpu().numpy() 66 | 67 | num_joints, height, width = heatmaps.shape 68 | image_resized = cv2.resize(image, (int(width), 
int(height))) 69 | 70 | image_grid = np.zeros((height, (num_joints+1)*width, 3), dtype=np.uint8) 71 | 72 | for j in range(num_joints): 73 | # add_joints(image_resized, joints[:, j, :]) 74 | heatmap = heatmaps[j, :, :] 75 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 76 | image_fused = colored_heatmap*0.7 + image_resized*0.3 77 | 78 | width_begin = width * (j+1) 79 | width_end = width * (j+2) 80 | image_grid[:, width_begin:width_end, :] = image_fused 81 | 82 | image_grid[:, 0:width, :] = image_resized 83 | 84 | return image_grid 85 | 86 | 87 | def make_tagmaps(image, tagmaps): 88 | num_joints, height, width = tagmaps.shape 89 | image_resized = cv2.resize(image, (int(width), int(height))) 90 | 91 | image_grid = np.zeros((height, (num_joints+1)*width, 3), dtype=np.uint8) 92 | 93 | for j in range(num_joints): 94 | tagmap = tagmaps[j, :, :] 95 | min = float(tagmap.min()) 96 | max = float(tagmap.max()) 97 | tagmap = tagmap.add(-min)\ 98 | .div(max - min + 1e-5)\ 99 | .mul(255)\ 100 | .clamp(0, 255)\ 101 | .byte()\ 102 | .cpu()\ 103 | .numpy() 104 | 105 | colored_tagmap = cv2.applyColorMap(tagmap, cv2.COLORMAP_JET) 106 | image_fused = colored_tagmap*0.9 + image_resized*0.1 107 | 108 | width_begin = width * (j+1) 109 | width_end = width * (j+2) 110 | image_grid[:, width_begin:width_end, :] = image_fused 111 | 112 | image_grid[:, 0:width, :] = image_resized 113 | 114 | return image_grid 115 | 116 | 117 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 118 | file_name, nrow=8, padding=2): 119 | ''' 120 | batch_image: [batch_size, channel, height, width] 121 | batch_joints: [batch_size, num_joints, 3], 122 | batch_joints_vis: [batch_size, num_joints, 1], 123 | } 124 | ''' 125 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 126 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 127 | ndarr = cv2.cvtColor(ndarr, cv2.COLOR_RGB2BGR) 128 | 129 | nmaps = batch_image.size(0) 130 | xmaps = min(nrow, nmaps) 131 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 132 | height = int(batch_image.size(2) + padding) 133 | width = int(batch_image.size(3) + padding) 134 | k = 0 135 | for y in range(ymaps): 136 | for x in range(xmaps): 137 | if k >= nmaps: 138 | break 139 | joints = batch_joints[k] 140 | joints_vis = batch_joints_vis[k] 141 | 142 | for joint, joint_vis in zip(joints, joints_vis): 143 | joint[0] = x * width + padding + joint[0] 144 | joint[1] = y * height + padding + joint[1] 145 | if joint_vis[0]: 146 | cv2.circle( 147 | ndarr, 148 | (int(joint[0]), int(joint[1])), 149 | 2, 150 | [255, 0, 0], 151 | 2 152 | ) 153 | k = k + 1 154 | cv2.imwrite(file_name, ndarr) 155 | 156 | 157 | def save_batch_maps( 158 | batch_image, 159 | batch_maps, 160 | batch_mask, 161 | file_name, 162 | map_type='heatmap', 163 | normalize=True 164 | ): 165 | if normalize: 166 | batch_image = batch_image.clone() 167 | min = float(batch_image.min()) 168 | max = float(batch_image.max()) 169 | 170 | batch_image.add_(-min).div_(max - min + 1e-5) 171 | 172 | batch_size = batch_maps.size(0) 173 | num_joints = batch_maps.size(1) 174 | map_height = batch_maps.size(2) 175 | map_width = batch_maps.size(3) 176 | 177 | grid_image = np.zeros( 178 | (batch_size*map_height, (num_joints+1)*map_width, 3), 179 | dtype=np.uint8 180 | ) 181 | 182 | for i in range(batch_size): 183 | image = batch_image[i].mul(255)\ 184 | .clamp(0, 255)\ 185 | .byte()\ 186 | .permute(1, 2, 0)\ 187 | .cpu().numpy() 188 | 189 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 190 | 
maps = batch_maps[i] 191 | 192 | if map_type == 'heatmap': 193 | image_with_hms = make_heatmaps(image, maps) 194 | elif map_type == 'tagmap': 195 | image_with_hms = make_tagmaps(image, maps) 196 | 197 | height_begin = map_height * i 198 | height_end = map_height * (i + 1) 199 | 200 | grid_image[height_begin:height_end, :, :] = image_with_hms 201 | if batch_mask is not None: 202 | mask = np.expand_dims(batch_mask[i].byte().cpu().numpy(), -1) 203 | grid_image[height_begin:height_end, :map_width, :] = \ 204 | grid_image[height_begin:height_end, :map_width, :] * mask 205 | 206 | cv2.imwrite(file_name, grid_image) 207 | 208 | 209 | def save_debug_images( 210 | config, 211 | batch_images, 212 | batch_heatmaps, 213 | batch_masks, 214 | batch_outputs, 215 | prefix 216 | ): 217 | if not config.DEBUG.DEBUG: 218 | return 219 | 220 | num_joints = config.DATASET.NUM_JOINTS 221 | batch_pred_heatmaps = batch_outputs[:, :num_joints, :, :] 222 | batch_pred_tagmaps = batch_outputs[:, num_joints:, :, :] 223 | 224 | if config.DEBUG.SAVE_HEATMAPS_GT and batch_heatmaps is not None: 225 | file_name = '{}_hm_gt.jpg'.format(prefix) 226 | save_batch_maps( 227 | batch_images, batch_heatmaps, batch_masks, file_name, 'heatmap' 228 | ) 229 | if config.DEBUG.SAVE_HEATMAPS_PRED: 230 | file_name = '{}_hm_pred.jpg'.format(prefix) 231 | save_batch_maps( 232 | batch_images, batch_pred_heatmaps, batch_masks, file_name, 'heatmap' 233 | ) 234 | if config.DEBUG.SAVE_TAGMAPS_PRED: 235 | file_name = '{}_tag_pred.jpg'.format(prefix) 236 | save_batch_maps( 237 | batch_images, batch_pred_tagmaps, batch_masks, file_name, 'tagmap' 238 | ) 239 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.find('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 1:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.find('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | scipy 3 | EasyDict==1.7 4 | opencv-python 5 | Cython 6 | pandas 7 | json_tricks 8 | scikit-image 9 | tensorboardX 10 | yacs 11 | cffi 12 | munkres 13 | tqdm -------------------------------------------------------------------------------- /tools/__pycache__/_init_paths.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TeCSAR-UNCC/EfficientHRNet/802cc956d87d753a09d366379b7546630a7d38b0/tools/__pycache__/_init_paths.cpython-36.pyc -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | 20 | this_dir = osp.dirname(__file__) 21 | 22 | lib_path = osp.join(this_dir, '..', 'lib') 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /tools/crowdpose_concat_train_val.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bowen Cheng (bcheng9@illinois.edu) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import argparse 12 | import json 13 | import os 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='Concat CrowdPose train and val') 18 | 19 | parser.add_argument('--data_dir', 20 | help='data directory containing json annotation file', 21 | default='data/crowd_pose/json', 22 | type=str) 23 | 24 | args = parser.parse_args() 25 | 26 | return args 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | 32 | train_dataset = json.load(open(os.path.join(args.data_dir, 'crowdpose_train.json'))) 33 | val_dataset = json.load(open(os.path.join(args.data_dir, 'crowdpose_val.json'))) 34 | 35 | trainval_dataset = {} 36 | trainval_dataset['categories'] = train_dataset['categories'] 37 | trainval_dataset['images'] = [] 38 | trainval_dataset['images'].extend(train_dataset['images']) 39 | trainval_dataset['images'].extend(val_dataset['images']) 40 | trainval_dataset['annotations'] = [] 41 | trainval_dataset['annotations'].extend(train_dataset['annotations']) 42 | trainval_dataset['annotations'].extend(val_dataset['annotations']) 43 | 44 | with open(os.path.join(args.data_dir, 'crowdpose_trainval.json'), 'w') as f: 45 | json.dump(trainval_dataset, f) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /tools/dist_train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | import shutil 16 | import warnings 17 | 18 | import torch 19 | import torch.backends.cudnn as cudnn 20 | import torch.distributed as dist 21 | import torch.multiprocessing as mp 22 | import torch.nn as nn 23 | import torch.nn.parallel 24 | import torch.optim 25 | import torch.utils.data 26 | import torch.utils.data.distributed 27 | from tensorboardX import SummaryWriter 28 | 29 | import _init_paths 30 | import models 31 | 32 | from config import cfg 33 | from config import update_config 34 | from core.loss import MultiLossFactory 35 | from core.trainer import do_train 36 | from dataset import make_dataloader 37 | from fp16_utils.fp16util import network_to_half 38 | from fp16_utils.fp16_optimizer import FP16_Optimizer 39 | from utils.utils import create_logger 40 | from utils.utils import get_optimizer 41 | from utils.utils import save_checkpoint 42 | from utils.utils import setup_logger 43 | import re 44 | #import hickle as hkl 45 | import torch 46 | import torch.nn.functional as F 47 | from torch.autograd import Variable 48 | #from torchviz import make_dot 49 | 50 | #os.environ["CUDA_VISIBLE_DEVICES"]="0" 51 | #os.environ['MASTER_PORT'] = '2000901' 52 | 53 | 54 | def parse_args(): 55 | parser = argparse.ArgumentParser(description='Train keypoints network') 56 | # general 57 | parser.add_argument('--cfg', 58 | help='experiment configure file name', 59 | required=True, 60 | type=str) 61 | 62 | parser.add_argument('opts', 63 | help="Modify config options using the command-line", 64 | default=None, 65 | nargs=argparse.REMAINDER) 66 | 67 | # distributed training 68 | parser.add_argument('--gpu', 69 | help='gpu id for multiprocessing training', 70 | type=str) 71 | parser.add_argument('--world-size', 72 | default=1, 73 | type=int, 74 | help='number of nodes for distributed training') 75 | parser.add_argument('--dist-url', 76 | default='tcp://127.0.0.1:24456', 77 | type=str, 78 | help='url used to set up distributed training') 79 | parser.add_argument('--rank', 80 | default=0, 81 | type=int, 82 | help='node rank for distributed training') 83 | 84 | args = parser.parse_args() 85 | 86 | return args 87 | 88 | 89 | def main(): 90 | args = parse_args() 91 | update_config(cfg, args) 92 | 93 | cfg.defrost() 94 | cfg.RANK = args.rank 95 | cfg.freeze() 96 | 97 | logger, final_output_dir, tb_log_dir = create_logger( 98 | cfg, args.cfg, 'train' 99 | ) 100 | 101 | logger.info(pprint.pformat(args)) 102 | logger.info(cfg) 103 | 104 | if args.gpu is not None: 105 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 106 | 'disable data parallelism.') 107 | 108 | if args.dist_url == "env://" and args.world_size == -1: 109 | args.world_size = int(os.environ["WORLD_SIZE"]) 110 | 111 | args.distributed = args.world_size > 1 or cfg.MULTIPROCESSING_DISTRIBUTED 112 | 113 | ngpus_per_node = torch.cuda.device_count() 114 | if cfg.MULTIPROCESSING_DISTRIBUTED: 115 | # Since we have ngpus_per_node processes per node, the total world_size 116 | # needs to be adjusted accordingly 117 | args.world_size = ngpus_per_node * args.world_size 118 | # Use torch.multiprocessing.spawn to launch distributed processes: the 119 | # main_worker process function 120 | mp.spawn( 121 | main_worker, 122 | nprocs=ngpus_per_node, 123 | args=(ngpus_per_node, args, final_output_dir, tb_log_dir) 124 | ) 125 | else: 126 | # Simply call main_worker function 127 | main_worker( 128 | ','.join([str(i) for i in cfg.GPUS]), 129 | ngpus_per_node, 130 | args, 131 | final_output_dir, 132 | tb_log_dir 133 | ) 134 | 135 | 136 | def main_worker( 137 | gpu, ngpus_per_node, args, final_output_dir, tb_log_dir 138 | ): 139 | # cudnn related setting 140 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 141 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 142 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 143 | 144 | if cfg.FP16.ENABLED: 145 | assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled." 146 | 147 | if cfg.FP16.STATIC_LOSS_SCALE != 1.0: 148 | if not cfg.FP16.ENABLED: 149 | print("Warning: if --fp16 is not used, static_loss_scale will be ignored.") 150 | 151 | args.gpu = gpu 152 | 153 | if args.gpu is not None: 154 | print("Use GPU: {} for training".format(args.gpu)) 155 | 156 | if args.distributed: 157 | if args.dist_url == "env://" and args.rank == -1: 158 | args.rank = int(os.environ["RANK"]) 159 | if cfg.MULTIPROCESSING_DISTRIBUTED: 160 | # For multiprocessing distributed training, rank needs to be the 161 | # global rank among all the processes 162 | args.rank = args.rank * ngpus_per_node + gpu 163 | print('Init process group: dist_url: {}, world_size: {}, rank: {}'. 
164 | format(args.dist_url, args.world_size, args.rank)) 165 | dist.init_process_group( 166 | backend=cfg.DIST_BACKEND, 167 | init_method=args.dist_url, 168 | world_size=args.world_size, 169 | rank=args.rank, 170 | ) 171 | 172 | update_config(cfg, args) 173 | 174 | # setup logger 175 | logger, _ = setup_logger(final_output_dir, args.rank, 'train') 176 | 177 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 178 | cfg, is_train=True 179 | ) 180 | 181 | # copy model file 182 | if not cfg.MULTIPROCESSING_DISTRIBUTED or ( 183 | cfg.MULTIPROCESSING_DISTRIBUTED 184 | and args.rank % ngpus_per_node == 0 185 | ): 186 | this_dir = os.path.dirname(__file__) 187 | shutil.copy2( 188 | os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), 189 | final_output_dir 190 | ) 191 | 192 | writer_dict = { 193 | 'writer': SummaryWriter(log_dir=tb_log_dir), 194 | 'train_global_steps': 0, 195 | 'valid_global_steps': 0, 196 | } 197 | 198 | if not cfg.MULTIPROCESSING_DISTRIBUTED or ( 199 | cfg.MULTIPROCESSING_DISTRIBUTED 200 | and args.rank % ngpus_per_node == 0 201 | ): 202 | dump_input = torch.rand( 203 | (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE) 204 | ) 205 | #writer_dict['writer'].add_graph(model, (dump_input, )) 206 | # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE)) 207 | 208 | if cfg.FP16.ENABLED: 209 | model = network_to_half(model) 210 | 211 | if cfg.MODEL.SYNC_BN and not args.distributed: 212 | print('Warning: Sync BatchNorm is only supported in distributed training.') 213 | 214 | if args.distributed: 215 | if cfg.MODEL.SYNC_BN: 216 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 217 | # For multiprocessing distributed, DistributedDataParallel constructor 218 | # should always set the single device scope, otherwise, 219 | # DistributedDataParallel will use all available devices. 
220 | if args.gpu is not None: 221 | torch.cuda.set_device(args.gpu) 222 | model.cuda(args.gpu) 223 | # When using a single GPU per process and per 224 | # DistributedDataParallel, we need to divide the batch size 225 | # ourselves based on the total number of GPUs we have 226 | # args.workers = int(args.workers / ngpus_per_node) 227 | model = torch.nn.parallel.DistributedDataParallel( 228 | model, device_ids=[args.gpu], find_unused_parameters=True, 229 | ) 230 | else: 231 | model.cuda() 232 | # DistributedDataParallel will divide and allocate batch_size to all 233 | # available GPUs if device_ids are not set 234 | model = torch.nn.parallel.DistributedDataParallel(model) 235 | elif args.gpu is not None: 236 | torch.cuda.set_device(args.gpu) 237 | model = model.cuda(args.gpu) 238 | else: 239 | model = torch.nn.DataParallel(model).cuda() 240 | 241 | # define loss function (criterion) and optimizer 242 | loss_factory = MultiLossFactory(cfg).cuda() 243 | 244 | # Data loading code 245 | train_loader = make_dataloader( 246 | cfg, is_train=True, distributed=args.distributed 247 | ) 248 | logger.info(train_loader.dataset) 249 | 250 | best_perf = -1 251 | best_model = False 252 | last_epoch = -1 253 | optimizer = get_optimizer(cfg, model) 254 | 255 | if cfg.FP16.ENABLED: 256 | optimizer = FP16_Optimizer( 257 | optimizer, 258 | static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE, 259 | dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE 260 | ) 261 | 262 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 263 | checkpoint_file = os.path.join( 264 | final_output_dir, 'checkpoint.pth.tar') 265 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): 266 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 267 | checkpoint = torch.load(checkpoint_file, map_location='cpu') 268 | begin_epoch = checkpoint['epoch'] 269 | best_perf = checkpoint['perf'] 270 | last_epoch = checkpoint['epoch'] 271 | model.load_state_dict(checkpoint['state_dict']) 272 | 273 | optimizer.load_state_dict(checkpoint['optimizer']) 274 | logger.info("=> loaded checkpoint '{}' (epoch {})".format( 275 | checkpoint_file, checkpoint['epoch'])) 276 | 277 | if cfg.FP16.ENABLED: 278 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 279 | optimizer.optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 280 | last_epoch=last_epoch 281 | ) 282 | ''' 283 | else: 284 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2, eta_min=0.00001, last_epoch=last_epoch) 285 | ''' 286 | else: 287 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 288 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 289 | last_epoch=last_epoch) 290 | 291 | #)#inputs = torch.randn(1,3,672,672) 292 | #y = model(Variable(inputs)) 293 | #print(y) 294 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 295 | # train one epoch 296 | do_train(cfg, model, train_loader, loss_factory, optimizer, epoch, 297 | final_output_dir, tb_log_dir, writer_dict, fp16=cfg.FP16.ENABLED) 298 | 299 | # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`. 
300 | lr_scheduler.step() 301 | 302 | perf_indicator = epoch 303 | if perf_indicator >= best_perf: 304 | best_perf = perf_indicator 305 | best_model = True 306 | else: 307 | best_model = False 308 | 309 | if not cfg.MULTIPROCESSING_DISTRIBUTED or ( 310 | cfg.MULTIPROCESSING_DISTRIBUTED 311 | and args.rank == 0 312 | ): 313 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 314 | save_checkpoint({ 315 | 'epoch': epoch + 1, 316 | 'model': cfg.MODEL.NAME, 317 | 'state_dict': model.state_dict(), 318 | 'best_state_dict': model.module.state_dict(), 319 | 'perf': perf_indicator, 320 | 'optimizer': optimizer.state_dict(), 321 | }, best_model, final_output_dir) 322 | 323 | final_model_state_file = os.path.join( 324 | final_output_dir, 'final_state{}.pth.tar'.format(gpu) 325 | ) 326 | 327 | logger.info('saving final model state to {}'.format( 328 | final_model_state_file)) 329 | torch.save(model.module.state_dict(), final_model_state_file) 330 | writer_dict['writer'].close() 331 | 332 | 333 | if __name__ == '__main__': 334 | main() 335 | -------------------------------------------------------------------------------- /tools/valid.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | 16 | import torch 17 | import torch.backends.cudnn as cudnn 18 | import torch.nn.parallel 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms 23 | import torch.multiprocessing 24 | from tqdm import tqdm 25 | 26 | import _init_paths 27 | import models 28 | 29 | from config import cfg 30 | from config import check_config 31 | from config import update_config 32 | from core.inference import get_multi_stage_outputs 33 | from core.inference import aggregate_results 34 | from core.group import HeatmapParser 35 | from dataset import make_test_dataloader 36 | from fp16_utils.fp16util import network_to_half 37 | from utils.utils import create_logger 38 | from utils.utils import get_model_summary 39 | from utils.vis import save_debug_images 40 | from utils.vis import save_valid_image 41 | from utils.transforms import resize_align_multi_scale 42 | from utils.transforms import get_final_preds 43 | from utils.transforms import get_multi_scale_size 44 | 45 | torch.multiprocessing.set_sharing_strategy('file_system') 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser(description='Test keypoints network') 50 | # general 51 | parser.add_argument('--cfg', 52 | help='experiment configure file name', 53 | required=True, 54 | type=str) 55 | 56 | parser.add_argument('opts', 57 | help="Modify config options using the command-line", 58 | default=None, 59 | nargs=argparse.REMAINDER) 60 | 61 | args = parser.parse_args() 62 | 63 | return args 64 | 65 | 66 | # markdown format output 67 | def _print_name_value(logger, name_value, full_arch_name): 68 | names = name_value.keys() 69 | values = name_value.values() 70 | num_values = len(name_value) 71 | logger.info( 72 | '| Arch ' + 73 | ' 
'.join(['| {}'.format(name) for name in names]) + 74 | ' |' 75 | ) 76 | logger.info('|---' * (num_values+1) + '|') 77 | 78 | if len(full_arch_name) > 15: 79 | full_arch_name = full_arch_name[:8] + '...' 80 | logger.info( 81 | '| ' + full_arch_name + ' ' + 82 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 83 | ' |' 84 | ) 85 | 86 | 87 | def main(): 88 | args = parse_args() 89 | update_config(cfg, args) 90 | check_config(cfg) 91 | 92 | logger, final_output_dir, tb_log_dir = create_logger( 93 | cfg, args.cfg, 'valid' 94 | ) 95 | 96 | logger.info(pprint.pformat(args)) 97 | logger.info(cfg) 98 | 99 | # cudnn related setting 100 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 101 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 102 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 103 | 104 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 105 | cfg, is_train=False 106 | ) 107 | 108 | dump_input = torch.rand( 109 | (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE) 110 | ) 111 | logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE)) 112 | 113 | if cfg.FP16.ENABLED: 114 | model = network_to_half(model) 115 | 116 | if cfg.TEST.MODEL_FILE: 117 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 118 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=True) 119 | else: 120 | model_state_file = os.path.join( 121 | final_output_dir, 'model_best.pth.tar' 122 | ) 123 | logger.info('=> loading model from {}'.format(model_state_file)) 124 | model.load_state_dict(torch.load(model_state_file)) 125 | 126 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 127 | model.eval() 128 | 129 | data_loader, test_dataset = make_test_dataloader(cfg) 130 | 131 | if cfg.MODEL.NAME == 'pose_hourglass': 132 | transforms = torchvision.transforms.Compose( 133 | [ 134 | torchvision.transforms.ToTensor(), 135 | ] 136 | ) 137 | else: 138 | transforms = torchvision.transforms.Compose( 139 | [ 140 | torchvision.transforms.ToTensor(), 141 | torchvision.transforms.Normalize( 142 | mean=[0.485, 0.456, 0.406], 143 | std=[0.229, 0.224, 0.225] 144 | ) 145 | ] 146 | ) 147 | 148 | parser = HeatmapParser(cfg) 149 | all_preds = [] 150 | all_scores = [] 151 | 152 | pbar = tqdm(total=len(test_dataset)) if cfg.TEST.LOG_PROGRESS else None 153 | for i, (images, annos) in enumerate(data_loader): 154 | assert 1 == images.size(0), 'Test batch size should be 1' 155 | 156 | image = images[0].cpu().numpy() 157 | # size at scale 1.0 158 | base_size, center, scale = get_multi_scale_size( 159 | image, cfg.DATASET.INPUT_SIZE, 1.0, min(cfg.TEST.SCALE_FACTOR) 160 | ) 161 | 162 | with torch.no_grad(): 163 | final_heatmaps = None 164 | tags_list = [] 165 | for idx, s in enumerate(sorted(cfg.TEST.SCALE_FACTOR, reverse=True)): 166 | input_size = cfg.DATASET.INPUT_SIZE 167 | image_resized, center, scale = resize_align_multi_scale( 168 | image, input_size, s, min(cfg.TEST.SCALE_FACTOR) 169 | ) 170 | image_resized = transforms(image_resized) 171 | image_resized = image_resized.unsqueeze(0).cuda() 172 | 173 | outputs, heatmaps, tags = get_multi_stage_outputs( 174 | cfg, model, image_resized, cfg.TEST.FLIP_TEST, 175 | cfg.TEST.PROJECT2IMAGE, base_size 176 | ) 177 | 178 | final_heatmaps, tags_list = aggregate_results( 179 | cfg, s, final_heatmaps, tags_list, heatmaps, tags 180 | ) 181 | 182 | final_heatmaps = final_heatmaps / float(len(cfg.TEST.SCALE_FACTOR)) 183 | tags = torch.cat(tags_list, dim=4) 184 | grouped, scores = parser.parse( 185 | final_heatmaps, tags, 
cfg.TEST.ADJUST, cfg.TEST.REFINE 186 | ) 187 | 188 | final_results = get_final_preds( 189 | grouped, center, scale, 190 | [final_heatmaps.size(3), final_heatmaps.size(2)] 191 | ) 192 | 193 | if cfg.TEST.LOG_PROGRESS: 194 | pbar.update() 195 | 196 | if i % cfg.PRINT_FREQ == 0: 197 | prefix = '{}_{}'.format(os.path.join(final_output_dir, 'result_valid'), i) 198 | # logger.info('=> write {}'.format(prefix)) 199 | save_valid_image(image, final_results, '{}.jpg'.format(prefix), dataset=test_dataset.name) 200 | # save_debug_images(cfg, image_resized, None, None, outputs, prefix) 201 | 202 | all_preds.append(final_results) 203 | all_scores.append(scores) 204 | 205 | if cfg.TEST.LOG_PROGRESS: 206 | pbar.close() 207 | 208 | name_values, _ = test_dataset.evaluate( 209 | cfg, all_preds, all_scores, final_output_dir 210 | ) 211 | 212 | if isinstance(name_values, list): 213 | for name_value in name_values: 214 | _print_name_value(logger, name_value, cfg.MODEL.NAME) 215 | else: 216 | _print_name_value(logger, name_values, cfg.MODEL.NAME) 217 | 218 | 219 | if __name__ == '__main__': 220 | main() 221 | --------------------------------------------------------------------------------
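--------------------------------------------------------------------------------
Usage sketch (illustrative; not a file in this repository). The helpers defined in lib/utils/utils.py above — get_optimizer, save_checkpoint, and AverageMeter — are used by tools/dist_train.py (get_optimizer and save_checkpoint directly; AverageMeter is the running-average tracker typically used inside core/trainer.do_train). The minimal sketch below shows how they fit together for a quick single-GPU experiment. It assumes lib/ is on PYTHONPATH (as tools/_init_paths.py arranges) and that the default config in lib/config provides cfg; the 1x1 convolution, random tensors, and placeholder loss stand in for the real EfficientHRNet model, data loader, and MultiLossFactory.

import torch
import torch.nn as nn

from config import cfg                                      # defaults from lib/config
from utils.utils import get_optimizer, save_checkpoint, AverageMeter

model = nn.Conv2d(3, cfg.MODEL.NUM_JOINTS, kernel_size=1)   # stand-in for pose_higher_hrnet
optimizer = get_optimizer(cfg, model)                       # Adam or SGD, per cfg.TRAIN.OPTIMIZER

loss_meter = AverageMeter()
for step in range(10):
    images = torch.randn(2, 3, 64, 64)                      # placeholder batch at a small resolution
    heatmaps = model(images)
    loss = heatmaps.pow(2).mean()                           # placeholder loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_meter.update(loss.item(), images.size(0))          # running average, as the trainer logs it

print('average loss over {} samples: {:.4f}'.format(loss_meter.count, loss_meter.avg))

# Mirrors the per-epoch checkpointing in tools/dist_train.py: writes checkpoint.pth.tar
# and, because is_best is True and 'state_dict' is present, also model_best.pth.tar.
save_checkpoint(
    {
        'epoch': 1,
        'model': cfg.MODEL.NAME,
        'state_dict': model.state_dict(),
        'best_state_dict': model.state_dict(),
        'perf': 0.0,
        'optimizer': optimizer.state_dict(),
    },
    is_best=True,
    output_dir='.'
)

In practice these pieces are driven end to end by the entry points above, e.g. python tools/dist_train.py --cfg <path to a YAML such as Example Configs/H0.yaml> for training and python tools/valid.py --cfg <same YAML> for evaluation; the exact config path and any TEST.MODEL_FILE override depend on the local setup.
--------------------------------------------------------------------------------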