├── README.md ├── experiments ├── coco │ ├── hrnet │ │ ├── heatmap │ │ │ ├── w32_128x128_adam_lr1e-3.yaml │ │ │ ├── w32_256x192_adam_lr1e-3.yaml │ │ │ ├── w32_384x288_adam_lr1e-3.yaml │ │ │ ├── w32_64x64_adam_lr1e-3.yaml │ │ │ ├── w48_128x128_adam_lr1e-3.yaml │ │ │ ├── w48_256x192_adam_lr1e-3.yaml │ │ │ ├── w48_384x288_adam_lr1e-3.yaml │ │ │ └── w48_64x64_adam_lr1e-3.yaml │ │ ├── sa_simdr │ │ │ ├── w48_256x192_adam_lr1e-3_split2_sigma4.yaml │ │ │ └── w48_384x288_adam_lr1e-3_split1_5_sigma4.yaml │ │ └── simdr │ │ │ ├── nmt_w32_128x128_adam_lr1e-3_split2.yaml │ │ │ ├── nmt_w32_256x192_adam_lr1e-3.yaml │ │ │ ├── nmt_w32_64x64_adam_lr1e-3.yaml │ │ │ ├── nmt_w48_128x128_adam_lr1e-3.yaml │ │ │ ├── nmt_w48_256x192_adam_lr1e-3.yaml │ │ │ └── nmt_w48_64x64_adam_lr1e-3.yaml │ └── resnet │ │ ├── heatmap │ │ ├── res101_128x128_d256x3_adam_lr1e-3.yaml │ │ ├── res101_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res101_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res101_64x64_d256x3_adam_lr1e-3.yaml │ │ ├── res152_128x128_d256x3_adam_lr1e-3.yaml │ │ ├── res152_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res152_384x288_d256x3_adam_lr1e-3.yaml │ │ ├── res50_128x128_d256x3_adam_lr1e-3.yaml │ │ ├── res50_256x192_d256x3_adam_lr1e-3.yaml │ │ ├── res50_384x288_d256x3_adam_lr1e-3.yaml │ │ └── res50_64x64_d256x3_adam_lr1e-3.yaml │ │ ├── sa_simdr │ │ └── original │ │ │ └── res50_384x288_d256x3_adam_lr1e-3_deconv3_split2_sigma6.yaml │ │ └── simdr │ │ ├── original │ │ ├── nmt_res101_128x128_d256x3_adam_lr1e-3_split_2_ls2e1_deconv3.yaml │ │ ├── nmt_res101_256x192_d256x3_adam_lr1e-3_split_2_ls4e1_deconv3.yaml │ │ ├── nmt_res101_64x64_d256x3_adam_lr1e-3_split_3_deconv3.yaml │ │ ├── nmt_res50_128x128_d256x3_adam_lr1e-3_split_3_deconv3.yaml │ │ ├── nmt_res50_256x192_d256x3_adam_lr1e-3_deconv3_split2.yaml │ │ ├── nmt_res50_384x288_d256x3_adam_lr1e-3_deconv3.yaml │ │ ├── nmt_res50_64x64_d256x3_adam_lr1e-3_split_3_deconv3.yaml │ │ ├── norm_nmt_res101_384x288_d256x3_adam_lr1e-3_split_2_ls1e1_deconv3.yaml │ │ └── 
norm_nmt_res152_384x288_d256x3_adam_lr1e-3.yaml │ │ └── upsample_free │ │ ├── nmt_res50_128x128_d256x3_adam_lr1e-3_split_3_cpj32.yaml │ │ ├── nmt_res50_256x192_d256x3_adam_lr1e-3_split_2_cpj28.yaml │ │ └── nmt_res50_64x64_d256x3_adam_lr1e-3_split_3_cpj32.yaml └── mpii │ └── hrnet │ ├── heatmap │ ├── w32_256x256_adam_lr1e-3.yaml │ └── w32_64x64_adam_lr1e-3.yaml │ ├── sa_simdr │ └── w32_256x256_adam_lr1e-3_split2_sigma6.yaml │ └── simdr │ ├── norm_w32_256x256_adam_lr1e-3_ls2e1.yaml │ └── norm_w32_64x64_adam_lr1e-3_ls2e1_split3.yaml ├── lib ├── Makefile ├── config │ ├── __init__.py │ ├── default.py │ └── models.py ├── core │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── coco.py │ └── mpii.py ├── models │ ├── __init__.py │ ├── pose_hrnet.py │ ├── pose_resnet.py │ └── pose_resnet_upfree.py ├── nms │ ├── cpu_nms.c │ ├── cpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cpython-36m-x86_64-linux-gnu.so │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup_linux.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt └── tools ├── _init_paths.py ├── test.py └── train.py /experiments/coco/hrnet/heatmap/w32_128x128_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: 
pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 128 33 | - 128 34 | HEATMAP_SIZE: 35 | - 32 36 | - 32 37 | SIGMA: 1 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- 
/experiments/coco/hrnet/heatmap/w32_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 
106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '/data/pretrained/pose/hrnet/pose_hrnet_w32_256x192.pth' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/heatmap/w32_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 
32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/heatmap/w32_64x64_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | 
TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 64 33 | - 64 34 | HEATMAP_SIZE: 35 | - 16 36 | - 16 37 | SIGMA: 1 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | 
-------------------------------------------------------------------------------- /experiments/coco/hrnet/heatmap/w48_128x128_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 128 33 | - 128 34 | HEATMAP_SIZE: 35 | - 32 36 | - 32 37 | SIGMA: 1 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | 
LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 256 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | # COCO_BBOX_FILE: '/data/dataset/COCO_2017/person_detection_results/COCO_test-dev2017_detections_AP_H_609_person.json' 113 | BBOX_THRE: 1.0 114 | IMAGE_THRE: 0.0 115 | IN_VIS_THRE: 0.2 116 | MODEL_FILE: '' 117 | NMS_THRE: 1.0 118 | OKS_THRE: 0.9 119 | USE_GT_BBOX: true 120 | FLIP_TEST: true 121 | POST_PROCESS: true 122 | SHIFT_HEATMAP: true 123 | DEBUG: 124 | DEBUG: true 125 | SAVE_BATCH_IMAGES_GT: true 126 | SAVE_BATCH_IMAGES_PRED: true 127 | SAVE_HEATMAPS_GT: true 128 | SAVE_HEATMAPS_PRED: true 129 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/heatmap/w48_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 
50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | # COCO_BBOX_FILE: '/data/dataset/COCO_2017/person_detection_results/COCO_test-dev2017_detections_AP_H_609_person.json' 113 | BBOX_THRE: 1.0 114 | IMAGE_THRE: 0.0 115 | IN_VIS_THRE: 0.2 116 | MODEL_FILE: '' 117 | NMS_THRE: 1.0 118 | OKS_THRE: 0.9 119 | USE_GT_BBOX: true 120 | FLIP_TEST: true 121 | POST_PROCESS: true 122 | SHIFT_HEATMAP: true 123 | DEBUG: 124 | DEBUG: true 125 | SAVE_BATCH_IMAGES_GT: true 126 | SAVE_BATCH_IMAGES_PRED: true 127 | SAVE_HEATMAPS_GT: true 128 | SAVE_HEATMAPS_PRED: true 129 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/heatmap/w48_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3,4,5,6,7) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | 
WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 24 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 128 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: 
'/data/pretrained/pose/hrnet/pose_hrnet_w48_384x288.pth' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: false 119 | FLIP_TEST: false 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: false 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/heatmap/w48_64x64_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 64 33 | - 64 34 | HEATMAP_SIZE: 35 | - 16 36 | - 16 37 | SIGMA: 1 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | 
NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 256 111 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 112 | # COCO_BBOX_FILE: '/data/dataset/COCO_2017/person_detection_results/COCO_test-dev2017_detections_AP_H_609_person.json' 113 | BBOX_THRE: 1.0 114 | IMAGE_THRE: 0.0 115 | IN_VIS_THRE: 0.2 116 | MODEL_FILE: '' 117 | NMS_THRE: 1.0 118 | OKS_THRE: 0.9 119 | USE_GT_BBOX: true 120 | FLIP_TEST: true 121 | POST_PROCESS: true 122 | SHIFT_HEATMAP: true 123 | DEBUG: 124 | DEBUG: true 125 | SAVE_BATCH_IMAGES_GT: true 126 | SAVE_BATCH_IMAGES_PRED: true 127 | SAVE_HEATMAPS_GT: true 128 | SAVE_HEATMAPS_PRED: true 129 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/sa_simdr/w48_256x192_adam_lr1e-3_split2_sigma4.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 3072 
30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 32 | COORD_REPRESENTATION: 'sa-simdr' 33 | IMAGE_SIZE: 34 | - 192 35 | - 256 36 | HEATMAP_SIZE: 37 | - 192 38 | - 256 39 | SIGMA: 4 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 48 63 | - 96 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 48 75 | - 96 76 | - 192 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 48 89 | - 96 90 | - 192 91 | - 384 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'KLDiscretLoss' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.01 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/sa_simdr/pose_hrnet_w48_256x192_split2_sigma4.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | 
SAVE_HEATMAPS_PRED: true 131 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/sa_simdr/w48_384x288_adam_lr1e-3_split1_5_sigma4.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 1.5 29 | HEAD_INPUT: 6912 30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 32 | COORD_REPRESENTATION: 'sa-simdr' 33 | IMAGE_SIZE: 34 | - 288 35 | - 384 36 | HEATMAP_SIZE: 37 | - 288 38 | - 384 39 | SIGMA: 4 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 48 63 | - 96 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 48 75 | - 96 76 | - 192 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 48 89 | - 96 90 | - 192 91 | - 384 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'KLDiscretLoss' 96 | TRAIN: 97 | 
BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.005 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/sa_simdr/pose_hrnet_w48_384x288_split1_5_sigma4.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | SAVE_HEATMAPS_PRED: true 131 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/simdr/nmt_w32_128x128_adam_lr1e-3_split2.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 1024 30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 128 35 | - 128 36 | HEATMAP_SIZE: 37 | - 128 38 | - 128 39 | SIGMA: 1 40 | EXTRA: 41 | PRETRAINED_LAYERS: 
42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 32 63 | - 64 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 32 75 | - 64 76 | - 128 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 32 89 | - 64 90 | - 128 91 | - 256 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTCritierion' 96 | LABEL_SMOOTHING: 0.2 97 | TRAIN: 98 | BATCH_SIZE_PER_GPU: 32 99 | SHUFFLE: true 100 | BEGIN_EPOCH: 0 101 | END_EPOCH: 210 102 | OPTIMIZER: adam 103 | LR: 0.001 104 | LR_FACTOR: 0.1 105 | LR_STEP: 106 | - 170 107 | - 200 108 | WD: 0.0001 109 | GAMMA1: 0.99 110 | GAMMA2: 0.0 111 | MOMENTUM: 0.9 112 | NESTEROV: false 113 | TEST: 114 | BATCH_SIZE_PER_GPU: 32 115 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 116 | BBOX_THRE: 1.0 117 | IMAGE_THRE: 0.0 118 | IN_VIS_THRE: 0.02 119 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_hrnet_w32_128x128.pth' 120 | NMS_THRE: 1.0 121 | OKS_THRE: 0.9 122 | USE_GT_BBOX: true 123 | FLIP_TEST: true 124 | POST_PROCESS: false 125 | SHIFT_HEATMAP: true 126 | DEBUG: 127 | DEBUG: true 128 | SAVE_BATCH_IMAGES_GT: true 129 | SAVE_BATCH_IMAGES_PRED: true 130 | SAVE_HEATMAPS_GT: true 131 | SAVE_HEATMAPS_PRED: true 132 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/simdr/nmt_w32_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | 
AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 3072 30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 192 35 | - 256 36 | HEATMAP_SIZE: 37 | - 192 38 | - 256 39 | SIGMA: 2 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 32 63 | - 64 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 32 75 | - 64 76 | - 128 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 32 89 | - 64 90 | - 128 91 | - 256 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTCritierion' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | 
TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.01 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_hrnet_w32_256x192.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | SAVE_HEATMAPS_PRED: true 131 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/simdr/nmt_w32_64x64_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 3.0 29 | HEAD_INPUT: 256 30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 64 35 | - 64 36 | HEATMAP_SIZE: 37 | - 64 38 | - 64 39 | SIGMA: 1 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 
61 | NUM_CHANNELS: 62 | - 32 63 | - 64 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 32 75 | - 64 76 | - 128 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 32 89 | - 64 90 | - 128 91 | - 256 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTCritierion' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.02 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_hrnet_w32_64x64.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | SAVE_HEATMAPS_PRED: true 131 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/simdr/nmt_w48_128x128_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | 
PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 1024 30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 128 35 | - 128 36 | HEATMAP_SIZE: 37 | - 128 38 | - 128 39 | SIGMA: 1 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 48 63 | - 96 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 48 75 | - 96 76 | - 192 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 48 89 | - 96 90 | - 192 91 | - 384 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTCritierion' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.02 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_hrnet_w48_128x128.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | 
USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | SAVE_HEATMAPS_PRED: true 131 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/simdr/nmt_w48_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 3072 30 | NUM_JOINTS: 17 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 192 35 | - 256 36 | HEATMAP_SIZE: 37 | - 192 38 | - 256 39 | SIGMA: 2 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 48 63 | - 96 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 48 75 | - 96 76 | - 192 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 
83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 48 89 | - 96 90 | - 192 91 | - 384 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTCritierion' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.01 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_hrnet_w48_256x192.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | SAVE_HEATMAPS_PRED: true 131 | -------------------------------------------------------------------------------- /experiments/coco/hrnet/simdr/nmt_w48_64x64_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: '/data/dataset/COCO_2017' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 3.0 29 | HEAD_INPUT: 256 30 | NUM_JOINTS: 17 31 | PRETRAINED: 
'/data/pretrained/imagenet/hrnet_w48-8ef0771d.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 64 35 | - 64 36 | HEATMAP_SIZE: 37 | - 64 38 | - 64 39 | SIGMA: 1 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 48 63 | - 96 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 48 75 | - 96 76 | - 192 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 48 89 | - 96 90 | - 192 91 | - 384 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTCritierion' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 115 | BBOX_THRE: 1.0 116 | IMAGE_THRE: 0.0 117 | IN_VIS_THRE: 0.02 118 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_hrnet_w48_64x64.pth' 119 | NMS_THRE: 1.0 120 | OKS_THRE: 0.9 121 | USE_GT_BBOX: true 122 | FLIP_TEST: true 123 | POST_PROCESS: false 124 | SHIFT_HEATMAP: true 125 | DEBUG: 126 | DEBUG: true 127 | SAVE_BATCH_IMAGES_GT: true 128 | SAVE_BATCH_IMAGES_PRED: true 129 | SAVE_HEATMAPS_GT: true 130 | SAVE_HEATMAPS_PRED: true 131 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res101_128x128_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: '/data/pretrained/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 128 27 | - 128 28 | HEATMAP_SIZE: 29 | - 32 30 | - 32 31 | SIGMA: 1 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res101_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res101_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res101_64x64_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: '/data/pretrained/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 64 27 | - 64 28 | HEATMAP_SIZE: 29 | - 16 30 | - 16 31 | SIGMA: 1 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res152_128x128_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 128 27 | - 128 28 | HEATMAP_SIZE: 29 | - 32 30 | - 32 31 | SIGMA: 1 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res152_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res152_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res50_128x128_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 128 27 | - 128 28 | HEATMAP_SIZE: 29 | - 32 30 | - 32 31 | SIGMA: 1 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res50_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 64 67 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | 
-------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res50_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | # COCO_BBOX_FILE: '/data/dataset/COCO_2017/person_detection_results/COCO_test-dev2017_detections_AP_H_609_person.json' 68 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 69 | BBOX_THRE: 1.0 70 | IMAGE_THRE: 0.0 71 | IN_VIS_THRE: 0.2 72 | MODEL_FILE: '' 73 | NMS_THRE: 1.0 74 | OKS_THRE: 0.9 75 | FLIP_TEST: true 76 | POST_PROCESS: true 77 | SHIFT_HEATMAP: true 78 | USE_GT_BBOX: true 79 | DEBUG: 80 | DEBUG: true 81 | SAVE_BATCH_IMAGES_GT: true 82 | 
SAVE_BATCH_IMAGES_PRED: true 83 | SAVE_HEATMAPS_GT: true 84 | SAVE_HEATMAPS_PRED: true 85 | -------------------------------------------------------------------------------- /experiments/coco/resnet/heatmap/res50_64x64_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 64 27 | - 64 28 | HEATMAP_SIZE: 29 | - 16 30 | - 16 31 | SIGMA: 1 32 | NUM_JOINTS: 17 33 | COORD_REPRESENTATION: 'heatmap' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | 
SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/coco/resnet/sa_simdr/original/res50_384x288_d256x3_adam_lr1e-3_deconv3_split2_sigma6.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 6912 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 288 29 | - 384 30 | HEATMAP_SIZE: 31 | - 288 32 | - 384 33 | SIGMA: 6 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'sa-simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 50 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'KLDiscretLoss' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.005 74 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/sa_simdr/pose_resnet_50_384x288.pth' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | 
POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res101_128x128_d256x3_adam_lr1e-3_split_2_ls2e1_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 1024 26 | PRETRAINED: '/data/pretrained/imagenet/resnet101-5d3b4d8f.pth' 27 | IMAGE_SIZE: 28 | - 128 29 | - 128 30 | HEATMAP_SIZE: 31 | - 128 32 | - 128 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 101 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | LABEL_SMOOTHING: 0.2 53 | TRAIN: 54 | BATCH_SIZE_PER_GPU: 32 55 | SHUFFLE: true 56 | BEGIN_EPOCH: 0 57 | END_EPOCH: 140 58 | OPTIMIZER: 'adam' 59 | LR: 0.001 60 | LR_FACTOR: 0.1 61 | LR_STEP: 62 | - 90 63 | - 120 64 | WD: 0.0001 65 | GAMMA1: 0.99 66 | GAMMA2: 0.0 67 | MOMENTUM: 0.9 68 | NESTEROV: false 69 | TEST: 70 | BATCH_SIZE_PER_GPU: 32 71 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 72 | # COCO_BBOX_FILE: 
'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 73 | BBOX_THRE: 1.0 74 | IMAGE_THRE: 0.0 75 | IN_VIS_THRE: 0.02 76 | MODEL_FILE: '' 77 | NMS_THRE: 1.0 78 | OKS_THRE: 0.9 79 | FLIP_TEST: true 80 | POST_PROCESS: false 81 | SHIFT_HEATMAP: true 82 | USE_GT_BBOX: true 83 | DEBUG: 84 | DEBUG: true 85 | SAVE_BATCH_IMAGES_GT: true 86 | SAVE_BATCH_IMAGES_PRED: true 87 | SAVE_HEATMAPS_GT: true 88 | SAVE_HEATMAPS_PRED: true 89 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res101_256x192_d256x3_adam_lr1e-3_split_2_ls4e1_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 3072 26 | PRETRAINED: '/data/pretrained/imagenet/resnet101-5d3b4d8f.pth' 27 | IMAGE_SIZE: 28 | - 192 29 | - 256 30 | HEATMAP_SIZE: 31 | - 192 32 | - 256 33 | SIGMA: 2 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 101 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | LABEL_SMOOTHING: 0.4 53 | TRAIN: 54 | BATCH_SIZE_PER_GPU: 32 55 | SHUFFLE: true 56 | BEGIN_EPOCH: 0 57 | END_EPOCH: 140 58 | OPTIMIZER: 'adam' 59 | LR: 0.001 60 | LR_FACTOR: 0.1 61 | LR_STEP: 62 | - 90 63 | - 120 64 | WD: 0.0001 65 | GAMMA1: 0.99 66 
| GAMMA2: 0.0 67 | MOMENTUM: 0.9 68 | NESTEROV: false 69 | TEST: 70 | BATCH_SIZE_PER_GPU: 32 71 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 72 | BBOX_THRE: 1.0 73 | IMAGE_THRE: 0.0 74 | IN_VIS_THRE: 0.01 75 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_resnet_101_256x192.pth' 76 | NMS_THRE: 1.0 77 | OKS_THRE: 0.9 78 | FLIP_TEST: true 79 | POST_PROCESS: false 80 | SHIFT_HEATMAP: true 81 | USE_GT_BBOX: true 82 | DEBUG: 83 | DEBUG: true 84 | SAVE_BATCH_IMAGES_GT: true 85 | SAVE_BATCH_IMAGES_PRED: true 86 | SAVE_HEATMAPS_GT: true 87 | SAVE_HEATMAPS_PRED: true 88 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res101_64x64_d256x3_adam_lr1e-3_split_3_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 3.0 25 | HEAD_INPUT: 256 26 | PRETRAINED: '/data/pretrained/imagenet/resnet101-5d3b4d8f.pth' 27 | IMAGE_SIZE: 28 | - 64 29 | - 64 30 | HEATMAP_SIZE: 31 | - 64 32 | - 64 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 101 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 
| END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 71 | # COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 72 | BBOX_THRE: 1.0 73 | IMAGE_THRE: 0.0 74 | IN_VIS_THRE: 0.02 75 | MODEL_FILE: '' 76 | NMS_THRE: 1.0 77 | OKS_THRE: 0.9 78 | FLIP_TEST: true 79 | POST_PROCESS: false 80 | SHIFT_HEATMAP: true 81 | USE_GT_BBOX: true 82 | DEBUG: 83 | DEBUG: true 84 | SAVE_BATCH_IMAGES_GT: true 85 | SAVE_BATCH_IMAGES_PRED: true 86 | SAVE_HEATMAPS_GT: true 87 | SAVE_HEATMAPS_PRED: true 88 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res50_128x128_d256x3_adam_lr1e-3_split_3_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 3.0 25 | HEAD_INPUT: 1024 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 128 29 | - 128 30 | HEATMAP_SIZE: 31 | - 128 32 | - 128 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 
48 | NUM_LAYERS: 50 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.02 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res50_256x192_d256x3_adam_lr1e-3_deconv3_split2.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 3072 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 192 29 | - 256 30 | HEATMAP_SIZE: 31 | - 192 32 | - 256 33 | SIGMA: 2 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 
256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 50 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.01 74 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_resnet_50_256x192.pth' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res50_384x288_d256x3_adam_lr1e-3_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 6912 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 288 29 | - 384 30 | HEATMAP_SIZE: 31 | - 288 32 | - 384 33 | SIGMA: 3 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 
'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 50 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.01 74 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_resnet_50_384x288.pth' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/nmt_res50_64x64_d256x3_adam_lr1e-3_split_3_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 3 25 | HEAD_INPUT: 256 26 | PRETRAINED: 
'/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 64 29 | - 64 30 | HEATMAP_SIZE: 31 | - 64 32 | - 64 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 50 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.02 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/norm_nmt_res101_384x288_d256x3_adam_lr1e-3_split_2_ls1e1_deconv3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 
'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 6912 26 | PRETRAINED: '/data/pretrained/imagenet/resnet101-5d3b4d8f.pth' 27 | IMAGE_SIZE: 28 | - 288 29 | - 384 30 | HEATMAP_SIZE: 31 | - 288 32 | - 384 33 | SIGMA: 3 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 101 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTNORMCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.01 74 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_resnet_101_384x288.pth' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/original/norm_nmt_res152_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 
'/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 6912 26 | PRETRAINED: '/data/pretrained/imagenet/resnet152-b121ed2d.pth' 27 | IMAGE_SIZE: 28 | - 288 29 | - 384 30 | HEATMAP_SIZE: 31 | - 288 32 | - 384 33 | SIGMA: 3 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | DECONV_WITH_BIAS: false 39 | NUM_DECONV_LAYERS: 3 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_DECONV_KERNELS: 45 | - 4 46 | - 4 47 | - 4 48 | NUM_LAYERS: 152 49 | LOSS: 50 | USE_TARGET_WEIGHT: true 51 | TYPE: 'NMTNORMCritierion' 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 64 70 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.01 74 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_resnet_152_384x288.pth' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: false 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: false 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/upsample_free/nmt_res50_128x128_d256x3_adam_lr1e-3_split_3_cpj32.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: 
(0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet_upfree' 24 | SIMDR_SPLIT_RATIO: 3.0 25 | HEAD_INPUT: 512 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 128 29 | - 128 30 | HEATMAP_SIZE: 31 | - 128 32 | - 128 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | CHANNEL_PER_JOINT: 32 39 | NUM_LAYERS: 50 40 | LOSS: 41 | USE_TARGET_WEIGHT: true 42 | TYPE: 'NMTCritierion' 43 | TRAIN: 44 | BATCH_SIZE_PER_GPU: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | OPTIMIZER: 'adam' 49 | LR: 0.001 50 | LR_FACTOR: 0.1 51 | LR_STEP: 52 | - 90 53 | - 120 54 | WD: 0.0001 55 | GAMMA1: 0.99 56 | GAMMA2: 0.0 57 | MOMENTUM: 0.9 58 | NESTEROV: false 59 | TEST: 60 | BATCH_SIZE_PER_GPU: 32 61 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 62 | BBOX_THRE: 1.0 63 | IMAGE_THRE: 0.0 64 | IN_VIS_THRE: 0.02 65 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_upfree_resnet_50_128x128_cpj32.pth' 66 | NMS_THRE: 1.0 67 | OKS_THRE: 0.9 68 | FLIP_TEST: true 69 | POST_PROCESS: false 70 | SHIFT_HEATMAP: true 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/upsample_free/nmt_res50_256x192_d256x3_adam_lr1e-3_split_2_cpj28.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | 
ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet_upfree' 24 | SIMDR_SPLIT_RATIO: 2.0 25 | HEAD_INPUT: 1344 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 192 29 | - 256 30 | HEATMAP_SIZE: 31 | - 192 32 | - 256 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | CHANNEL_PER_JOINT: 28 39 | NUM_LAYERS: 50 40 | LOSS: 41 | USE_TARGET_WEIGHT: true 42 | TYPE: 'NMTCritierion' 43 | TRAIN: 44 | BATCH_SIZE_PER_GPU: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | OPTIMIZER: 'adam' 49 | LR: 0.001 50 | LR_FACTOR: 0.1 51 | LR_STEP: 52 | - 90 53 | - 120 54 | WD: 0.0001 55 | GAMMA1: 0.99 56 | GAMMA2: 0.0 57 | MOMENTUM: 0.9 58 | NESTEROV: false 59 | TEST: 60 | BATCH_SIZE_PER_GPU: 32 61 | COCO_BBOX_FILE: /data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json 62 | BBOX_THRE: 1.0 63 | IMAGE_THRE: 0.0 64 | IN_VIS_THRE: 0.01 65 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_upfree_resnet_50_256x192_cpj28.pth' 66 | NMS_THRE: 1.0 67 | OKS_THRE: 0.9 68 | FLIP_TEST: true 69 | POST_PROCESS: false 70 | SHIFT_HEATMAP: true 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/coco/resnet/simdr/upsample_free/nmt_res50_64x64_d256x3_adam_lr1e-3_split_3_cpj32.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: 
true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: '/data/dataset/COCO_2017' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet_upfree' 24 | SIMDR_SPLIT_RATIO: 3.0 25 | HEAD_INPUT: 128 26 | PRETRAINED: '/data/pretrained/imagenet/resnet50-19c8e357.pth' 27 | IMAGE_SIZE: 28 | - 64 29 | - 64 30 | HEATMAP_SIZE: 31 | - 64 32 | - 64 33 | SIGMA: 1 34 | NUM_JOINTS: 17 35 | COORD_REPRESENTATION: 'simdr' 36 | EXTRA: 37 | FINAL_CONV_KERNEL: 1 38 | CHANNEL_PER_JOINT: 32 39 | NUM_LAYERS: 50 40 | LOSS: 41 | USE_TARGET_WEIGHT: true 42 | TYPE: 'NMTCritierion' 43 | TRAIN: 44 | BATCH_SIZE_PER_GPU: 32 45 | SHUFFLE: true 46 | BEGIN_EPOCH: 0 47 | END_EPOCH: 140 48 | OPTIMIZER: 'adam' 49 | LR: 0.001 50 | LR_FACTOR: 0.1 51 | LR_STEP: 52 | - 90 53 | - 120 54 | WD: 0.0001 55 | GAMMA1: 0.99 56 | GAMMA2: 0.0 57 | MOMENTUM: 0.9 58 | NESTEROV: false 59 | TEST: 60 | BATCH_SIZE_PER_GPU: 32 61 | COCO_BBOX_FILE: '/data/dataset/COCO_2017/COCO_val2017_detections_AP_H_56_person.json' 62 | BBOX_THRE: 1.0 63 | IMAGE_THRE: 0.0 64 | IN_VIS_THRE: 0.02 65 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/coco/simdr/pose_upfree_resnet_50_64x64_cpj32.pth' 66 | NMS_THRE: 1.0 67 | OKS_THRE: 0.9 68 | FLIP_TEST: true 69 | POST_PROCESS: false 70 | SHIFT_HEATMAP: true 71 | USE_GT_BBOX: true 72 | DEBUG: 73 | DEBUG: true 74 | SAVE_BATCH_IMAGES_GT: true 75 | SAVE_BATCH_IMAGES_PRED: true 76 | SAVE_HEATMAPS_GT: true 77 | SAVE_HEATMAPS_PRED: true 78 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/heatmap/w32_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | 
DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '/data/dataset/MPIHP' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '/data/pretrained/pose/hrnet/pose_hrnet_w32_256x256.pth' 112 | FLIP_TEST: true 113 | 
POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | PCKH_THRE: 0.5 116 | DEBUG: 117 | DEBUG: true 118 | SAVE_BATCH_IMAGES_GT: true 119 | SAVE_BATCH_IMAGES_PRED: true 120 | SAVE_HEATMAPS_GT: true 121 | SAVE_HEATMAPS_PRED: true 122 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/heatmap/w32_64x64_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '/data/dataset/MPIHP' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 30 | COORD_REPRESENTATION: 'heatmap' 31 | IMAGE_SIZE: 32 | - 64 33 | - 64 34 | HEATMAP_SIZE: 35 | - 16 36 | - 16 37 | SIGMA: 1 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 
90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | PCKH_THRE: 0.5 116 | DEBUG: 117 | DEBUG: true 118 | SAVE_BATCH_IMAGES_GT: true 119 | SAVE_BATCH_IMAGES_PRED: true 120 | SAVE_HEATMAPS_GT: true 121 | SAVE_HEATMAPS_PRED: true 122 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/sa_simdr/w32_256x256_adam_lr1e-3_split2_sigma6.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '/data/dataset/MPIHP' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 4096 30 | NUM_JOINTS: 16 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 32 | COORD_REPRESENTATION: 'sa-simdr' 33 | IMAGE_SIZE: 34 | - 256 35 | - 256 36 | HEATMAP_SIZE: 37 | - 256 38 | - 256 39 | SIGMA: 6 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | 
NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 32 63 | - 64 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 32 75 | - 64 76 | - 128 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 32 89 | - 64 90 | - 128 91 | - 256 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'KLDiscretLoss' 96 | TRAIN: 97 | BATCH_SIZE_PER_GPU: 32 98 | SHUFFLE: true 99 | BEGIN_EPOCH: 0 100 | END_EPOCH: 210 101 | OPTIMIZER: adam 102 | LR: 0.001 103 | LR_FACTOR: 0.1 104 | LR_STEP: 105 | - 170 106 | - 200 107 | WD: 0.0001 108 | GAMMA1: 0.99 109 | GAMMA2: 0.0 110 | MOMENTUM: 0.9 111 | NESTEROV: false 112 | TEST: 113 | BATCH_SIZE_PER_GPU: 32 114 | MODEL_FILE: '/data/pretrained/pose/simdr/pretrained_model/mpii/sa_simdr/pose_hrnet_w32_256x256_sa_simdr_split2_sigma6.pth' 115 | FLIP_TEST: true 116 | POST_PROCESS: false 117 | SHIFT_HEATMAP: true 118 | PCKH_THRE: 0.5 119 | DEBUG: 120 | DEBUG: true 121 | SAVE_BATCH_IMAGES_GT: true 122 | SAVE_BATCH_IMAGES_PRED: true 123 | SAVE_HEATMAPS_GT: true 124 | SAVE_HEATMAPS_PRED: true 125 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/simdr/norm_w32_256x256_adam_lr1e-3_ls2e1.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '/data/dataset/MPIHP' 21 | ROT_FACTOR: 30 22 | 
SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 2.0 29 | HEAD_INPUT: 4096 30 | NUM_JOINTS: 16 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 256 35 | - 256 36 | HEATMAP_SIZE: 37 | - 256 38 | - 256 39 | SIGMA: 2 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 32 63 | - 64 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 32 75 | - 64 76 | - 128 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 32 89 | - 64 90 | - 128 91 | - 256 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTNORMCritierion' 96 | LABEL_SMOOTHING: 0.2 97 | TRAIN: 98 | BATCH_SIZE_PER_GPU: 32 99 | SHUFFLE: true 100 | BEGIN_EPOCH: 0 101 | END_EPOCH: 210 102 | OPTIMIZER: adam 103 | LR: 0.001 104 | LR_FACTOR: 0.1 105 | LR_STEP: 106 | - 170 107 | - 200 108 | WD: 0.0001 109 | GAMMA1: 0.99 110 | GAMMA2: 0.0 111 | MOMENTUM: 0.9 112 | NESTEROV: false 113 | TEST: 114 | BATCH_SIZE_PER_GPU: 32 115 | MODEL_FILE: '' 116 | FLIP_TEST: true 117 | POST_PROCESS: false 118 | SHIFT_HEATMAP: true 119 | PCKH_THRE: 0.5 120 | DEBUG: 121 | DEBUG: true 122 | SAVE_BATCH_IMAGES_GT: true 123 | SAVE_BATCH_IMAGES_PRED: true 124 | SAVE_HEATMAPS_GT: true 125 | SAVE_HEATMAPS_PRED: true 126 | -------------------------------------------------------------------------------- 
/experiments/mpii/hrnet/simdr/norm_w32_64x64_adam_lr1e-3_ls2e1_split3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: '/data/dataset/MPIHP' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | SIMDR_SPLIT_RATIO: 3.0 29 | HEAD_INPUT: 256 30 | NUM_JOINTS: 16 31 | PRETRAINED: '/data/pretrained/imagenet/hrnet_w32-36af842e.pth' 32 | COORD_REPRESENTATION: 'simdr' 33 | IMAGE_SIZE: 34 | - 64 35 | - 64 36 | HEATMAP_SIZE: 37 | - 64 38 | - 64 39 | SIGMA: 1 40 | EXTRA: 41 | PRETRAINED_LAYERS: 42 | - 'conv1' 43 | - 'bn1' 44 | - 'conv2' 45 | - 'bn2' 46 | - 'layer1' 47 | - 'transition1' 48 | - 'stage2' 49 | - 'transition2' 50 | - 'stage3' 51 | - 'transition3' 52 | - 'stage4' 53 | FINAL_CONV_KERNEL: 1 54 | STAGE2: 55 | NUM_MODULES: 1 56 | NUM_BRANCHES: 2 57 | BLOCK: BASIC 58 | NUM_BLOCKS: 59 | - 4 60 | - 4 61 | NUM_CHANNELS: 62 | - 32 63 | - 64 64 | FUSE_METHOD: SUM 65 | STAGE3: 66 | NUM_MODULES: 4 67 | NUM_BRANCHES: 3 68 | BLOCK: BASIC 69 | NUM_BLOCKS: 70 | - 4 71 | - 4 72 | - 4 73 | NUM_CHANNELS: 74 | - 32 75 | - 64 76 | - 128 77 | FUSE_METHOD: SUM 78 | STAGE4: 79 | NUM_MODULES: 3 80 | NUM_BRANCHES: 4 81 | BLOCK: BASIC 82 | NUM_BLOCKS: 83 | - 4 84 | - 4 85 | - 4 86 | - 4 87 | NUM_CHANNELS: 88 | - 32 89 | - 64 90 | - 128 91 | - 256 92 | FUSE_METHOD: SUM 93 | LOSS: 94 | USE_TARGET_WEIGHT: true 95 | TYPE: 'NMTNORMCritierion' 96 | LABEL_SMOOTHING: 0.2 97 | TRAIN: 98 | BATCH_SIZE_PER_GPU: 32 99 | SHUFFLE: true 100 | BEGIN_EPOCH: 0 101 | END_EPOCH: 210 102 | OPTIMIZER: adam 103 | 
LR: 0.001 104 | LR_FACTOR: 0.1 105 | LR_STEP: 106 | - 170 107 | - 200 108 | WD: 0.0001 109 | GAMMA1: 0.99 110 | GAMMA2: 0.0 111 | MOMENTUM: 0.9 112 | NESTEROV: false 113 | TEST: 114 | BATCH_SIZE_PER_GPU: 32 115 | MODEL_FILE: '' 116 | FLIP_TEST: true 117 | POST_PROCESS: false 118 | SHIFT_HEATMAP: true 119 | PCKH_THRE: 0.5 120 | DEBUG: 121 | DEBUG: true 122 | SAVE_BATCH_IMAGES_GT: true 123 | SAVE_BATCH_IMAGES_PRED: true 124 | SAVE_HEATMAPS_GT: true 125 | SAVE_HEATMAPS_PRED: true 126 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 
5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | # ------------------------------------------------------------------------------ 8 | # The SimDR and SA-SimDR part: 9 | # Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn) 10 | # ------------------------------------------------------------------------------ 11 | 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import os 17 | 18 | from yacs.config import CfgNode as CN 19 | 20 | 21 | _C = CN() 22 | 23 | _C.OUTPUT_DIR = '' 24 | _C.LOG_DIR = '' 25 | _C.DATA_DIR = '' 26 | _C.GPUS = (0,) 27 | _C.WORKERS = 4 28 | _C.PRINT_FREQ = 20 29 | _C.AUTO_RESUME = False 30 | _C.PIN_MEMORY = True 31 | _C.RANK = 0 32 | 33 | # Cudnn related params 34 | _C.CUDNN = CN() 35 | _C.CUDNN.BENCHMARK = True 36 | _C.CUDNN.DETERMINISTIC = False 37 | _C.CUDNN.ENABLED = True 38 | 39 | # common params for NETWORK 40 | _C.MODEL = CN() 41 | _C.MODEL.NAME = 'pose_hrnet' 42 | _C.MODEL.INIT_WEIGHTS = True 43 | _C.MODEL.PRETRAINED = '' 44 | _C.MODEL.NUM_JOINTS = 17 45 | _C.MODEL.TAG_PER_JOINT = True 46 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 47 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 48 | _C.MODEL.SIGMA = 2 49 | _C.MODEL.COORD_REPRESENTATION = 'heatmap' 50 | _C.MODEL.AUX_ALPHA = 0.00001 51 | _C.MODEL.SIMDR_SPLIT_RATIO = 1.0 52 | _C.MODEL.HEAD_INPUT = 256 53 | _C.MODEL.DIM = 2 54 | _C.MODEL.INIT = False 55 | _C.MODEL.EXTRA = CN(new_allowed=True) 56 | 57 | _C.LOSS = CN() 58 | _C.LOSS.USE_OHKM = False 59 | _C.LOSS.TOPK = 8 60 | _C.LOSS.USE_TARGET_WEIGHT = True 61 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 62 | _C.LOSS.TYPE = 'JointsMSELoss' 63 | _C.LOSS.LABEL_SMOOTHING = 0.1 64 | 65 | # DATASET related params 66 | _C.DATASET = CN() 67 | _C.DATASET.ROOT = '' 68 | _C.DATASET.DATASET = 'mpii' 69 | _C.DATASET.TRAIN_SET = 'train' 70 | 
_C.DATASET.TEST_SET = 'valid' 71 | _C.DATASET.DATA_FORMAT = 'jpg' 72 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 73 | _C.DATASET.SELECT_DATA = False 74 | _C.DATASET.TRAIN_RATIO = 1.0 75 | _C.DATASET.TEST_RATIO = 1.0 76 | 77 | # training data augmentation 78 | _C.DATASET.FLIP = True 79 | _C.DATASET.SCALE_FACTOR = 0.25 80 | _C.DATASET.ROT_FACTOR = 30 81 | _C.DATASET.PROB_HALF_BODY = 0.0 82 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 83 | _C.DATASET.COLOR_RGB = False 84 | 85 | # train 86 | _C.TRAIN = CN() 87 | 88 | _C.TRAIN.LR_FACTOR = 0.1 89 | _C.TRAIN.LR_STEP = [90, 110] 90 | _C.TRAIN.LR = 0.001 91 | 92 | 93 | _C.TRAIN.OPTIMIZER = 'adam' 94 | _C.TRAIN.MOMENTUM = 0.9 95 | _C.TRAIN.WD = 0.0001 96 | _C.TRAIN.NESTEROV = False 97 | _C.TRAIN.GAMMA1 = 0.99 98 | _C.TRAIN.GAMMA2 = 0.0 99 | 100 | _C.TRAIN.BEGIN_EPOCH = 0 101 | _C.TRAIN.END_EPOCH = 140 102 | 103 | _C.TRAIN.RESUME = False 104 | _C.TRAIN.CHECKPOINT = '' 105 | 106 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 107 | _C.TRAIN.SHUFFLE = True 108 | 109 | # testing 110 | _C.TEST = CN() 111 | 112 | # size of images for each device 113 | _C.TEST.BATCH_SIZE_PER_GPU = 32 114 | # Test Model Epoch 115 | _C.TEST.FLIP_TEST = False 116 | _C.TEST.POST_PROCESS = False 117 | _C.TEST.SHIFT_HEATMAP = False 118 | 119 | _C.TEST.USE_GT_BBOX = False 120 | _C.TEST.BLUR_KERNEL = 11 121 | 122 | # nms 123 | _C.TEST.IMAGE_THRE = 0.1 124 | _C.TEST.NMS_THRE = 0.6 125 | _C.TEST.SOFT_NMS = False 126 | _C.TEST.OKS_THRE = 0.5 127 | _C.TEST.IN_VIS_THRE = 0.0 128 | _C.TEST.COCO_BBOX_FILE = '' 129 | _C.TEST.BBOX_THRE = 1.0 130 | _C.TEST.MODEL_FILE = '' 131 | 132 | # PCKH 133 | _C.TEST.PCKH_THRE = 0.5 134 | 135 | # debug 136 | _C.DEBUG = CN() 137 | _C.DEBUG.DEBUG = False 138 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 139 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 140 | _C.DEBUG.SAVE_HEATMAPS_GT = False 141 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 142 | 143 | 144 | def update_config(cfg, args): 145 | cfg.defrost() 146 | cfg.merge_from_file(args.cfg) 147 | 
    # Command-line `--opts KEY VALUE ...` overrides take the highest precedence.
    cfg.merge_from_list(args.opts)

    if args.modelDir:
        cfg.OUTPUT_DIR = args.modelDir

    if args.logDir:
        cfg.LOG_DIR = args.logDir

    if args.dataDir:
        cfg.DATA_DIR = args.dataDir

    # Resolve data/model paths relative to DATA_DIR.
    # (os.path.join with a leading '' DATA_DIR leaves absolute paths unchanged.)
    cfg.DATASET.ROOT = os.path.join(
        cfg.DATA_DIR, cfg.DATASET.ROOT
    )

    cfg.MODEL.PRETRAINED = os.path.join(
        cfg.DATA_DIR, cfg.MODEL.PRETRAINED
    )

    if cfg.TEST.MODEL_FILE:
        cfg.TEST.MODEL_FILE = os.path.join(
            cfg.DATA_DIR, cfg.TEST.MODEL_FILE
        )

    cfg.freeze()


if __name__ == '__main__':
    # Utility entry point: dump the default config to the file named in argv[1].
    import sys
    with open(sys.argv[1], 'w') as f:
        print(_C, file=f)

-------------------------------------------------------------------------------- /lib/config/models.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | 
POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC'
POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM'


# Maps MODEL.NAME to the default EXTRA config node for that architecture.
MODEL_EXTRAS = {
    'pose_resnet': POSE_RESNET,
    'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET,
}
-------------------------------------------------------------------------------- /lib/core/evaluate.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from core.inference import get_max_preds


def calc_dists(preds, target, normalize):
    '''
    Per-sample, per-joint normalized L2 distances.

    preds/target: [batch, num_joints, >=2] coordinate arrays.
    normalize: [batch, 2] per-sample divisor applied element-wise.
    Returns a [num_joints, batch] array (note: transposed w.r.t. the inputs);
    entries are -1 where the target joint has a coordinate <= 1,
    which is treated as "not annotated" here.
    '''
    preds = preds.astype(np.float32)
    target = target.astype(np.float32)
    dists = np.zeros((preds.shape[1], preds.shape[0]))
    for n in range(preds.shape[0]):
        for c in range(preds.shape[1]):
            if target[n, c, 0] > 1 and target[n, c, 1] > 1:
                normed_preds = preds[n, c, :] / normalize[n]
                normed_targets = target[n, c, :] / normalize[n]
                dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
            else:
                # Sentinel for joints without a usable annotation.
                dists[c, n] = -1
    return dists


def dist_acc(dists, thr=0.5):
    ''' Return percentage below threshold while ignoring values with a -1 '''
    dist_cal = np.not_equal(dists, -1)
    num_dist_cal = dist_cal.sum()
    if num_dist_cal > 0:
        return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
    else:
        # No annotated joints at all for this index.
        return -1


def accuracy(output, target, hm_type='gaussian', thr=0.5):
    '''
    Calculate accuracy according to PCK,
    but uses ground truth heatmap rather than x,y locations
    First value to be returned is average accuracy across 'idxs',
    followed by individual accuracies
    '''
    idx = list(range(output.shape[1]))
    norm = 1.0
    if hm_type == 'gaussian':
        pred, _ = get_max_preds(output)
        target, _ = get_max_preds(target)
        h = output.shape[2]
        w = output.shape[3]
        # PCK normalizer: one tenth of the heatmap size per sample.
        # NOTE(review): this is [h, w] although preds are (x, y); it matches
        # the upstream HRNet implementation — confirm intentional.
        norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10
        dists = calc_dists(pred, target, norm)
    # NOTE(review): `dists` and `pred` are only bound inside the 'gaussian'
    # branch; any other hm_type raises NameError below — confirm callers
    # never pass a different type.

    # acc[0] holds the average; per-joint accuracies start at acc[1].
    acc = np.zeros((len(idx) + 1))
    avg_acc = 0
    cnt = 0

    for i in range(len(idx)):
        acc[i + 1] = dist_acc(dists[idx[i]])
        if acc[i + 1] >= 0:
            # Only joints with at least one valid annotation count
            # toward the average (dist_acc returns -1 otherwise).
            avg_acc = avg_acc + acc[i + 1]
            cnt += 1

    avg_acc = avg_acc / cnt if cnt != 0 else 0
    if cnt != 0:
        acc[0] = avg_acc
    return acc, avg_acc, cnt, pred


-------------------------------------------------------------------------------- /lib/core/inference.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import torch

import numpy as np
import cv2

from utils.transforms import transform_preds


def get_max_preds(batch_heatmaps):
    '''
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

    Returns (preds, maxvals):
      preds   [batch, num_joints, 2] argmax (x, y) per heatmap, zeroed
              where the peak score is not positive
      maxvals [batch, num_joints, 1] peak score per heatmap
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    # Argmax over the flattened H*W plane of each joint heatmap.
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    # Decode the flat index: x = idx % width, y = idx // width.
    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

    # Zero out predictions whose peak score is not positive.
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals

def get_final_preds(config, batch_heatmaps, center, scale):
    '''Argmax decode plus optional quarter-pixel refinement, then map the
    heatmap coordinates back to the original image space.'''
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    if config.TEST.POST_PROCESS:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                # Round to the nearest integer pixel before inspecting neighbors.
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < 
heatmap_width-1 and 1 < py < heatmap_height-1:
                    # Gradient sign from the two axis-aligned neighbors.
                    diff = np.array(
                        [
                            hm[py][px+1] - hm[py][px-1],
                            hm[py+1][px]-hm[py-1][px]
                        ]
                    )
                    # Shift a quarter pixel toward the higher activation.
                    coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
        )

    return preds, maxvals
-------------------------------------------------------------------------------- /lib/core/loss.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
# The SimDR and SA-SimDR part:
# Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class KLDiscretLoss(nn.Module):
    '''KL-divergence loss on per-axis 1-D keypoint bin distributions
    (the SA-SimDR loss; selected via LOSS.TYPE 'KLDiscretLoss').

    forward() expects separate x/y logits of shape [B, K, bins] and matching
    per-axis target distributions, weighted per joint by target_weight.
    # assumes target_x/target_y are already probability distributions over
    # bins — TODO confirm against the dataset target generation.
    '''
    def __init__(self):
        super(KLDiscretLoss, self).__init__()
        self.LogSoftmax = nn.LogSoftmax(dim=1) #[B,LOGITS]
        self.criterion_ = nn.KLDivLoss(reduction='none')


    def criterion(self, dec_outs, labels):
        # KLDivLoss expects log-probabilities as input; reduce over bins.
        scores = self.LogSoftmax(dec_outs)
        loss = torch.mean(self.criterion_(scores, labels), dim=1)
        return loss

    def forward(self, output_x, output_y, target_x, target_y, target_weight):
        num_joints = output_x.size(1)
        loss = 0

        for idx in 
range(num_joints):
            coord_x_pred = output_x[:,idx].squeeze()
            coord_y_pred = output_y[:,idx].squeeze()
            coord_x_gt = target_x[:,idx].squeeze()
            coord_y_gt = target_y[:,idx].squeeze()
            weight = target_weight[:,idx].squeeze()
            # Visibility-weighted KL per axis, averaged over the batch.
            loss += (self.criterion(coord_x_pred,coord_x_gt).mul(weight).mean())
            loss += (self.criterion(coord_y_pred,coord_y_gt).mul(weight).mean())
        return loss / num_joints

class NMTNORMCritierion(nn.Module):
    '''NMT-style label-smoothed cross-entropy over per-axis coordinate bins
    (SimDR loss, LOSS.TYPE 'NMTNORMCritierion'); per-sample loss is the
    MEAN over bins (cf. NMTCritierion, which sums and divides by batch).

    NOTE(review): with label_smoothing == 0 the criterion_ (NLLLoss,
    reduction='none') returns a 1-D tensor, so torch.mean(..., dim=1)
    in criterion() looks like it would raise; the experiment YAMLs always
    set LOSS.LABEL_SMOOTHING > 0 — confirm the unsmoothed path is unused.
    '''
    def __init__(self, label_smoothing=0.0):
        super(NMTNORMCritierion, self).__init__()
        self.label_smoothing = label_smoothing
        self.LogSoftmax = nn.LogSoftmax(dim=1) #[B,LOGITS]

        if label_smoothing > 0:
            self.criterion_ = nn.KLDivLoss(reduction='none')
        else:
            self.criterion_ = nn.NLLLoss(reduction='none', ignore_index=100000)
        self.confidence = 1.0 - label_smoothing

    def _smooth_label(self, num_tokens):
        # randn only allocates; fill_ overwrites every entry with the
        # off-target smoothing mass.
        one_hot = torch.randn(1, num_tokens)
        one_hot.fill_(self.label_smoothing / (num_tokens - 1))
        return one_hot

    def _bottle(self, v):
        # Collapse [A, B, C] -> [A*B, C].
        return v.view(-1, v.size(2))

    def criterion(self, dec_outs, labels):
        scores = self.LogSoftmax(dec_outs)
        num_tokens = scores.size(-1)

        # conduct label_smoothing module
        gtruth = labels.view(-1)
        if self.confidence < 1:
            tdata = gtruth.detach()
            one_hot = self._smooth_label(num_tokens)  # Do label smoothing, shape is [M]
            if labels.is_cuda:
                one_hot = one_hot.cuda()
            tmp_ = one_hot.repeat(gtruth.size(0), 1)  # [N, M]
            # Place the remaining confidence mass on the true bin.
            tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)  # after tdata.unsqueeze(1) , tdata shape is [N,1]
            gtruth = tmp_.detach()
        # Mean over bins -> per-sample loss ("NORM" variant).
        loss = torch.mean(self.criterion_(scores, gtruth), dim=1)
        return loss

    def forward(self, output_x, output_y, target, target_weight):
        # target carries integer bin indices per joint: target[:, idx, 0] = x,
        # target[:, idx, 1] = y.
        batch_size = output_x.size(0)
        num_joints = output_x.size(1)
        loss = 0

        for idx in range(num_joints):
            coord_x_pred = output_x[:,idx].squeeze()
            coord_y_pred = output_y[:,idx].squeeze()
            coord_gt = target[:,idx].squeeze()
            weight = target_weight[:,idx].squeeze()

            loss += self.criterion(coord_x_pred,coord_gt[:,0]).mul(weight).mean()
            loss += self.criterion(coord_y_pred,coord_gt[:,1]).mul(weight).mean()
        return loss / num_joints

class NMTCritierion(nn.Module):
    '''Same label-smoothed cross-entropy as NMTNORMCritierion, but the
    per-sample loss is the SUM over bins and forward() divides by the
    batch size instead of the joint count.'''
    def __init__(self, label_smoothing=0.0):
        super(NMTCritierion, self).__init__()
        self.label_smoothing = label_smoothing
        self.LogSoftmax = nn.LogSoftmax(dim=1) #[B,LOGITS]

        if label_smoothing > 0:
            self.criterion_ = nn.KLDivLoss(reduction='none')
        else:
            self.criterion_ = nn.NLLLoss(reduction='none', ignore_index=100000)
        self.confidence = 1.0 - label_smoothing

    def _smooth_label(self, num_tokens):
        one_hot = torch.randn(1, num_tokens)
        one_hot.fill_(self.label_smoothing / (num_tokens - 1))
        return one_hot

    def _bottle(self, v):
        return v.view(-1, v.size(2))

    def criterion(self, dec_outs, labels):
        scores = self.LogSoftmax(dec_outs)
        num_tokens = scores.size(-1)

        # conduct label_smoothing module
        gtruth = labels.view(-1)
        if self.confidence < 1:
            tdata = gtruth.detach()
            one_hot = self._smooth_label(num_tokens)  # Do label smoothing, shape is [M]
            if labels.is_cuda:
                one_hot = one_hot.cuda()
            tmp_ = one_hot.repeat(gtruth.size(0), 1)  # [N, M]
            tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)  # after tdata.unsqueeze(1) , tdata shape is [N,1]
            gtruth = tmp_.detach()
        # Sum (not mean) over bins — the only difference from the NORM variant.
        loss = torch.sum(self.criterion_(scores, gtruth), dim=1)
        return loss

    def forward(self, output_x, output_y, target, target_weight):
        batch_size = output_x.size(0)
        num_joints = output_x.size(1)
        loss = 0

        for idx in range(num_joints):
            coord_x_pred = output_x[:,idx].squeeze()
            coord_y_pred = 
output_y[:,idx].squeeze()
            coord_gt = target[:,idx].squeeze()
            weight = target_weight[:,idx].squeeze()
            # Sum over the batch here; normalize by batch size below.
            loss += self.criterion(coord_x_pred,coord_gt[:,0]).mul(weight).sum()
            loss += self.criterion(coord_y_pred,coord_gt[:,1]).mul(weight).sum()
        return loss / batch_size

class JointsMSELoss(nn.Module):
    '''Standard heatmap-regression MSE (LOSS.TYPE 'JointsMSELoss'):
    0.5 * MSE per joint, optionally scaled by per-joint target_weight,
    averaged over joints.'''
    def __init__(self, use_target_weight):
        super(JointsMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='mean')
        self.use_target_weight = use_target_weight

    def forward(self, output, target, target_weight):
        batch_size = output.size(0)
        num_joints = output.size(1)
        # Flatten each joint heatmap and split into per-joint tensors.
        heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
        heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
        loss = 0

        for idx in range(num_joints):
            heatmap_pred = heatmaps_pred[idx].squeeze()
            heatmap_gt = heatmaps_gt[idx].squeeze()

            if self.use_target_weight:
                # Weighting both sides zeroes the loss of invisible joints.
                loss += 0.5 * self.criterion(
                    heatmap_pred.mul(target_weight[:, idx]),
                    heatmap_gt.mul(target_weight[:, idx])
                )
            else:
                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)

        return loss / num_joints


class JointsOHKMMSELoss(nn.Module):
    '''Heatmap MSE with Online Hard Keypoint Mining: only the top-k
    highest-loss joints per sample contribute to the final loss.'''
    def __init__(self, use_target_weight, topk=8):
        super(JointsOHKMMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='none')
        self.use_target_weight = use_target_weight
        self.topk = topk

    def ohkm(self, loss):
        # loss: [batch, num_joints] per-joint losses.
        ohkm_loss = 0.
        # Average the k largest per-joint losses of each sample, then
        # average over the batch.
        for i in range(loss.size()[0]):
            sub_loss = loss[i]
            topk_val, topk_idx = torch.topk(
                sub_loss, k=self.topk, dim=0, sorted=False
            )
            tmp_loss = torch.gather(sub_loss, 0, topk_idx)
            ohkm_loss += torch.sum(tmp_loss) / self.topk
        ohkm_loss /= loss.size()[0]
        return ohkm_loss

    def forward(self, output, target, target_weight):
        batch_size = output.size(0)
        num_joints = output.size(1)
        heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
        heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)

        # Unreduced per-pixel MSE, collected per joint.
        loss = []
        for idx in range(num_joints):
            heatmap_pred = heatmaps_pred[idx].squeeze()
            heatmap_gt = heatmaps_gt[idx].squeeze()
            if self.use_target_weight:
                loss.append(0.5 * self.criterion(
                    heatmap_pred.mul(target_weight[:, idx]),
                    heatmap_gt.mul(target_weight[:, idx])
                ))
            else:
                loss.append(
                    0.5 * self.criterion(heatmap_pred, heatmap_gt)
                )

        # Reduce each joint's pixel losses to one value per sample -> [B, K].
        loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss]
        loss = torch.cat(loss, dim=1)

        return self.ohkm(loss)
-------------------------------------------------------------------------------- /lib/dataset/__init__.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Aliased to the lowercase names used by DATASET.DATASET in the experiment YAMLs.
from .mpii import MPIIDataset as mpii
from .coco import COCODataset as coco
-------------------------------------------------------------------------------- /lib/dataset/mpii.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import json_tricks as json
from collections import OrderedDict

import numpy as np
from scipy.io import loadmat, savemat

from dataset.JointsDataset import JointsDataset


logger = logging.getLogger(__name__)

class MPIIDataset(JointsDataset):
    '''MPII single-person keypoint dataset (16 joints).'''
    def __init__(self, cfg, root, image_set, is_train, transform=None, coord_representation='heatmap', simdr_split_ratio=1):
        super().__init__(cfg, root, image_set, is_train, transform, coord_representation, simdr_split_ratio)

        self.num_joints = 16
        self.pckh_thre = cfg.TEST.PCKH_THRE
        # Left/right joint index pairs swapped when an image is mirrored.
        self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
        self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14]

        # Joint index groups used by half-body augmentation.
        self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15)
        self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6)

        self.db = self._get_db()

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)
41 | logger.info('=> load {} samples'.format(len(self.db))) 42 | 43 | def _get_db(self): 44 | # create train/val split 45 | file_name = os.path.join( 46 | self.root, 'annot', self.image_set+'.json' 47 | ) 48 | with open(file_name) as anno_file: 49 | anno = json.load(anno_file) 50 | 51 | gt_db = [] 52 | for a in anno: 53 | image_name = a['image'] 54 | 55 | c = np.array(a['center'], dtype=np.float) 56 | s = np.array([a['scale'], a['scale']], dtype=np.float) 57 | 58 | # Adjust center/scale slightly to avoid cropping limbs 59 | if c[0] != -1: 60 | c[1] = c[1] + 15 * s[1] 61 | s = s * 1.25 62 | 63 | # MPII uses matlab format, index is based 1, 64 | # we should first convert to 0-based index 65 | c = c - 1 66 | 67 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 68 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 69 | if self.image_set != 'test': 70 | joints = np.array(a['joints']) 71 | joints[:, 0:2] = joints[:, 0:2] - 1 72 | joints_vis = np.array(a['joints_vis']) 73 | assert len(joints) == self.num_joints, \ 74 | 'joint num diff: {} vs {}'.format(len(joints), 75 | self.num_joints) 76 | 77 | joints_3d[:, 0:2] = joints[:, 0:2] 78 | joints_3d_vis[:, 0] = joints_vis[:] 79 | joints_3d_vis[:, 1] = joints_vis[:] 80 | 81 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 82 | gt_db.append( 83 | { 84 | 'image': os.path.join(self.root, image_dir, image_name), 85 | 'center': c, 86 | 'scale': s, 87 | 'joints_3d': joints_3d, 88 | 'joints_3d_vis': joints_3d_vis, 89 | 'filename': '', 90 | 'imgnum': 0, 91 | } 92 | ) 93 | 94 | return gt_db 95 | 96 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 97 | # convert 0-based index to 1-based index 98 | preds = preds[:, :, 0:2] + 1.0 99 | 100 | if output_dir: 101 | pred_file = os.path.join(output_dir, 'pred.mat') 102 | savemat(pred_file, mdict={'preds': preds}) 103 | 104 | if 'test' in cfg.DATASET.TEST_SET: 105 | return {'Null': 0.0}, 0.0 106 | 107 | SC_BIAS = 0.6 108 | threshold = 
self.pckh_thre 109 | 110 | gt_file = os.path.join(cfg.DATASET.ROOT, 111 | 'annot', 112 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 113 | gt_dict = loadmat(gt_file) 114 | dataset_joints = gt_dict['dataset_joints'] 115 | jnt_missing = gt_dict['jnt_missing'] 116 | pos_gt_src = gt_dict['pos_gt_src'] 117 | headboxes_src = gt_dict['headboxes_src'] 118 | 119 | pos_pred_src = np.transpose(preds, [1, 2, 0]) 120 | 121 | head = np.where(dataset_joints == 'head')[1][0] 122 | lsho = np.where(dataset_joints == 'lsho')[1][0] 123 | lelb = np.where(dataset_joints == 'lelb')[1][0] 124 | lwri = np.where(dataset_joints == 'lwri')[1][0] 125 | lhip = np.where(dataset_joints == 'lhip')[1][0] 126 | lkne = np.where(dataset_joints == 'lkne')[1][0] 127 | lank = np.where(dataset_joints == 'lank')[1][0] 128 | 129 | rsho = np.where(dataset_joints == 'rsho')[1][0] 130 | relb = np.where(dataset_joints == 'relb')[1][0] 131 | rwri = np.where(dataset_joints == 'rwri')[1][0] 132 | rkne = np.where(dataset_joints == 'rkne')[1][0] 133 | rank = np.where(dataset_joints == 'rank')[1][0] 134 | rhip = np.where(dataset_joints == 'rhip')[1][0] 135 | 136 | jnt_visible = 1 - jnt_missing 137 | uv_error = pos_pred_src - pos_gt_src 138 | uv_err = np.linalg.norm(uv_error, axis=1) 139 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 140 | headsizes = np.linalg.norm(headsizes, axis=0) 141 | headsizes *= SC_BIAS 142 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 143 | scaled_uv_err = np.divide(uv_err, scale) 144 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 145 | jnt_count = np.sum(jnt_visible, axis=1) 146 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 147 | jnt_visible) 148 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 149 | # save 150 | rng = np.arange(0, 0.5+0.01, 0.01) 151 | pckAll = np.zeros((len(rng), 16)) 152 | 153 | for r in range(len(rng)): 154 | threshold = rng[r] 155 | if r == 11: 156 | print(rng[r]) 157 | less_than_threshold 
= np.multiply(scaled_uv_err <= threshold, 158 | jnt_visible) 159 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 160 | jnt_count) 161 | 162 | PCKh = np.ma.array(PCKh, mask=False) 163 | PCKh.mask[6:8] = True 164 | 165 | jnt_count = np.ma.array(jnt_count, mask=False) 166 | jnt_count.mask[6:8] = True 167 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 168 | 169 | name_value = [ 170 | ('Head', PCKh[head]), 171 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 172 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 173 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 174 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 175 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 176 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 177 | ('Mean', np.sum(PCKh * jnt_ratio)), 178 | ('Mean@0.1', np.sum(pckAll[10, :] * jnt_ratio)) 179 | # ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 180 | ] 181 | name_value = OrderedDict(name_value) 182 | 183 | return name_value, name_value['Mean'] 184 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# The SimDR and SA-SimDR part:
# Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn)
# ------------------------------------------------------------------------------
# FIX: the three `from __future__` imports were duplicated verbatim;
# keep a single copy.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Importing the model modules registers them so that
# `models.<name>.get_pose_net` can be looked up by config name.
import models.pose_resnet
import models.pose_resnet_upfree
import models.pose_hrnet

# --- file boundary: lib/models/pose_resnet.py ---
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | # ------------------------------------------------------------------------------ 7 | # The SimDR and SA-SimDR part: 8 | # Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn) 9 | # ------------------------------------------------------------------------------ 10 | from __future__ import absolute_import 11 | from __future__ import division 12 | from __future__ import print_function 13 | 14 | import os 15 | import logging 16 | 17 | import torch 18 | import torch.nn as nn 19 | from einops import rearrange, repeat 20 | 21 | BN_MOMENTUM = 0.1 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | """3x3 convolution with padding""" 27 | return nn.Conv2d( 28 | in_planes, out_planes, kernel_size=3, stride=stride, 29 | padding=1, bias=False 30 | ) 31 | 32 | 33 | class BasicBlock(nn.Module): 34 | expansion = 1 35 | 36 | def __init__(self, inplanes, planes, stride=1, downsample=None): 37 | super(BasicBlock, self).__init__() 38 | self.conv1 = conv3x3(inplanes, planes, stride) 39 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.conv2 = conv3x3(planes, planes) 42 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 43 | self.downsample = downsample 44 | self.stride = stride 45 | 46 | def forward(self, x): 47 | residual = x 48 | 49 | out = self.conv1(x) 50 | out = self.bn1(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv2(out) 54 | out = self.bn2(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out += residual 60 | out = self.relu(out) 61 | 62 | return out 63 | 64 | 65 | class Bottleneck(nn.Module): 66 | expansion = 4 67 | 68 | def __init__(self, inplanes, planes, stride=1, downsample=None): 69 | super(Bottleneck, self).__init__() 70 | self.conv1 = nn.Conv2d(inplanes, planes, 
kernel_size=1, bias=False) 71 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 72 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 73 | padding=1, bias=False) 74 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 75 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 76 | bias=False) 77 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 78 | momentum=BN_MOMENTUM) 79 | self.relu = nn.ReLU(inplace=True) 80 | self.downsample = downsample 81 | self.stride = stride 82 | 83 | def forward(self, x): 84 | residual = x 85 | 86 | out = self.conv1(x) 87 | out = self.bn1(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv2(out) 91 | out = self.bn2(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv3(out) 95 | out = self.bn3(out) 96 | 97 | if self.downsample is not None: 98 | residual = self.downsample(x) 99 | 100 | out += residual 101 | out = self.relu(out) 102 | 103 | return out 104 | 105 | 106 | class PoseResNet(nn.Module): 107 | 108 | def __init__(self, block, layers, cfg, **kwargs): 109 | super(PoseResNet, self).__init__() 110 | 111 | self.inplanes = 64 112 | extra = cfg.MODEL.EXTRA 113 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 114 | self.coord_representation = cfg.MODEL.COORD_REPRESENTATION 115 | assert self.coord_representation in ['heatmap', 'simdr', 'sa-simdr'], 'only heatmap or simdr or sa-simdr supported ~ ' 116 | 117 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 118 | bias=False) 119 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 120 | self.relu = nn.ReLU(inplace=True) 121 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 122 | self.layer1 = self._make_layer(block, 64, layers[0]) 123 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 124 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 125 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 126 | 127 | # used for deconv layers 128 | self.deconv_layers 
= self._make_deconv_layer( 129 | extra.NUM_DECONV_LAYERS, 130 | extra.NUM_DECONV_FILTERS, 131 | extra.NUM_DECONV_KERNELS, 132 | ) 133 | 134 | self.final_layer = nn.Conv2d( 135 | in_channels=extra.NUM_DECONV_FILTERS[-1], 136 | out_channels=cfg.MODEL.NUM_JOINTS, 137 | kernel_size=extra.FINAL_CONV_KERNEL, 138 | stride=1, 139 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 140 | ) 141 | 142 | # head 143 | if self.coord_representation == 'simdr' or self.coord_representation == 'sa-simdr': 144 | self.mlp_head_x = nn.Linear(cfg.MODEL.HEAD_INPUT, int(cfg.MODEL.IMAGE_SIZE[0]*cfg.MODEL.SIMDR_SPLIT_RATIO)) 145 | self.mlp_head_y = nn.Linear(cfg.MODEL.HEAD_INPUT, int(cfg.MODEL.IMAGE_SIZE[1]*cfg.MODEL.SIMDR_SPLIT_RATIO)) 146 | 147 | def _make_layer(self, block, planes, blocks, stride=1): 148 | downsample = None 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion, 152 | kernel_size=1, stride=stride, bias=False), 153 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 154 | ) 155 | 156 | layers = [] 157 | layers.append(block(self.inplanes, planes, stride, downsample)) 158 | self.inplanes = planes * block.expansion 159 | for i in range(1, blocks): 160 | layers.append(block(self.inplanes, planes)) 161 | 162 | return nn.Sequential(*layers) 163 | 164 | def _get_deconv_cfg(self, deconv_kernel, index): 165 | if deconv_kernel == 4: 166 | padding = 1 167 | output_padding = 0 168 | elif deconv_kernel == 3: 169 | padding = 1 170 | output_padding = 1 171 | elif deconv_kernel == 2: 172 | padding = 0 173 | output_padding = 0 174 | 175 | return deconv_kernel, padding, output_padding 176 | 177 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 178 | assert num_layers == len(num_filters), \ 179 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 180 | assert num_layers == len(num_kernels), \ 181 | 'ERROR: num_deconv_layers is different 
len(num_deconv_filters)' 182 | 183 | layers = [] 184 | for i in range(num_layers): 185 | kernel, padding, output_padding = \ 186 | self._get_deconv_cfg(num_kernels[i], i) 187 | 188 | planes = num_filters[i] 189 | layers.append( 190 | nn.ConvTranspose2d( 191 | in_channels=self.inplanes, 192 | out_channels=planes, 193 | kernel_size=kernel, 194 | stride=2, 195 | padding=padding, 196 | output_padding=output_padding, 197 | bias=self.deconv_with_bias)) 198 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 199 | layers.append(nn.ReLU(inplace=True)) 200 | self.inplanes = planes 201 | 202 | return nn.Sequential(*layers) 203 | 204 | def forward(self, x): 205 | x = self.conv1(x) 206 | x = self.bn1(x) 207 | x = self.relu(x) 208 | x = self.maxpool(x) 209 | 210 | x = self.layer1(x) 211 | x = self.layer2(x) 212 | x = self.layer3(x) 213 | x = self.layer4(x) 214 | 215 | x = self.deconv_layers(x) 216 | 217 | x = self.final_layer(x) 218 | 219 | if self.coord_representation == 'simdr' or self.coord_representation == 'sa-simdr': 220 | x = rearrange(x, 'b c h w -> b c (h w)') 221 | pred_x = self.mlp_head_x(x) 222 | pred_y = self.mlp_head_y(x) 223 | return pred_x, pred_y 224 | elif self.coord_representation == 'heatmap': 225 | return x 226 | 227 | def init_weights(self, pretrained=''): 228 | if os.path.isfile(pretrained): 229 | logger.info('=> init deconv weights from normal distribution') 230 | for name, m in self.deconv_layers.named_modules(): 231 | if isinstance(m, nn.ConvTranspose2d): 232 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 233 | logger.info('=> init {}.bias as 0'.format(name)) 234 | nn.init.normal_(m.weight, std=0.001) 235 | if self.deconv_with_bias: 236 | nn.init.constant_(m.bias, 0) 237 | elif isinstance(m, nn.BatchNorm2d): 238 | logger.info('=> init {}.weight as 1'.format(name)) 239 | logger.info('=> init {}.bias as 0'.format(name)) 240 | nn.init.constant_(m.weight, 1) 241 | nn.init.constant_(m.bias, 0) 242 | logger.info('=> init final 
conv weights from normal distribution') 243 | for m in self.final_layer.modules(): 244 | if isinstance(m, nn.Conv2d): 245 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 246 | logger.info('=> init {}.bias as 0'.format(name)) 247 | nn.init.normal_(m.weight, std=0.001) 248 | nn.init.constant_(m.bias, 0) 249 | 250 | pretrained_state_dict = torch.load(pretrained) 251 | logger.info('=> loading pretrained model {}'.format(pretrained)) 252 | self.load_state_dict(pretrained_state_dict, strict=False) 253 | else: 254 | logger.info('=> init weights from normal distribution') 255 | for m in self.modules(): 256 | if isinstance(m, nn.Conv2d): 257 | nn.init.normal_(m.weight, std=0.001) 258 | elif isinstance(m, nn.BatchNorm2d): 259 | nn.init.constant_(m.weight, 1) 260 | nn.init.constant_(m.bias, 0) 261 | elif isinstance(m, nn.ConvTranspose2d): 262 | nn.init.normal_(m.weight, std=0.001) 263 | if self.deconv_with_bias: 264 | nn.init.constant_(m.bias, 0) 265 | 266 | 267 | resnet_spec = { 268 | 18: (BasicBlock, [2, 2, 2, 2]), 269 | 34: (BasicBlock, [3, 4, 6, 3]), 270 | 50: (Bottleneck, [3, 4, 6, 3]), 271 | 101: (Bottleneck, [3, 4, 23, 3]), 272 | 152: (Bottleneck, [3, 8, 36, 3]) 273 | } 274 | 275 | 276 | def get_pose_net(cfg, is_train, **kwargs): 277 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 278 | 279 | block_class, layers = resnet_spec[num_layers] 280 | 281 | model = PoseResNet(block_class, layers, cfg, **kwargs) 282 | 283 | if is_train and cfg.MODEL.INIT_WEIGHTS: 284 | model.init_weights(cfg.MODEL.PRETRAINED) 285 | return model 286 | -------------------------------------------------------------------------------- /lib/models/pose_resnet_upfree.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | # ------------------------------------------------------------------------------ 7 | # The SimDR and SA-SimDR part: 8 | # Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn) 9 | # ------------------------------------------------------------------------------ 10 | from __future__ import absolute_import 11 | from __future__ import division 12 | from __future__ import print_function 13 | 14 | import os 15 | import logging 16 | 17 | import torch 18 | import torch.nn as nn 19 | from einops import rearrange, repeat 20 | 21 | BN_MOMENTUM = 0.1 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | """3x3 convolution with padding""" 27 | return nn.Conv2d( 28 | in_planes, out_planes, kernel_size=3, stride=stride, 29 | padding=1, bias=False 30 | ) 31 | 32 | 33 | class BasicBlock(nn.Module): 34 | expansion = 1 35 | 36 | def __init__(self, inplanes, planes, stride=1, downsample=None): 37 | super(BasicBlock, self).__init__() 38 | self.conv1 = conv3x3(inplanes, planes, stride) 39 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.conv2 = conv3x3(planes, planes) 42 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 43 | self.downsample = downsample 44 | self.stride = stride 45 | 46 | def forward(self, x): 47 | residual = x 48 | 49 | out = self.conv1(x) 50 | out = self.bn1(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv2(out) 54 | out = self.bn2(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out += residual 60 | out = self.relu(out) 61 | 62 | return out 63 | 64 | 65 | class Bottleneck(nn.Module): 66 | expansion = 4 67 | 68 | def __init__(self, inplanes, planes, stride=1, downsample=None): 69 | super(Bottleneck, self).__init__() 70 | self.conv1 = nn.Conv2d(inplanes, planes, 
kernel_size=1, bias=False) 71 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 72 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 73 | padding=1, bias=False) 74 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 75 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 76 | bias=False) 77 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 78 | momentum=BN_MOMENTUM) 79 | self.relu = nn.ReLU(inplace=True) 80 | self.downsample = downsample 81 | self.stride = stride 82 | 83 | def forward(self, x): 84 | residual = x 85 | 86 | out = self.conv1(x) 87 | out = self.bn1(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv2(out) 91 | out = self.bn2(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv3(out) 95 | out = self.bn3(out) 96 | 97 | if self.downsample is not None: 98 | residual = self.downsample(x) 99 | 100 | out += residual 101 | out = self.relu(out) 102 | 103 | return out 104 | 105 | 106 | class PoseResNet(nn.Module): 107 | 108 | def __init__(self, block, layers, cfg, **kwargs): 109 | super(PoseResNet, self).__init__() 110 | self.inplanes = 64 111 | extra = cfg.MODEL.EXTRA 112 | self.num_joints=cfg.MODEL.NUM_JOINTS 113 | self.channel_per_joint = extra.CHANNEL_PER_JOINT 114 | assert cfg.MODEL.COORD_REPRESENTATION == 'simdr' or cfg.MODEL.COORD_REPRESENTATION == 'sa-simdr', 'only simdr and sa-simdr supported for pose_resnet_upfree' 115 | 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 117 | bias=False) 118 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 119 | self.relu = nn.ReLU(inplace=True) 120 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 121 | self.layer1 = self._make_layer(block, 64, layers[0]) 122 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 123 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 124 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 125 | 126 | self.final_layer = nn.Conv2d( 127 | 
in_channels=self.inplanes, 128 | out_channels=cfg.MODEL.NUM_JOINTS*extra.CHANNEL_PER_JOINT, 129 | kernel_size=extra.FINAL_CONV_KERNEL, 130 | stride=1, 131 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 132 | ) 133 | 134 | # head 135 | self.mlp_head_x = nn.Linear(cfg.MODEL.HEAD_INPUT, int(cfg.MODEL.HEATMAP_SIZE[0]*cfg.MODEL.SIMDR_SPLIT_RATIO)) 136 | self.mlp_head_y = nn.Linear(cfg.MODEL.HEAD_INPUT, int(cfg.MODEL.HEATMAP_SIZE[1]*cfg.MODEL.SIMDR_SPLIT_RATIO)) 137 | 138 | 139 | def _make_layer(self, block, planes, blocks, stride=1): 140 | downsample = None 141 | if stride != 1 or self.inplanes != planes * block.expansion: 142 | downsample = nn.Sequential( 143 | nn.Conv2d(self.inplanes, planes * block.expansion, 144 | kernel_size=1, stride=stride, bias=False), 145 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 146 | ) 147 | 148 | layers = [] 149 | layers.append(block(self.inplanes, planes, stride, downsample)) 150 | self.inplanes = planes * block.expansion 151 | for i in range(1, blocks): 152 | layers.append(block(self.inplanes, planes)) 153 | 154 | return nn.Sequential(*layers) 155 | 156 | def forward(self, x): 157 | x = self.conv1(x) 158 | x = self.bn1(x) 159 | x = self.relu(x) 160 | x = self.maxpool(x) 161 | 162 | x = self.layer1(x) 163 | x = self.layer2(x) 164 | x = self.layer3(x) 165 | x = self.layer4(x) 166 | 167 | x = self.final_layer(x) 168 | x = rearrange(x, 'b (k t) h w -> b k (t h w)',k=self.num_joints,t=self.channel_per_joint) 169 | 170 | pred_x = self.mlp_head_x(x) 171 | pred_y = self.mlp_head_y(x) 172 | return pred_x, pred_y 173 | 174 | def init_weights(self, pretrained=''): 175 | if os.path.isfile(pretrained): 176 | logger.info('=> init final conv weights from normal distribution') 177 | for m in self.final_layer.modules(): 178 | if isinstance(m, nn.Conv2d): 179 | logger.info('=> init final_layer.weight as normal(0, 0.001)') 180 | logger.info('=> init final_layer.bias as 0') 181 | nn.init.normal_(m.weight, std=0.001) 182 | 
nn.init.constant_(m.bias, 0) 183 | pretrained_state_dict = torch.load(pretrained) 184 | logger.info('=> loading pretrained model {}'.format(pretrained)) 185 | self.load_state_dict(pretrained_state_dict, strict=False) 186 | else: 187 | logger.info('=> init weights from normal distribution') 188 | for m in self.modules(): 189 | if isinstance(m, nn.Conv2d): 190 | nn.init.normal_(m.weight, std=0.001) 191 | elif isinstance(m, nn.BatchNorm2d): 192 | nn.init.constant_(m.weight, 1) 193 | nn.init.constant_(m.bias, 0) 194 | 195 | resnet_spec = { 196 | 18: (BasicBlock, [2, 2, 2, 2]), 197 | 34: (BasicBlock, [3, 4, 6, 3]), 198 | 50: (Bottleneck, [3, 4, 6, 3]), 199 | 101: (Bottleneck, [3, 4, 23, 3]), 200 | 152: (Bottleneck, [3, 8, 36, 3]) 201 | } 202 | 203 | 204 | def get_pose_net(cfg, is_train, **kwargs): 205 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 206 | 207 | block_class, layers = resnet_spec[num_layers] 208 | 209 | model = PoseResNet(block_class, layers, cfg, **kwargs) 210 | 211 | if is_train and cfg.MODEL.INIT_WEIGHTS: 212 | model.init_weights(cfg.MODEL.PRETRAINED) 213 | return model 214 | -------------------------------------------------------------------------------- /lib/nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leeyegy/SimCC/aa6e089af29718e889680b6226eb7cac107aa21a/lib/nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
cimport numpy as np

cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

# FIX: `np.float` / `np.int` were deprecated in NumPy 1.20 and removed in
# 1.24; use the builtin `float` in the signature and `np.int_` dtypes.
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
    """Greedy IoU NMS over dets rows [x1, y1, x2, y2, score]; returns kept indices."""
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # NOTE(review): original used .astype('i') (int32), which does not match
    # np.int_t (C long) on LP64 platforms -- np.int_ matches; confirm against
    # the prebuilt extension before regenerating it.
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype(np.int_)

    cdef int ndets = dets.shape[0]
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
        np.zeros((ndets), dtype=np.int_)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep

# --- file boundary: lib/nms/gpu_nms.hpp ---
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);

# --- file boundary: lib/nms/gpu_nms.pyx ---
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

# FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin `float` is the type the alias resolved to.
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh,
            np.int32_t device_id=0):
    """CUDA-backed greedy IoU NMS; returns kept indices into `dets`."""
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    cdef np.ndarray[np.int32_t, ndim=1] \
        order = scores.argsort()[::-1].astype(np.int32)
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    # kernel returns indices into the score-sorted array; map them back
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])

# --- file boundary: lib/nms/nms.py ---
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, 
.72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | # for box detection and keep the high confidence------- so the 98 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 99 | """ 100 | greedily select boxes with high confidence and overlap with current maximum <= thresh 101 | rule out overlap >= thresh, overlap = oks 102 | :param kpts_db 103 | :param thresh: retain overlap < thresh 104 | :return: indexes to keep 105 | """ 106 | if len(kpts_db) == 0: 107 | return [] 108 | 109 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 110 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 111 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 112 | 113 | order = scores.argsort()[::-1] 114 | 115 | keep = [] 116 | while order.size > 0: 117 | i = order[0] 118 | keep.append(i) 119 | 120 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 121 | 122 | inds = np.where(oks_ovr <= thresh)[0] 123 | order = order[inds + 1] 124 | 125 | return keep 126 | 127 | 128 | def rescore(overlap, scores, thresh, type='gaussian'): 129 | assert overlap.shape[0] == scores.shape[0] 130 | if type == 'linear': 131 | inds = np.where(overlap >= thresh)[0] 132 | scores[inds] = scores[inds] * (1 - overlap[inds]) 133 | else: 134 | scores = scores * np.exp(- overlap**2 / thresh) 135 | 136 | return scores 137 | 138 | 139 | def 
soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 140 | """ 141 | greedily select boxes with high confidence and overlap with current maximum <= thresh 142 | rule out overlap >= thresh, overlap = oks 143 | :param kpts_db 144 | :param thresh: retain overlap < thresh 145 | :return: indexes to keep 146 | """ 147 | if len(kpts_db) == 0: 148 | return [] 149 | 150 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 151 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 152 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 153 | 154 | order = scores.argsort()[::-1] 155 | scores = scores[order] 156 | 157 | # max_dets = order.size 158 | max_dets = 20 159 | keep = np.zeros(max_dets, dtype=np.intp) 160 | keep_cnt = 0 161 | while order.size > 0 and keep_cnt < max_dets: 162 | i = order[0] 163 | 164 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 165 | 166 | order = order[1:] 167 | scores = rescore(oks_ovr, scores[1:], thresh) 168 | 169 | tmp = scores.argsort()[::-1] 170 | order = order[tmp] 171 | scores = scores[tmp] 172 | 173 | keep[keep_cnt] = i 174 | keep_cnt += 1 175 | 176 | keep = keep[:keep_cnt] 177 | 178 | return keep 179 | # kpts_db = kpts_db[:keep_cnt] 180 | 181 | # return kpts_db 182 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 
14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * 
row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(¤t_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | 
memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose.gluon 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.items(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. 
This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our 
custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | # ------------------------------------------------------------------------------ 7 | # The SimDR and SA-SimDR part: 8 | # Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn) 9 | # ------------------------------------------------------------------------------ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import numpy as np 16 | import cv2 17 | 18 | 19 | def flip_back(output_flipped, matched_parts): 20 | ''' 21 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 22 | ''' 23 | assert output_flipped.ndim == 4,\ 24 | 'output_flipped should be [batch_size, num_joints, height, width]' 25 | 26 | output_flipped = output_flipped[:, :, :, ::-1] 27 | 28 | for pair in matched_parts: 29 | tmp = output_flipped[:, pair[0], :, :].copy() 30 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 31 | output_flipped[:, pair[1], :, :] = tmp 32 | 33 | return output_flipped 34 | 35 | def flip_back_simdr(output_flipped, matched_parts, type='x'): 36 | ''' 37 | ouput_flipped: numpy.ndarray(batch_size, num_joints, onehot) 38 | ''' 39 | assert output_flipped.ndim == 3,\ 40 | 'output_flipped should be [batch_size, num_joints, 
onehot]' 41 | 42 | if type == 'x': 43 | output_flipped = output_flipped[:, :, ::-1] 44 | 45 | for pair in matched_parts: 46 | tmp = output_flipped[:, pair[0], :].copy() 47 | output_flipped[:, pair[0], :] = output_flipped[:, pair[1], :] 48 | output_flipped[:, pair[1], :] = tmp 49 | 50 | return output_flipped 51 | 52 | 53 | def fliplr_joints(joints, joints_vis, width, matched_parts): 54 | """ 55 | flip coords 56 | """ 57 | # Flip horizontal 58 | joints[:, 0] = width - joints[:, 0] - 1 59 | 60 | # Change left-right parts 61 | for pair in matched_parts: 62 | joints[pair[0], :], joints[pair[1], :] = \ 63 | joints[pair[1], :], joints[pair[0], :].copy() 64 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 65 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 66 | 67 | return joints*joints_vis, joints_vis 68 | 69 | 70 | def transform_preds(coords, center, scale, output_size): 71 | target_coords = np.zeros(coords.shape) 72 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 73 | for p in range(coords.shape[0]): 74 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 75 | return target_coords 76 | 77 | 78 | def get_affine_transform( 79 | center, scale, rot, output_size, 80 | shift=np.array([0, 0], dtype=np.float32), inv=0 81 | ): 82 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 83 | print(scale) 84 | scale = np.array([scale, scale]) 85 | 86 | scale_tmp = scale * 200.0 87 | src_w = scale_tmp[0] 88 | dst_w = output_size[0] 89 | dst_h = output_size[1] 90 | 91 | rot_rad = np.pi * rot / 180 92 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 93 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 94 | 95 | src = np.zeros((3, 2), dtype=np.float32) 96 | dst = np.zeros((3, 2), dtype=np.float32) 97 | src[0, :] = center + scale_tmp * shift 98 | src[1, :] = center + src_dir + scale_tmp * shift 99 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 100 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 101 | 102 | src[2:, :] = 
get_3rd_point(src[0, :], src[1, :]) 103 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 104 | 105 | if inv: 106 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 107 | else: 108 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 109 | 110 | return trans 111 | 112 | 113 | def affine_transform(pt, t): 114 | new_pt = np.array([pt[0], pt[1], 1.]).T 115 | new_pt = np.dot(t, new_pt) 116 | return new_pt[:2] 117 | 118 | 119 | def get_3rd_point(a, b): 120 | direct = a - b 121 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 122 | 123 | 124 | def get_dir(src_point, rot_rad): 125 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 126 | 127 | src_result = [0, 0] 128 | src_result[0] = src_point[0] * cs - src_point[1] * sn 129 | src_result[1] = src_point[0] * sn + src_point[1] * cs 130 | 131 | return src_result 132 | 133 | 134 | def crop(img, center, scale, output_size, rot=0): 135 | trans = get_affine_transform(center, scale, rot, output_size) 136 | 137 | dst_img = cv2.warpAffine( 138 | img, trans, (int(output_size[0]), int(output_size[1])), 139 | flags=cv2.INTER_LINEAR 140 | ) 141 | 142 | return dst_img 143 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | 17 | import torch 18 | import torch.optim as optim 19 | import torch.nn as nn 20 | 21 | 22 | def create_logger(cfg, cfg_name, phase='train'): 23 | root_output_dir = Path(cfg.OUTPUT_DIR) 24 | # set up logger 25 | if not root_output_dir.exists(): 26 | print('=> creating {}'.format(root_output_dir)) 27 | root_output_dir.mkdir() 28 | 29 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 30 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 31 | dataset = dataset.replace(':', '_') 32 | model = cfg.MODEL.NAME 33 | cfg_name = os.path.basename(cfg_name).split('.')[0] 34 | 35 | final_output_dir = root_output_dir / dataset / model / cfg_name 36 | 37 | print('=> creating {}'.format(final_output_dir)) 38 | final_output_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | time_str = time.strftime('%Y-%m-%d-%H-%M') 41 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 42 | final_log_file = final_output_dir / log_file 43 | head = '%(asctime)-15s %(message)s' 44 | logging.basicConfig(filename=str(final_log_file), 45 | format=head) 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.INFO) 48 | console = logging.StreamHandler() 49 | logging.getLogger('').addHandler(console) 50 | 51 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 52 | (cfg_name + '_' + time_str) 53 | 54 | print('=> creating {}'.format(tensorboard_log_dir)) 55 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 56 | 57 | return logger, str(final_output_dir), str(tensorboard_log_dir) 58 | 59 | 60 | def get_optimizer(cfg, model): 61 | optimizer = None 62 | if 
cfg.TRAIN.OPTIMIZER == 'sgd': 63 | optimizer = optim.SGD( 64 | model.parameters(), 65 | lr=cfg.TRAIN.LR, 66 | momentum=cfg.TRAIN.MOMENTUM, 67 | weight_decay=cfg.TRAIN.WD, 68 | nesterov=cfg.TRAIN.NESTEROV 69 | ) 70 | elif cfg.TRAIN.OPTIMIZER == 'adam': 71 | optimizer = optim.Adam( 72 | model.parameters(), 73 | lr=cfg.TRAIN.LR 74 | ) 75 | 76 | return optimizer 77 | 78 | 79 | def save_checkpoint(states, is_best, output_dir, 80 | filename='checkpoint.pth'): 81 | torch.save(states, os.path.join(output_dir, filename)) 82 | if is_best and 'state_dict' in states: 83 | torch.save(states['best_state_dict'], 84 | os.path.join(output_dir, 'model_best.pth')) 85 | 86 | 87 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 88 | """ 89 | :param model: 90 | :param input_tensors: 91 | :param item_length: 92 | :return: 93 | """ 94 | 95 | summary = [] 96 | 97 | ModuleDetails = namedtuple( 98 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 99 | hooks = [] 100 | layer_instances = {} 101 | 102 | def add_hooks(module): 103 | 104 | def hook(module, input, output): 105 | class_name = str(module.__class__.__name__) 106 | 107 | instance_index = 1 108 | if class_name not in layer_instances: 109 | layer_instances[class_name] = instance_index 110 | else: 111 | instance_index = layer_instances[class_name] + 1 112 | layer_instances[class_name] = instance_index 113 | 114 | layer_name = class_name + "_" + str(instance_index) 115 | 116 | params = 0 117 | 118 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 119 | class_name.find("Linear") != -1: 120 | for param_ in module.parameters(): 121 | params += param_.view(-1).size(0) 122 | 123 | flops = "Not Available" 124 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 125 | flops = ( 126 | torch.prod( 127 | torch.LongTensor(list(module.weight.data.size()))) * 128 | torch.prod( 129 | torch.LongTensor(list(output.size())[2:]))).item() 130 | elif 
isinstance(module, nn.Linear): 131 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 132 | * input[0].size(1)).item() 133 | 134 | if isinstance(input[0], list): 135 | input = input[0] 136 | if isinstance(output, list): 137 | output = output[0] 138 | 139 | summary.append( 140 | ModuleDetails( 141 | name=layer_name, 142 | input_size=list(input[0].size()), 143 | output_size=list(output.size()), 144 | num_parameters=params, 145 | multiply_adds=flops) 146 | ) 147 | 148 | if not isinstance(module, nn.ModuleList) \ 149 | and not isinstance(module, nn.Sequential) \ 150 | and module != model: 151 | hooks.append(module.register_forward_hook(hook)) 152 | 153 | model.eval() 154 | model.apply(add_hooks) 155 | 156 | space_len = item_length 157 | 158 | model(*input_tensors) 159 | for hook in hooks: 160 | hook.remove() 161 | 162 | details = '' 163 | if verbose: 164 | details = "Model Summary" + \ 165 | os.linesep + \ 166 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 167 | ' ' * (space_len - len("Name")), 168 | ' ' * (space_len - len("Input Size")), 169 | ' ' * (space_len - len("Output Size")), 170 | ' ' * (space_len - len("Parameters")), 171 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 172 | + os.linesep + '-' * space_len * 5 + os.linesep 173 | 174 | params_sum = 0 175 | flops_sum = 0 176 | for layer in summary: 177 | params_sum += layer.num_parameters 178 | if layer.multiply_adds != "Not Available": 179 | flops_sum += layer.multiply_adds 180 | if verbose: 181 | details += "{}{}{}{}{}{}{}{}{}{}".format( 182 | layer.name, 183 | ' ' * (space_len - len(layer.name)), 184 | layer.input_size, 185 | ' ' * (space_len - len(str(layer.input_size))), 186 | layer.output_size, 187 | ' ' * (space_len - len(str(layer.output_size))), 188 | layer.num_parameters, 189 | ' ' * (space_len - len(str(layer.num_parameters))), 190 | layer.multiply_adds, 191 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 192 | + os.linesep + '-' * space_len * 5 
+ os.linesep 193 | 194 | details += os.linesep \ 195 | + "Total Parameters: {:,}".format(params_sum) \ 196 | + os.linesep + '-' * space_len * 5 + os.linesep 197 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 198 | + os.linesep + '-' * space_len * 5 + os.linesep 199 | details += "Number of Layers" + os.linesep 200 | for layer in layer_instances: 201 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 202 | 203 | return details 204 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | 17 | from core.inference import get_max_preds 18 | 19 | 20 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 21 | file_name, nrow=8, padding=2): 22 | ''' 23 | batch_image: [batch_size, channel, height, width] 24 | batch_joints: [batch_size, num_joints, 3], 25 | batch_joints_vis: [batch_size, num_joints, 1], 26 | } 27 | ''' 28 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 29 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 30 | ndarr = ndarr.copy() 31 | 32 | nmaps = batch_image.size(0) 33 | xmaps = min(nrow, nmaps) 34 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 35 | height = int(batch_image.size(2) + padding) 36 | width = int(batch_image.size(3) + padding) 37 | k = 0 38 | for y in 
range(ymaps):
        for x in range(xmaps):
            if k >= nmaps:
                break
            joints = batch_joints[k]
            joints_vis = batch_joints_vis[k]

            # offset each joint into its cell of the image grid, then draw
            for joint, joint_vis in zip(joints, joints_vis):
                joint[0] = x * width + padding + joint[0]
                joint[1] = y * height + padding + joint[1]
                if joint_vis[0]:
                    cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
            k = k + 1
    cv2.imwrite(file_name, ndarr)


def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
                        normalize=True):
    '''
    Save a grid visualisation: one row per sample, first column the resized
    input image, then one colour-mapped heatmap per joint (peak marked).

    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    '''
    if normalize:
        # min-max normalise the images so mul(255) below covers full range
        batch_image = batch_image.clone()
        min = float(batch_image.min())
        max = float(batch_image.max())

        batch_image.add_(-min).div_(max - min + 1e-5)

    batch_size = batch_heatmaps.size(0)
    num_joints = batch_heatmaps.size(1)
    heatmap_height = batch_heatmaps.size(2)
    heatmap_width = batch_heatmaps.size(3)

    # layout: batch_size rows x (num_joints + 1) columns of heatmap-sized cells
    grid_image = np.zeros((batch_size*heatmap_height,
                           (num_joints+1)*heatmap_width,
                           3),
                          dtype=np.uint8)

    preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())

    for i in range(batch_size):
        image = batch_image[i].mul(255)\
                              .clamp(0, 255)\
                              .byte()\
                              .permute(1, 2, 0)\
                              .cpu().numpy()
        heatmaps = batch_heatmaps[i].mul(255)\
                                    .clamp(0, 255)\
                                    .byte()\
                                    .cpu().numpy()

        resized_image = cv2.resize(image,
                                   (int(heatmap_width), int(heatmap_height)))

        height_begin = heatmap_height * i
        height_end = heatmap_height * (i + 1)
        for j in range(num_joints):
            cv2.circle(resized_image,
                       (int(preds[i][j][0]), int(preds[i][j][1])),
                       1, [0, 0, 255], 1)
            heatmap = heatmaps[j, :, :]
            colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            # blend heatmap over the input so peaks are spatially anchored
            masked_image = colored_heatmap*0.7 + resized_image*0.3
            cv2.circle(masked_image,
                       (int(preds[i][j][0]), int(preds[i][j][1])),
                       1, [0, 0, 255], 1)

            width_begin = heatmap_width * (j+1)
            width_end = heatmap_width * (j+2)
            grid_image[height_begin:height_end, width_begin:width_end, :] = \
                masked_image
            # grid_image[height_begin:height_end, width_begin:width_end, :] = \
            #     colored_heatmap*0.7 + resized_image*0.3

        grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image

    cv2.imwrite(file_name, grid_image)


def save_debug_images(config, input, meta, target, joints_pred, output,
                      prefix):
    # Dump ground-truth / predicted joint overlays when DEBUG is enabled.
    # NOTE(review): the heatmap dumps below are deliberately commented out in
    # this revision (SimDR outputs are 1-D vectors, not heatmaps).
    if not config.DEBUG.DEBUG:
        return

    if config.DEBUG.SAVE_BATCH_IMAGES_GT:
        save_batch_image_with_joints(
            input, meta['joints'], meta['joints_vis'],
            '{}_gt.jpg'.format(prefix)
        )
    if config.DEBUG.SAVE_BATCH_IMAGES_PRED:
        save_batch_image_with_joints(
            input, joints_pred, meta['joints_vis'],
            '{}_pred.jpg'.format(prefix)
        )
    # if config.DEBUG.SAVE_HEATMAPS_GT:
    #     save_batch_heatmaps(
    #         input, target, '{}_hm_gt.jpg'.format(prefix)
    #     )
    # if config.DEBUG.SAVE_HEATMAPS_PRED:
    #     save_batch_heatmaps(
    #         input, output, '{}_hm_pred.jpg'.format(prefix)
    #     )
-------------------------------------------------------------------------------- /lib/utils/zipreader.py: --------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.index('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.index('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in xrange(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 
'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | opencv-python==3.4.1.15 3 | shapely==1.6.4 4 | Cython 5 | scipy 6 | pandas 7 | pyyaml 8 | json_tricks 9 | scikit-image 10 | yacs>=0.1.5 11 | tensorboardX==1.6 12 | einops==0.3.0 13 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # 
------------------------------------------------------------------------------ 7 | # ------------------------------------------------------------------------------ 8 | # The SimDR and SA-SimDR part: 9 | # Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn) 10 | # ------------------------------------------------------------------------------ 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import argparse 16 | import os 17 | import pprint 18 | 19 | import torch 20 | import torch.nn.parallel 21 | import torch.backends.cudnn as cudnn 22 | import torch.optim 23 | import torch.utils.data 24 | import torch.utils.data.distributed 25 | import torchvision.transforms as transforms 26 | 27 | import _init_paths 28 | from config import cfg 29 | from config import update_config 30 | from core.loss import JointsMSELoss, NMTCritierion, NMTNORMCritierion, KLDiscretLoss 31 | from core.function import validate_heatmap, validate_simdr, validate_sa_simdr 32 | from utils.utils import create_logger 33 | 34 | import dataset 35 | import models 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser(description='Train keypoints network') 40 | # general 41 | parser.add_argument('--cfg', 42 | help='experiment configure file name', 43 | required=True, 44 | type=str) 45 | 46 | parser.add_argument('opts', 47 | help="Modify config options using the command-line", 48 | default=None, 49 | nargs=argparse.REMAINDER) 50 | 51 | parser.add_argument('--modelDir', 52 | help='model directory', 53 | type=str, 54 | default='') 55 | parser.add_argument('--logDir', 56 | help='log directory', 57 | type=str, 58 | default='') 59 | parser.add_argument('--dataDir', 60 | help='data directory', 61 | type=str, 62 | default='') 63 | parser.add_argument('--prevModelDir', 64 | help='prev Model directory', 65 | type=str, 66 | default='') 67 | 68 | args = parser.parse_args() 69 | return args 70 | 71 | 72 | def main(): 73 | 
args = parse_args() 74 | update_config(cfg, args) 75 | 76 | logger, final_output_dir, tb_log_dir = create_logger( 77 | cfg, args.cfg, 'valid') 78 | 79 | logger.info(pprint.pformat(args)) 80 | logger.info(cfg) 81 | 82 | # cudnn related setting 83 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 84 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 85 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 86 | 87 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 88 | cfg, is_train=False 89 | ) 90 | 91 | if cfg.TEST.MODEL_FILE: 92 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 93 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 94 | else: 95 | model_state_file = os.path.join( 96 | final_output_dir, 'model_best.pth' 97 | ) 98 | logger.info('=> loading model from {}'.format(model_state_file)) 99 | model.load_state_dict(torch.load(model_state_file)) 100 | 101 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 102 | 103 | # define loss function (criterion) and optimizer 104 | if cfg.LOSS.TYPE == 'JointsMSELoss': 105 | criterion = JointsMSELoss( 106 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 107 | ).cuda() 108 | elif cfg.LOSS.TYPE == 'NMTCritierion': 109 | criterion = NMTCritierion(label_smoothing=cfg.LOSS.LABEL_SMOOTHING).cuda() 110 | elif cfg.LOSS.TYPE == 'NMTNORMCritierion': 111 | criterion = NMTNORMCritierion(label_smoothing=cfg.LOSS.LABEL_SMOOTHING).cuda() 112 | elif cfg.LOSS.TYPE == 'KLDiscretLoss': 113 | criterion = KLDiscretLoss().cuda() 114 | else: 115 | criterion = L1JointLocationLoss().cuda() 116 | 117 | # Data loading code 118 | normalize = transforms.Normalize( 119 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 120 | ) 121 | 122 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 123 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 124 | transforms.Compose([ 125 | transforms.ToTensor(), 126 | normalize, 127 | ]), 128 | cfg.MODEL.COORD_REPRESENTATION, 129 | 
cfg.MODEL.SIMDR_SPLIT_RATIO 130 | ) 131 | 132 | valid_loader = torch.utils.data.DataLoader( 133 | valid_dataset, 134 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 135 | shuffle=False, 136 | num_workers=cfg.WORKERS, 137 | pin_memory=True 138 | ) 139 | 140 | # evaluate on validation set 141 | if cfg.MODEL.COORD_REPRESENTATION == 'simdr': 142 | validate_simdr( 143 | cfg, valid_loader, valid_dataset, model, criterion, 144 | final_output_dir, tb_log_dir) 145 | elif cfg.MODEL.COORD_REPRESENTATION == 'sa-simdr': 146 | validate_sa_simdr( 147 | cfg, valid_loader, valid_dataset, model, criterion, 148 | final_output_dir, tb_log_dir) 149 | elif cfg.MODEL.COORD_REPRESENTATION == 'heatmap': 150 | validate_heatmap(cfg, valid_loader, valid_dataset, model, criterion, 151 | final_output_dir, tb_log_dir) 152 | 153 | 154 | if __name__ == '__main__': 155 | main() 156 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | # ------------------------------------------------------------------------------ 7 | # The SimDR and SA-SimDR part: 8 | # Written by Yanjie Li (lyj20@mails.tsinghua.edu.cn) 9 | # ------------------------------------------------------------------------------ 10 | from __future__ import absolute_import 11 | from __future__ import division 12 | from __future__ import print_function 13 | 14 | import argparse 15 | import os 16 | import pprint 17 | import shutil 18 | 19 | import torch 20 | import torch.nn.parallel 21 | import torch.backends.cudnn as cudnn 22 | import torch.optim 23 | import torch.utils.data 24 | import torch.utils.data.distributed 25 | import torchvision.transforms as transforms 26 | from tensorboardX import SummaryWriter 27 | 28 | import _init_paths 29 | from config import cfg 30 | from config import update_config 31 | from core.loss import JointsMSELoss, NMTCritierion, NMTNORMCritierion, KLDiscretLoss 32 | from core.function import train_heatmap, train_simdr, train_sa_simdr 33 | from core.function import validate_heatmap, validate_simdr, validate_sa_simdr 34 | from utils.utils import get_optimizer 35 | from utils.utils import save_checkpoint 36 | from utils.utils import create_logger 37 | from utils.utils import get_model_summary 38 | 39 | import dataset 40 | import models 41 | 42 | 43 | def parse_args(): 44 | parser = argparse.ArgumentParser(description='Train keypoints network') 45 | # general 46 | parser.add_argument('--cfg', 47 | help='experiment configure file name', 48 | required=True, 49 | type=str) 50 | 51 | parser.add_argument('opts', 52 | help="Modify config options using the command-line", 53 | default=None, 54 | nargs=argparse.REMAINDER) 55 | 56 | # philly 57 | parser.add_argument('--modelDir', 58 | help='model directory', 59 | type=str, 60 | default='') 61 | parser.add_argument('--logDir', 62 | 
help='log directory', 63 | type=str, 64 | default='') 65 | parser.add_argument('--dataDir', 66 | help='data directory', 67 | type=str, 68 | default='') 69 | parser.add_argument('--prevModelDir', 70 | help='prev Model directory', 71 | type=str, 72 | default='') 73 | 74 | args = parser.parse_args() 75 | 76 | return args 77 | 78 | 79 | def main(): 80 | args = parse_args() 81 | update_config(cfg, args) 82 | 83 | logger, final_output_dir, tb_log_dir = create_logger( 84 | cfg, args.cfg, 'train') 85 | 86 | logger.info(pprint.pformat(args)) 87 | logger.info(cfg) 88 | 89 | # cudnn related setting 90 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 91 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 92 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 93 | 94 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 95 | cfg, is_train=True 96 | ) 97 | 98 | # copy model file 99 | this_dir = os.path.dirname(__file__) 100 | shutil.copy2( 101 | os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), 102 | final_output_dir) 103 | # logger.info(pprint.pformat(model)) 104 | 105 | writer_dict = { 106 | 'writer': SummaryWriter(log_dir=tb_log_dir), 107 | 'train_global_steps': 0, 108 | 'valid_global_steps': 0, 109 | } 110 | 111 | dump_input = torch.rand( 112 | (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) 113 | ) 114 | # writer_dict['writer'].add_graph(model, (dump_input, )) 115 | 116 | # logger.info(get_model_summary(model, dump_input)) 117 | 118 | # count parameter number 119 | pytorch_total_params = sum(p.numel() for p in model.parameters()) 120 | logger.info("Total number of parameters: %d" % pytorch_total_params) 121 | 122 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 123 | 124 | # define loss function (criterion) and optimizer 125 | if cfg.LOSS.TYPE == 'JointsMSELoss': 126 | criterion = JointsMSELoss( 127 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 128 | ).cuda() 129 | elif cfg.LOSS.TYPE == 'NMTCritierion': 130 | criterion = 
NMTCritierion(label_smoothing=cfg.LOSS.LABEL_SMOOTHING).cuda() 131 | elif cfg.LOSS.TYPE == 'NMTNORMCritierion': 132 | criterion = NMTNORMCritierion(label_smoothing=cfg.LOSS.LABEL_SMOOTHING).cuda() 133 | elif cfg.LOSS.TYPE == 'KLDiscretLoss': 134 | criterion = KLDiscretLoss().cuda() 135 | else: 136 | criterion = L1JointLocationLoss().cuda() 137 | 138 | # Data loading code 139 | normalize = transforms.Normalize( 140 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 141 | ) 142 | train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 143 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True, 144 | transforms.Compose([ 145 | transforms.ToTensor(), 146 | normalize, 147 | ]), 148 | cfg.MODEL.COORD_REPRESENTATION, 149 | cfg.MODEL.SIMDR_SPLIT_RATIO 150 | ) 151 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 152 | cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 153 | transforms.Compose([ 154 | transforms.ToTensor(), 155 | normalize, 156 | ]), 157 | cfg.MODEL.COORD_REPRESENTATION, 158 | cfg.MODEL.SIMDR_SPLIT_RATIO 159 | ) 160 | 161 | train_loader = torch.utils.data.DataLoader( 162 | train_dataset, 163 | batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 164 | shuffle=cfg.TRAIN.SHUFFLE, 165 | num_workers=cfg.WORKERS, 166 | pin_memory=cfg.PIN_MEMORY 167 | ) 168 | valid_loader = torch.utils.data.DataLoader( 169 | valid_dataset, 170 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 171 | shuffle=False, 172 | num_workers=cfg.WORKERS, 173 | pin_memory=cfg.PIN_MEMORY 174 | ) 175 | 176 | best_perf = 0.0 177 | best_model = False 178 | last_epoch = -1 179 | optimizer = get_optimizer(cfg, model) 180 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 181 | checkpoint_file = os.path.join( 182 | final_output_dir, 'checkpoint.pth' 183 | ) 184 | 185 | if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): 186 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 187 | checkpoint = torch.load(checkpoint_file) 188 | begin_epoch = checkpoint['epoch'] 189 | best_perf 
= checkpoint['perf'] 190 | last_epoch = checkpoint['epoch'] 191 | model.load_state_dict(checkpoint['state_dict']) 192 | 193 | optimizer.load_state_dict(checkpoint['optimizer']) 194 | logger.info("=> loaded checkpoint '{}' (epoch {})".format( 195 | checkpoint_file, checkpoint['epoch'])) 196 | 197 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 198 | optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, 199 | last_epoch=last_epoch 200 | ) 201 | 202 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 203 | lr_scheduler.step() 204 | 205 | if cfg.MODEL.COORD_REPRESENTATION == 'simdr': 206 | train_simdr(cfg, train_loader, model, criterion, optimizer, epoch, 207 | final_output_dir, tb_log_dir, writer_dict) 208 | 209 | perf_indicator = validate_simdr( 210 | cfg, valid_loader, valid_dataset, model, criterion, 211 | final_output_dir, tb_log_dir, writer_dict) 212 | elif cfg.MODEL.COORD_REPRESENTATION == 'sa-simdr': 213 | train_sa_simdr(cfg, train_loader, model, criterion, optimizer, epoch, 214 | final_output_dir, tb_log_dir, writer_dict) 215 | 216 | perf_indicator = validate_sa_simdr( 217 | cfg, valid_loader, valid_dataset, model, criterion, 218 | final_output_dir, tb_log_dir, writer_dict) 219 | elif cfg.MODEL.COORD_REPRESENTATION == 'heatmap': 220 | train_heatmap(cfg, train_loader, model, criterion, optimizer, epoch, 221 | final_output_dir, tb_log_dir, writer_dict) 222 | 223 | perf_indicator = validate_heatmap( 224 | cfg, valid_loader, valid_dataset, model, criterion, 225 | final_output_dir, tb_log_dir, writer_dict 226 | ) 227 | 228 | 229 | if perf_indicator >= best_perf: 230 | best_perf = perf_indicator 231 | best_model = True 232 | else: 233 | best_model = False 234 | 235 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 236 | save_checkpoint({ 237 | 'epoch': epoch + 1, 238 | 'model': cfg.MODEL.NAME, 239 | 'state_dict': model.state_dict(), 240 | 'best_state_dict': model.module.state_dict(), 241 | 'perf': perf_indicator, 242 | 'optimizer': 
optimizer.state_dict(), 243 | }, best_model, final_output_dir) 244 | 245 | final_model_state_file = os.path.join( 246 | final_output_dir, 'final_state.pth' 247 | ) 248 | logger.info('=> saving final model state to {}'.format( 249 | final_model_state_file) 250 | ) 251 | torch.save(model.module.state_dict(), final_model_state_file) 252 | writer_dict['writer'].close() 253 | 254 | 255 | if __name__ == '__main__': 256 | main() 257 | --------------------------------------------------------------------------------