├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── coco │ └── resnet │ │ └── res50_256x192_d256x3_adam_lr1e-3_advmix.yaml └── mpii │ ├── hrnet │ └── w32_256x256_adam_lr1e-3_advmix.yaml │ └── resnet │ └── res50_256x256_d256x3_adam_lr1e-3_advmix.yaml ├── figures ├── AdvMix.jpg ├── Qualitative.png ├── benchmarking_results.png └── image_corruption.png ├── lib ├── Makefile ├── config │ ├── __init__.py │ ├── default.py │ └── models.py ├── core │ ├── evaluate.py │ ├── function.py │ ├── inference.py │ └── loss.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── advaug.py │ ├── coco.py │ └── mpii.py ├── models │ ├── Unet_generator.py │ ├── __init__.py │ ├── pose_hrnet.py │ └── pose_resnet.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ └── setup_linux.py └── utils │ ├── __init__.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt ├── scripts ├── make_datasets.sh ├── test.sh └── train.sh └── tools ├── _init_parse.py ├── _init_paths.py ├── make_datasets.py ├── test_corruption.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # mypy 2 | .mypy_cache/ 3 | .vscode 4 | *.idea* 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.pyc 11 | */__pycache__/ 12 | 13 | 14 | # ckpt 15 | /data/* 16 | /models/* 17 | /output/* 18 | /output_robustness/* 19 | /log/* 20 | # debug 21 | /scripts/debug.sh 22 | /scripts/debug_test.sh 23 | /scripts/make_datasets_local.sh 24 | /experiments/debug.yaml 25 | /tools/make_datasets_local.py 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jiahang Wang, Sheng Jin, Wentao Liu, Weizhong Liu, Chen Qian, Ping Luo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AdvMix 2 | Official code for our CVPR 2021 paper: ["When Human Pose Estimation Meets Robustness: Adversarial Algorithms and Benchmarks"](https://arxiv.org/abs/2105.06152). 
3 | 4 | ## Getting started 5 | * Installation 6 | ``` 7 | # clone this repo 8 | git clone https://github.com/AIprogrammer/AdvMix 9 | # install dependencies 10 | cd AdvMix 11 | pip install -r requirements.txt 12 | # make nms 13 | cd lib 14 | make 15 | # install cocoapi 16 | # COCOAPI=/path/to/clone/cocoapi 17 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 18 | cd $COCOAPI/PythonAPI 19 | # Install into global site-packages 20 | make install 21 | # Alternatively, if you do not have permissions or prefer 22 | # not to install the COCO API into global site-packages 23 | python3 setup.py install --user 24 | ``` 25 | 26 | * Download the datasets [COCO](https://cocodataset.org/), [MPII](http://human-pose.mpi-inf.mpg.de/), and [OCHuman](https://github.com/liruilong940607/OCHumanApi). Put them under "./data". The directory structure follows [HRNet](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch). 27 | 28 | ## Benchmarking 29 | ### Construct benchmarking datasets 30 | ``` 31 | sh scripts/make_datasets.sh 32 | ``` 33 | ### Visualization examples 34 | ![benchmark_dataset](./figures/image_corruption.png) 35 | ### Benchmark results 36 | ![benchmark_results](./figures/benchmarking_results.png) 37 | 38 | **Note: There may be a small gap between the results obtained via [Evaluation](#Evaluation) and the results reported in our paper, due to the randomness of operations in the 'imagecorruptions' package.** 39 | 40 | ## AdvMix 41 | ![AdvMix](./figures/AdvMix.jpg) 42 | ### Training 43 | 44 | * MPII 45 | ``` 46 | sh scripts/train.sh mpii 47 | ``` 48 | * COCO 49 | ``` 50 | sh scripts/train.sh coco 51 | ``` 52 | 53 | ### Evaluation 54 | ``` 55 | sh scripts/test.sh coco 56 | sh scripts/test.sh mpii 57 | ``` 58 | 59 | ### Quantitative results 60 | | Method | Arch | Input size | AP* | mPC | rPC | 61 | |----------|--------------------|------------|--------|--------|-------| 62 | | Standard | ResNet_50 | 256x192 | 70.4 | 47.8 | 67.9 | 63 | | AdvMix | ResNet_50 | 256x192 | 70.1 | **50.1** | **71.5** | 64 | | Standard | ResNet_101 | 256x192 | 71.4 | 49.6 | 69.5 | 65 | | AdvMix | ResNet_101 | 256x192 | 71.3 | **52.3** | **73.3** | 66 | | Standard | ResNet_152 | 256x192 | 72.0 | 50.9 | 70.7 | 67 | | AdvMix | ResNet_152 | 256x192 | 72.3 | **53.2** | **73.6** | 68 | | Standard | HRNet_W32 | 256x192 | 74.4 | 53.0 | 71.3 | 69 | | AdvMix | HRNet_W32 | 256x192 | 74.7 | **55.5** | **74.3** | 70 | | Standard | HRNet_W48 | 256x192 | 75.1 | 53.7 | 71.6 | 71 | | AdvMix | HRNet_W48 | 256x192 | 75.4 | **57.1** | **75.7** | 72 | | Standard | HrHRNet_W32 | 512x512 | 67.1 | 39.9 | 59.4 | 73 | | AdvMix | HrHRNet_W32 | 512x512 | 68.3 | **45.4** | **66.5** | 74 | 75 | 76 | Comparisons between standard training and AdvMix on COCO-C. For top-down approaches, results are obtained with the detected bounding boxes of [HRNet](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/). We see that mPC and rPC are greatly improved, while the clean performance AP* is preserved. (A short sketch of how mPC and rPC are computed from per-corruption results is given below, after the visualization results.) 77 | 78 | ### Visualization results 79 | ![AdvMix](./figures/Qualitative.png) 80 | Qualitative comparisons between HRNet without and with AdvMix. For each image triplet, the images from left to right are the ground truth, the predictions of the standard HRNet-W32, and the predictions of HRNet-W32 trained with AdvMix.
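### Computing mPC and rPC

For reference, mPC is the mean AP over all corruption types and severity levels of the corrupted benchmark, and rPC expresses mPC as a percentage of the clean AP*. The snippet below is only an illustrative sketch of that bookkeeping (it is not a script shipped with this repository); the per-corruption AP values are assumed to come from running [Evaluation](#Evaluation) on each corrupted split.

```python
import numpy as np

def robustness_metrics(ap_clean, ap_corrupted):
    # ap_clean: AP* on the clean validation set.
    # ap_corrupted: dict mapping corruption name -> {severity (1..5) -> AP on that split}.
    per_corruption = [np.mean(list(sev_to_ap.values()))
                      for sev_to_ap in ap_corrupted.values()]
    mpc = float(np.mean(per_corruption))   # mean Performance under Corruption
    rpc = 100.0 * mpc / ap_clean           # relative PC, as a percentage of AP*
    return mpc, rpc
```

For example, the standard ResNet_50 row above satisfies 100 × 47.8 / 70.4 ≈ 67.9.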
81 | 82 | # Citations 83 | If you find our work useful in your research, please consider citing: 84 | ``` 85 | @article{wang2021human, 86 | title={When Human Pose Estimation Meets Robustness: Adversarial Algorithms and Benchmarks}, 87 | author={Wang, Jiahang and Jin, Sheng and Liu, Wentao and Liu, Weizhong and Qian, Chen and Luo, Ping}, 88 | journal={arXiv preprint arXiv:2105.06152}, 89 | year={2021} 90 | } 91 | ``` 92 | 93 | # License 94 | Our research code is released under the MIT license. See [LICENSE](./LICENSE) for details. 95 | 96 | # Acknowledgments 97 | Thanks for open-source code [HRNet](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/). 98 | 99 | 100 | -------------------------------------------------------------------------------- /experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3_advmix.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3,4,5,6,7) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 128 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /experiments/mpii/hrnet/w32_256x256_adam_lr1e-3_advmix.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3,4,5,6,7) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 
| - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 128 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3_advmix.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3,4,5,6,7) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 50 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | 
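The experiment YAMLs above only override fields that differ from the defaults defined in lib/config/default.py; every other option (e.g. PIN_MEMORY or LOSS.USE_OHKM) keeps its default value. Below is a minimal sketch of how such a file is merged into the yacs config, assuming lib/ is on PYTHONPATH and using a hypothetical argument namespace that mirrors the fields read by update_config further down in this repository:

```python
from types import SimpleNamespace

from config import cfg, update_config  # re-exported by lib/config/__init__.py

# Hypothetical stand-in for the argparse namespace built in tools/train.py.
args = SimpleNamespace(
    cfg='experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3_advmix.yaml',
    opts=[],               # extra 'KEY VALUE' overrides, e.g. ['TRAIN.END_EPOCH', '10']
    modelDir='', logDir='', dataDir='',
    corruption_type='', severity=0, test_robust=False,
)

update_config(cfg, args)   # merge_from_file + merge_from_list, then freeze
print(cfg.MODEL.NAME, cfg.TRAIN.LR, cfg.DATASET.ROOT)
```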
-------------------------------------------------------------------------------- /figures/AdvMix.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIprogrammer/AdvMix/f619fa279d9419eb452d228762c3872691e42e7d/figures/AdvMix.jpg -------------------------------------------------------------------------------- /figures/Qualitative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIprogrammer/AdvMix/f619fa279d9419eb452d228762c3872691e42e7d/figures/Qualitative.png -------------------------------------------------------------------------------- /figures/benchmarking_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIprogrammer/AdvMix/f619fa279d9419eb452d228762c3872691e42e7d/figures/benchmarking_results.png -------------------------------------------------------------------------------- /figures/image_corruption.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIprogrammer/AdvMix/f619fa279d9419eb452d228762c3872691e42e7d/figures/image_corruption.png -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms; python setup_linux.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd nms; rm *.so; cd ../../ 5 | -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 
5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.DATA_DIR = '' 22 | _C.GPUS = (0,) 23 | _C.WORKERS = 4 24 | _C.PRINT_FREQ = 20 25 | _C.AUTO_RESUME = False 26 | _C.PIN_MEMORY = True 27 | _C.RANK = 0 28 | 29 | # Cudnn related params 30 | _C.CUDNN = CN() 31 | _C.CUDNN.BENCHMARK = True 32 | _C.CUDNN.DETERMINISTIC = False 33 | _C.CUDNN.ENABLED = True 34 | 35 | # common params for NETWORK 36 | _C.MODEL = CN() 37 | _C.MODEL.NAME = 'pose_hrnet' 38 | _C.MODEL.INIT_WEIGHTS = True 39 | _C.MODEL.PRETRAINED = '' 40 | _C.MODEL.NUM_JOINTS = 17 41 | _C.MODEL.TAG_PER_JOINT = True 42 | _C.MODEL.TARGET_TYPE = 'gaussian' 43 | _C.MODEL.IMAGE_SIZE = [256, 256] 44 | _C.MODEL.HEATMAP_SIZE = [64, 64] 45 | _C.MODEL.SIGMA = 2 46 | _C.MODEL.EXTRA = CN(new_allowed=True) 47 | 48 | _C.LOSS = CN() 49 | _C.LOSS.USE_OHKM = False 50 | _C.LOSS.TOPK = 8 51 | _C.LOSS.USE_TARGET_WEIGHT = True 52 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 53 | 54 | # DATASET related params 55 | _C.DATASET = CN() 56 | _C.DATASET.ROOT = '' 57 | _C.DATASET.ROOT_C = '' 58 | _C.DATASET.DATASET = 'mpii' 59 | _C.DATASET.TRAIN_SET = 'train' 60 | _C.DATASET.TEST_SET = 'valid' 61 | _C.DATASET.DATA_FORMAT = 'jpg' 62 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 63 | _C.DATASET.SELECT_DATA = False 64 | 65 | # training data augmentation 66 | _C.DATASET.FLIP = True 67 | _C.DATASET.SCALE_FACTOR = 0.25 68 | _C.DATASET.ROT_FACTOR = 30 69 | _C.DATASET.PROB_HALF_BODY = 0.0 70 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 71 | _C.DATASET.COLOR_RGB = False 72 | # debug mini_coco 73 | _C.DATASET.MINI_COCO = False 74 | # VAL MASK 75 | _C.DATASET.VAL_FG = False 76 | _C.DATASET.VAL_MASK = False 77 | _C.DATASET.VAL_PARSING = False 78 | # train 79 | _C.TRAIN = CN() 80 | 81 | _C.TRAIN.LR_FACTOR = 0.1 82 | _C.TRAIN.LR_STEP = [90, 110] 83 | _C.TRAIN.LR = 0.001 84 | 85 | _C.TRAIN.OPTIMIZER = 'adam' 86 | _C.TRAIN.MOMENTUM = 0.9 87 | _C.TRAIN.WD = 0.0001 88 | _C.TRAIN.NESTEROV = False 89 | _C.TRAIN.GAMMA1 = 0.99 90 | _C.TRAIN.GAMMA2 = 0.0 91 | 92 | _C.TRAIN.BEGIN_EPOCH = 0 93 | _C.TRAIN.END_EPOCH = 140 94 | 95 | _C.TRAIN.RESUME = False 96 | _C.TRAIN.CHECKPOINT = '' 97 | 98 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 99 | _C.TRAIN.SHUFFLE = True 100 | 101 | # testing 102 | _C.TEST = CN() 103 | 104 | # size of images for each device 105 | _C.TEST.BATCH_SIZE_PER_GPU = 32 106 | # Test Model Epoch 107 | _C.TEST.FLIP_TEST = False 108 | _C.TEST.POST_PROCESS = False 109 | _C.TEST.SHIFT_HEATMAP = False 110 | 111 | _C.TEST.USE_GT_BBOX = False 112 | 113 | # test robustness 114 | _C.TEST.TEST_ROBUST = False 115 | _C.TEST.CORRUPTION_TYPE = '' 116 | 117 | 118 | # nms 119 | _C.TEST.IMAGE_THRE = 0.1 120 | _C.TEST.NMS_THRE = 0.6 121 | _C.TEST.SOFT_NMS = False 122 | _C.TEST.OKS_THRE = 0.5 123 | _C.TEST.IN_VIS_THRE = 0.0 124 | _C.TEST.COCO_BBOX_FILE = '' 125 | _C.TEST.BBOX_THRE = 1.0 126 | _C.TEST.MODEL_FILE = '' 127 | _C.TEST.MASK_FILE = '' 128 | 129 | # soft_argmax 130 | _C.TEST.SOFT_ARGMAX = False 131 | _C.TEST.BIAS = 0.0 132 | 133 | # debug 134 | _C.DEBUG = CN() 135 | _C.DEBUG.DEBUG = False 136 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 137 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 138 | _C.DEBUG.SAVE_HEATMAPS_GT = False 139 | _C.DEBUG.SAVE_HEATMAPS_PRED = 
False 140 | 141 | 142 | # update config by args 143 | def update_config(cfg, args): 144 | cfg.defrost() 145 | # merge and update args 146 | cfg.merge_from_file(args.cfg) 147 | cfg.merge_from_list(args.opts) 148 | 149 | if args.modelDir: 150 | cfg.OUTPUT_DIR = args.modelDir 151 | 152 | if args.logDir: 153 | cfg.LOG_DIR = args.logDir 154 | 155 | if args.dataDir: 156 | cfg.DATA_DIR = args.dataDir 157 | 158 | if args.corruption_type: 159 | cfg.TEST.CORRUPTION_TYPE = args.corruption_type 160 | 161 | cfg.TEST.SEVERITY = args.severity 162 | cfg.TEST.TEST_ROBUST = args.test_robust 163 | 164 | cfg.DATASET.ROOT = os.path.join( 165 | cfg.DATA_DIR, cfg.DATASET.ROOT 166 | ) 167 | 168 | if cfg.DATASET.DATASET == 'coco': 169 | cfg.DATASET.ROOT_C = 'data/coco-C' 170 | else: 171 | cfg.DATASET.ROOT_C = 'data/mpii-C' 172 | 173 | cfg.DATASET.ROOT_C = os.path.join( 174 | cfg.DATA_DIR, cfg.DATASET.ROOT_C 175 | ) 176 | cfg.MODEL.PRETRAINED = os.path.join( 177 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 178 | ) 179 | if cfg.TEST.MODEL_FILE: 180 | cfg.TEST.MODEL_FILE = os.path.join( 181 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 182 | ) 183 | 184 | cfg.freeze() 185 | 186 | 187 | if __name__ == '__main__': 188 | import sys 189 | with open(sys.argv[1], 'w') as f: 190 | print(_C, file=f) 191 | 192 | -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_resnet related params 15 | POSE_RESNET = CN() 16 | POSE_RESNET.NUM_LAYERS = 50 17 | POSE_RESNET.DECONV_WITH_BIAS = False 18 | POSE_RESNET.NUM_DECONV_LAYERS = 3 19 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 20 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 21 | POSE_RESNET.FINAL_CONV_KERNEL = 1 22 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 23 | 24 | # pose_multi_resoluton_net related params 25 | POSE_HIGH_RESOLUTION_NET = CN() 26 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 27 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 28 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 29 | 30 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 33 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 43 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 45 | 46 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 47 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 48 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 49 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 50 | 
POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 53 | 54 | 55 | MODEL_EXTRAS = { 56 | 'pose_resnet': POSE_RESNET, 57 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 58 | } 59 | -------------------------------------------------------------------------------- /lib/core/evaluate.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from core.inference import get_max_preds 14 | from utils.transforms import flip_back, tofloat, coord_norm, inv_coord_norm, _tocopy, _tocuda 15 | 16 | def calc_dists(preds, target, normalize): 17 | preds = preds.astype(np.float32) 18 | target = target.astype(np.float32) 19 | dists = np.zeros((preds.shape[1], preds.shape[0])) 20 | for n in range(preds.shape[0]): 21 | for c in range(preds.shape[1]): 22 | if target[n, c, 0] > 1 and target[n, c, 1] > 1: 23 | normed_preds = preds[n, c, :] / normalize[n] 24 | normed_targets = target[n, c, :] / normalize[n] 25 | dists[c, n] = np.linalg.norm(normed_preds - normed_targets) 26 | else: 27 | dists[c, n] = -1 28 | return dists 29 | 30 | 31 | def dist_acc(dists, thr=0.5): 32 | ''' Return percentage below threshold while ignoring values with a -1 ''' 33 | dist_cal = np.not_equal(dists, -1) 34 | num_dist_cal = dist_cal.sum() 35 | if num_dist_cal > 0: 36 | return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal 37 | else: 38 | return -1 39 | 40 | 41 | def accuracy(outputs, target, hm_type='gaussian', thr=0.5, args=None, cfg=None): 42 | ''' 43 | Calculate accuracy according to PCK, 44 | but uses ground truth heatmap rather than x,y locations 45 | First value to be returned is average accuracy across 'idxs', 46 | followed by individual accuracies 47 | ''' 48 | if isinstance(outputs, list): 49 | for index in range(len(outputs)): 50 | outputs[index] = outputs[index].clone().detach().cpu().numpy() 51 | idx = list(range(outputs[-1].shape[1])) 52 | 53 | else: 54 | outputs = outputs.clone().detach().cpu().numpy() 55 | idx = list(range(outputs.shape[1])) 56 | 57 | if isinstance(target, list): 58 | for index in range(len(target)): 59 | target[index] = target[index].clone().detach().cpu().numpy() 60 | idx = list(range(target[-1].shape[1])) 61 | 62 | else: 63 | target = target.clone().detach().cpu().numpy() 64 | idx = list(range(target.shape[1])) 65 | 66 | norm = 1.0 67 | 68 | if hm_type == 'gaussian' and args is None: 69 | pred, _ = get_max_preds(outputs) 70 | target, _ = get_max_preds(target) 71 | h = outputs.shape[2] # y 72 | w = outputs.shape[3] # x 73 | 74 | else: 75 | assert outputs[0].ndim == 3, 'the output coord must be 3 dims' 76 | pred = outputs[0] 77 | pred = inv_coord_norm(pred, cfg, args).clone().detach().cpu().numpy() 78 | target, _ = get_max_preds(target[-1]) 79 | h = outputs[-1].shape[2] 80 | w = outputs[-1].shape[3] 81 | 82 | norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10 83 | 84 | dists = calc_dists(pred, target, norm) 85 | 86 | acc = np.zeros((len(idx) + 1)) 87 | avg_acc = 0 88 | cnt = 0 89 | 90 | 
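# PCK accumulation: acc[i + 1] holds the per-joint accuracy and acc[0] the mean over joints;
# dist_acc() returns -1 for joints without a valid ground-truth annotation, and those joints
# are excluded from the average via the cnt counter.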
for i in range(len(idx)): 91 | acc[i + 1] = dist_acc(dists[idx[i]]) 92 | if acc[i + 1] >= 0: 93 | avg_acc = avg_acc + acc[i + 1] 94 | cnt += 1 95 | 96 | avg_acc = avg_acc / cnt if cnt != 0 else 0 97 | if cnt != 0: 98 | acc[0] = avg_acc 99 | return acc, avg_acc, cnt, pred 100 | 101 | 102 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import time 12 | import logging 13 | import os, copy 14 | 15 | import numpy as np 16 | import torch 17 | 18 | from core.evaluate import accuracy 19 | from core.inference import get_final_preds, get_final_preds_using_softargmax, SoftArgmax2D 20 | from utils.transforms import flip_back, tofloat, coord_norm, inv_coord_norm, _tocopy, _tocuda 21 | from utils.vis import save_debug_images 22 | import torch.nn as nn 23 | from tqdm import tqdm 24 | import torch.nn.functional as F 25 | 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | def train(config, args, train_loader, model, criterion, optimizer, epoch, 31 | output_dir, tb_log_dir, writer_dict): 32 | batch_time = AverageMeter() 33 | data_time = AverageMeter() 34 | losses = AverageMeter() 35 | acc = AverageMeter() 36 | 37 | if isinstance(model, list): 38 | model = model[0].train() 39 | model_D = model[1].train() 40 | else: 41 | model.train() 42 | 43 | end = time.time() 44 | for i, (input, target, target_weight, meta) in tqdm(enumerate(train_loader)): 45 | 46 | data_time.update(time.time() - end) 47 | 48 | outputs = model(input) 49 | 50 | target = target[0].cuda(non_blocking=True) 51 | target_hm = target 52 | target_weight = target_weight.cuda(non_blocking=True) 53 | 54 | loss = criterion(outputs, target, target_weight) 55 | 56 | # compute gradient and do update step 57 | optimizer.zero_grad() 58 | loss.backward() 59 | optimizer.step() 60 | 61 | # measure accuracy and record loss 62 | losses.update(loss.item(), input.size(0)) 63 | _, avg_acc, cnt, pred = accuracy(outputs, 64 | target, args=None, cfg=config) 65 | 66 | outputs = _tocuda(outputs) 67 | 68 | acc.update(avg_acc, cnt) 69 | 70 | 71 | batch_time.update(time.time() - end) 72 | end = time.time() 73 | 74 | if i % config.PRINT_FREQ == 0: 75 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 76 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 77 | 'Speed {speed:.1f} samples/s\t' \ 78 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 79 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 80 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 81 | epoch, i, len(train_loader), batch_time=batch_time, 82 | speed=input.size(0)/batch_time.val, 83 | data_time=data_time, loss=losses, acc=acc) 84 | logger.info(msg) 85 | 86 | writer = writer_dict['writer'] 87 | global_steps = writer_dict['train_global_steps'] 88 | writer.add_scalar('train_loss', losses.val, global_steps) 89 | writer.add_scalar('train_acc', acc.val, global_steps) 90 | writer_dict['train_global_steps'] = global_steps + 1 91 | 92 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 93 | 94 | save_debug_images(config, input, meta, target_hm, 
pred*4, outputs, 95 | prefix) 96 | 97 | 98 | def set_require_grad(nets, requires_grad=True): 99 | if not isinstance(nets, list): 100 | nets = [nets] 101 | for net in nets: 102 | if net is not None: 103 | for param in net.parameters(): 104 | param.requires_grad = requires_grad 105 | 106 | 107 | def train_advmix(config, args, train_loader, models, criterion, optimizers, epoch, 108 | output_dir, tb_log_dir, writer_dict): 109 | batch_time = AverageMeter() 110 | data_time = AverageMeter() 111 | losses = AverageMeter() 112 | acc = AverageMeter() 113 | 114 | if isinstance(models, list): 115 | model = models[0].train() 116 | model_G = models[1].train() 117 | model_teacher = models[2].eval() 118 | else: 119 | models.train() 120 | 121 | optimizer = optimizers[0] 122 | optimizer_G = optimizers[1] 123 | 124 | end = time.time() 125 | for i, (inputs, targets, target_weights, metas) in tqdm(enumerate(train_loader)): 126 | 127 | data_time.update(time.time() - end) 128 | # mask_channel = meta['model_supervise_channel'] > 0.5 129 | if isinstance(inputs, list): 130 | inputs = [_.cuda(non_blocking=True) for _ in inputs] 131 | target = targets[0].cuda(non_blocking=True) 132 | target_weight = target_weights[0].cuda(non_blocking=True) 133 | meta = metas[0] 134 | else: 135 | inputs = inputs.cuda(non_blocking=True) 136 | 137 | G_input = torch.cat(inputs, dim=1) 138 | mix_weight = F.softmax(model_G(G_input), dim=1) 139 | 140 | set_require_grad(model, True) 141 | optimizer.zero_grad() 142 | tmp = inputs[0] * mix_weight[:,0,...].unsqueeze(dim=1) 143 | for list_index in range(1, len(inputs)): 144 | tmp += inputs[list_index] * mix_weight[:,list_index].unsqueeze(dim=1) 145 | 146 | D_output_detach = model(tmp.detach()) 147 | 148 | with torch.no_grad(): 149 | teacher_output = model_teacher(inputs[0]) 150 | 151 | loss_D_hm = criterion(D_output_detach, target, target_weight) 152 | loss_D_kd = criterion(D_output_detach, teacher_output, target_weight) 153 | loss_D = loss_D_hm * (1 - args.alpha) + loss_D_kd * args.alpha 154 | loss_D.backward() 155 | optimizer.step() 156 | 157 | # G: compute gradient and do update step 158 | set_require_grad(model, False) 159 | optimizer_G.zero_grad() 160 | outputs = model(tmp) 161 | output = outputs 162 | loss_G = -criterion(output, target, target_weight) * args.adv_loss_weight 163 | loss_G.backward() 164 | optimizer_G.step() 165 | 166 | # measure accuracy and record loss 167 | losses.update(loss_D.item(), inputs[0].size(0)) 168 | _, avg_acc, cnt, pred = accuracy(output, 169 | target, args=None, cfg=config) 170 | 171 | acc.update(avg_acc, cnt) 172 | batch_time.update(time.time() - end) 173 | end = time.time() 174 | 175 | if i % config.PRINT_FREQ == 0: 176 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 177 | 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 178 | 'Speed {speed:.1f} samples/s\t' \ 179 | 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 180 | 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 181 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 182 | epoch, i, len(train_loader), batch_time=batch_time, 183 | speed=inputs[0].size(0)/batch_time.val, 184 | data_time=data_time, loss=losses, acc=acc) 185 | logger.info(msg) 186 | 187 | writer = writer_dict['writer'] 188 | global_steps = writer_dict['train_global_steps'] 189 | writer.add_scalar('train_loss', losses.val, global_steps) 190 | writer.add_scalar('train_acc', acc.val, global_steps) 191 | writer_dict['train_global_steps'] = global_steps + 1 192 | 193 | prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) 194 | 
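# Two sets of debug images are saved here: one for the clean input (prefix suffixed with
# '_clean') and one for the adversarially mixed input tmp.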
save_debug_images(config, inputs[0], copy.deepcopy(meta), target, pred*4, outputs, 195 | prefix + '_clean') 196 | save_debug_images(config, tmp, copy.deepcopy(meta), target, pred*4, outputs, 197 | prefix) 198 | 199 | 200 | def validate(config, args, val_loader, val_dataset, model, criterion, output_dir, 201 | tb_log_dir, writer_dict=None, cpu=False): 202 | batch_time = AverageMeter() 203 | losses = AverageMeter() 204 | acc = AverageMeter() 205 | 206 | # switch to evaluate mode 207 | model.eval() 208 | 209 | num_samples = len(val_dataset) 210 | all_preds = np.zeros( 211 | (num_samples, config.MODEL.NUM_JOINTS, 3), 212 | dtype=np.float32 213 | ) 214 | all_boxes = np.zeros((num_samples, 6)) 215 | image_path = [] 216 | filenames = [] 217 | imgnums = [] 218 | idx = 0 219 | feat_dict = {} 220 | 221 | with torch.no_grad(): 222 | end = time.time() 223 | time_gpu = 0. 224 | for i, (input, target, target_weight, meta) in tqdm(enumerate(val_loader)): 225 | if not cpu: 226 | input = input.cuda() 227 | # compute output 228 | torch.cuda.synchronize() 229 | infer_start = time.time() 230 | outputs = model(input) 231 | 232 | infer_end = time.time() 233 | torch.cuda.synchronize() 234 | time_gpu += (infer_end - infer_start) 235 | 236 | if isinstance(outputs, list): 237 | output = outputs[-1] 238 | else: 239 | output = outputs 240 | 241 | if config.TEST.FLIP_TEST: 242 | input_flipped = input.flip(3) 243 | outputs_flipped = model(input_flipped) 244 | 245 | if isinstance(outputs_flipped, list): 246 | output_flipped = outputs_flipped[-1] 247 | else: 248 | output_flipped = outputs_flipped 249 | 250 | output_flipped = flip_back(output_flipped.cpu().numpy(), 251 | val_dataset.flip_pairs) 252 | if not cpu: 253 | output_flipped = torch.from_numpy(output_flipped.copy()).cuda() 254 | else: 255 | output_flipped = torch.from_numpy(output_flipped.copy()) 256 | 257 | # feature is not aligned, shift flipped heatmap for higher accuracy 258 | if config.TEST.SHIFT_HEATMAP: 259 | output_flipped[:, :, :, 1:] = \ 260 | output_flipped.clone()[:, :, :, 0:-1] 261 | output = (output + output_flipped) * 0.5 262 | 263 | if not cpu: 264 | target = target[0].cuda(non_blocking=True) 265 | target_hm = target 266 | target_weight = target_weight.cuda(non_blocking=True) 267 | 268 | loss = criterion(output, target, target_weight) 269 | 270 | num_images = input.size(0) 271 | # measure accuracy and record loss 272 | losses.update(loss.item(), num_images) 273 | 274 | _, avg_acc, cnt, pred = accuracy(output, 275 | target, args=None, cfg=config) 276 | 277 | output = _tocuda(output) 278 | acc.update(avg_acc, cnt) 279 | batch_time.update(time.time() - end) 280 | end = time.time() 281 | 282 | # corresponding center scale joint 283 | c = meta['center'].numpy() 284 | s = meta['scale'].numpy() 285 | score = meta['score'].numpy() 286 | 287 | 288 | preds, maxvals = get_final_preds( 289 | config, args, output.clone().cpu().numpy(), c, s) 290 | 291 | all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] 292 | all_preds[idx:idx + num_images, :, 2:3] = maxvals 293 | # double check this all_boxes parts 294 | all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] 295 | all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] 296 | all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) 297 | all_boxes[idx:idx + num_images, 5] = score 298 | image_path.extend(meta['image']) 299 | 300 | idx += num_images 301 | 302 | if i % config.PRINT_FREQ == 0: 303 | msg = 'Test: [{0}/{1}]\t' \ 304 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 305 | 'Loss {loss.val:.4f} 
({loss.avg:.4f})\t' \ 306 | 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( 307 | i, len(val_loader), batch_time=batch_time, 308 | loss=losses, acc=acc) 309 | logger.info(msg) 310 | 311 | prefix = '{}_{}'.format( 312 | os.path.join(output_dir, 'val'), i 313 | ) 314 | 315 | save_debug_images(config, input, meta, target_hm, pred * 4, output, 316 | prefix) 317 | 318 | print('=> The average inference time is :', time_gpu / len(val_loader)) 319 | 320 | name_values, perf_indicator = val_dataset.evaluate( 321 | config, all_preds, output_dir, all_boxes, image_path, 322 | filenames, imgnums 323 | ) 324 | 325 | model_name = config.MODEL.NAME 326 | if isinstance(name_values, list): 327 | for name_value in name_values: 328 | _print_name_value(name_value, model_name) 329 | else: 330 | _print_name_value(name_values, model_name) 331 | 332 | if writer_dict: 333 | writer = writer_dict['writer'] 334 | global_steps = writer_dict['valid_global_steps'] 335 | writer.add_scalar( 336 | 'valid_loss', 337 | losses.avg, 338 | global_steps 339 | ) 340 | writer.add_scalar( 341 | 'valid_acc', 342 | acc.avg, 343 | global_steps 344 | ) 345 | if isinstance(name_values, list): 346 | for name_value in name_values: 347 | writer.add_scalars( 348 | 'valid', 349 | dict(name_value), 350 | global_steps 351 | ) 352 | else: 353 | writer.add_scalars( 354 | 'valid', 355 | dict(name_values), 356 | global_steps 357 | ) 358 | writer_dict['valid_global_steps'] = global_steps + 1 359 | 360 | return name_values, perf_indicator 361 | 362 | 363 | # markdown format output 364 | def _print_name_value(name_value, full_arch_name): 365 | names = name_value.keys() 366 | values = name_value.values() 367 | num_values = len(name_value) 368 | logger.info( 369 | '| Arch ' + 370 | ' '.join(['| {}'.format(name) for name in names]) + 371 | ' |' 372 | ) 373 | logger.info('|---' * (num_values+1) + '|') 374 | 375 | if len(full_arch_name) > 15: 376 | full_arch_name = full_arch_name[:8] + '...' 377 | logger.info( 378 | '| ' + full_arch_name + ' ' + 379 | ' '.join(['| {:.3f}'.format(value) for value in values]) + 380 | ' |' 381 | ) 382 | 383 | class AverageMeter(object): 384 | """Computes and stores the average and current value""" 385 | def __init__(self): 386 | self.reset() 387 | 388 | def reset(self): 389 | self.val = 0 390 | self.avg = 0 391 | self.sum = 0 392 | self.count = 0 393 | 394 | def update(self, val, n=1): 395 | self.val = val 396 | self.sum += val * n 397 | self.count += n 398 | self.avg = self.sum / self.count if self.count != 0 else 0 399 | -------------------------------------------------------------------------------- /lib/core/inference.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | 15 | from utils.transforms import transform_preds 16 | import torch.nn as nn 17 | import torch 18 | 19 | 20 | 21 | 22 | def get_max_preds(batch_heatmaps): 23 | ''' 24 | get predictions from score maps 25 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 26 | ''' 27 | assert isinstance(batch_heatmaps, np.ndarray), \ 28 | 'batch_heatmaps should be numpy.ndarray' 29 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 30 | 31 | batch_size = batch_heatmaps.shape[0] 32 | num_joints = batch_heatmaps.shape[1] 33 | width = batch_heatmaps.shape[3] 34 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 35 | idx = np.argmax(heatmaps_reshaped, 2) 36 | maxvals = np.amax(heatmaps_reshaped, 2) 37 | 38 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 39 | idx = idx.reshape((batch_size, num_joints, 1)) 40 | 41 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 42 | preds[:, :, 0] = (preds[:, :, 0]) % width 43 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 44 | 45 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 46 | pred_mask = pred_mask.astype(np.float32) 47 | 48 | preds *= pred_mask 49 | return preds, maxvals 50 | 51 | 52 | def get_final_preds(config, args, batch_heatmaps, center, scale, cal_hm_coord=True, coord=None, reg_hm=False): 53 | # default: calculate coord from heatmap 54 | if cal_hm_coord: 55 | coords, maxvals = get_max_preds(batch_heatmaps) 56 | else: 57 | coords = coord 58 | _, maxvals = get_max_preds(batch_heatmaps) 59 | 60 | heatmap_height = batch_heatmaps.shape[2] 61 | heatmap_width = batch_heatmaps.shape[3] 62 | 63 | if config.TEST.POST_PROCESS: 64 | for n in range(coords.shape[0]): 65 | for p in range(coords.shape[1]): 66 | hm = batch_heatmaps[n][p] 67 | px = int(math.floor(coords[n][p][0] + 0.5)) 68 | py = int(math.floor(coords[n][p][1] + 0.5)) 69 | if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1: 70 | diff = np.array( 71 | [ 72 | hm[py][px+1] - hm[py][px-1], 73 | hm[py+1][px]-hm[py-1][px] 74 | ] 75 | ) 76 | coords[n][p] += np.sign(diff) * .25 77 | 78 | preds = coords.copy() 79 | 80 | # Transform back the coord based on center and scale 81 | for i in range(coords.shape[0]): 82 | if coord is None: 83 | preds[i] = transform_preds( 84 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 85 | ) 86 | if coord is not None and reg_hm: 87 | preds[i] = transform_preds( 88 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 89 | ) 90 | # model outputs coord; not reg_hm; reg_hm = False in default 91 | if coord is not None and not reg_hm: 92 | preds[i] = transform_preds( 93 | coords[i], center[i], scale[i], [config.MODEL.IMAGE_SIZE[0], config.MODEL.IMAGE_SIZE[1]] 94 | ) 95 | return preds, maxvals 96 | 97 | class SoftArgmax2D(nn.Module): 98 | def __init__(self, height=64, width=48, beta=100): 99 | super(SoftArgmax2D, self).__init__() 100 | self.softmax = nn.Softmax(dim=-1) 101 | self.beta = beta 102 | # Note that meshgrid in pytorch behaves differently with numpy. 
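# (torch.meshgrid defaults to 'ij' indexing, so WY varies along the height axis and WX along
# the width axis, whereas np.meshgrid defaults to 'xy' indexing.)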
103 | self.WY, self.WX = torch.meshgrid(torch.arange(height, dtype=torch.float), 104 | torch.arange(width, dtype=torch.float)) 105 | 106 | def forward(self, x): 107 | b, c, h, w = x.shape 108 | device = x.device 109 | 110 | probs = self.softmax(x.view(b, c, -1) * self.beta) 111 | probs = probs.view(b, c, h, w) 112 | 113 | self.WY = self.WY.to(device) 114 | self.WX = self.WX.to(device) 115 | 116 | px = torch.sum(probs * self.WX, dim=(2, 3)) 117 | py = torch.sum(probs * self.WY, dim=(2, 3)) 118 | preds = torch.stack((px, py), dim=-1).cpu().numpy() 119 | 120 | idx = np.round(preds).astype(np.int32) 121 | maxvals = np.zeros(shape=(b, c, 1)) 122 | for bi in range(b): 123 | for ci in range(c): 124 | maxvals[bi, ci, 0] = x[bi, ci, idx[bi, ci, 1], idx[bi, ci, 0]] 125 | 126 | return preds, maxvals 127 | 128 | 129 | def get_final_preds_using_softargmax(config, batch_heatmaps, center, scale): 130 | soft_argmax = SoftArgmax2D(config.MODEL.HEATMAP_SIZE[1], config.MODEL.HEATMAP_SIZE[0], beta=160) 131 | coords, maxvals = soft_argmax(batch_heatmaps) 132 | 133 | heatmap_height = batch_heatmaps.shape[2] 134 | heatmap_width = batch_heatmaps.shape[3] 135 | 136 | batch_heatmaps = batch_heatmaps.cpu().numpy() 137 | 138 | # post-processing 139 | if config.TEST.POST_PROCESS: 140 | for n in range(coords.shape[0]): 141 | for p in range(coords.shape[1]): 142 | hm = batch_heatmaps[n][p] 143 | px = int(math.floor(coords[n][p][0] + 0.5)) 144 | py = int(math.floor(coords[n][p][1] + 0.5)) 145 | if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: 146 | diff = np.array( 147 | [ 148 | hm[py][px + 1] - hm[py][px - 1], 149 | hm[py + 1][px] - hm[py - 1][px] 150 | ] 151 | ) 152 | coords[n][p] += np.sign(diff) * .25 153 | 154 | preds = coords.copy() 155 | 156 | # Transform back 157 | for i in range(coords.shape[0]): 158 | preds[i] = transform_preds( 159 | coords[i], center[i], scale[i], [heatmap_width, heatmap_height] 160 | ) 161 | 162 | return preds, maxvals 163 | -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class JointsMSELoss(nn.Module): 16 | def __init__(self, use_target_weight, smooth_L1=False): 17 | super(JointsMSELoss, self).__init__() 18 | if smooth_L1: 19 | self.criterion = nn.MSELoss(reduction='mean') 20 | else: 21 | self.criterion = nn.SmoothL1Loss() 22 | 23 | self.use_target_weight = use_target_weight 24 | 25 | def forward(self, output, target, target_weight): 26 | batch_size = output.size(0) 27 | num_joints = output.size(1) 28 | 29 | def tofloat(x): 30 | if x.dtype == torch.float64 or x.dtype == torch.double: 31 | x = x.float() 32 | return x 33 | 34 | output = tofloat(output) 35 | target = tofloat(target) 36 | target_weight = tofloat(target_weight) 37 | 38 | if output.dim() == 4: 39 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 40 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 41 | 42 | else: 43 | heatmaps_pred = output 44 | heatmaps_gt = target 45 | 46 | loss = 0 47 | 48 | for idx in range(num_joints): 49 | if not isinstance(heatmaps_pred, tuple) and heatmaps_pred.shape[2] == 2: 50 | heatmap_pred = heatmaps_pred[:,idx].squeeze() 51 | heatmap_gt = heatmaps_gt[:,idx].squeeze() 52 | 53 | else: 54 | heatmap_pred = heatmaps_pred[idx].squeeze() 55 | heatmap_gt = heatmaps_gt[idx].squeeze() 56 | 57 | if self.use_target_weight: 58 | loss += 0.5 * self.criterion( 59 | heatmap_pred.mul(target_weight[:, idx]), 60 | heatmap_gt.mul(target_weight[:, idx]) 61 | ) 62 | else: 63 | loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt) 64 | 65 | return loss / num_joints 66 | 67 | 68 | class JointsOHKMMSELoss(nn.Module): 69 | def __init__(self, use_target_weight, topk=8): 70 | super(JointsOHKMMSELoss, self).__init__() 71 | self.criterion = nn.MSELoss(reduction='none') 72 | self.use_target_weight = use_target_weight 73 | self.topk = topk 74 | 75 | def ohkm(self, loss): 76 | ohkm_loss = 0. 
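# Online Hard Keypoint Mining: for each sample in the batch, keep only the top-k largest
# per-joint losses, average them, and then average the result over the batch.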
77 | for i in range(loss.size()[0]): 78 | sub_loss = loss[i] 79 | topk_val, topk_idx = torch.topk( 80 | sub_loss, k=self.topk, dim=0, sorted=False 81 | ) 82 | tmp_loss = torch.gather(sub_loss, 0, topk_idx) 83 | ohkm_loss += torch.sum(tmp_loss) / self.topk 84 | ohkm_loss /= loss.size()[0] 85 | return ohkm_loss 86 | 87 | def forward(self, output, target, target_weight): 88 | batch_size = output.size(0) 89 | num_joints = output.size(1) 90 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 91 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 92 | 93 | loss = [] 94 | for idx in range(num_joints): 95 | heatmap_pred = heatmaps_pred[idx].squeeze() 96 | heatmap_gt = heatmaps_gt[idx].squeeze() 97 | if self.use_target_weight: 98 | loss.append(0.5 * self.criterion( 99 | heatmap_pred.mul(target_weight[:, idx]), 100 | heatmap_gt.mul(target_weight[:, idx]) 101 | )) 102 | else: 103 | loss.append( 104 | 0.5 * self.criterion(heatmap_pred, heatmap_gt) 105 | ) 106 | 107 | loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss] 108 | loss = torch.cat(loss, dim=1) 109 | 110 | return self.ohkm(loss) 111 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import copy 12 | import logging 13 | import random 14 | 15 | import cv2, os 16 | import numpy as np 17 | import torch 18 | from torch.utils.data import Dataset 19 | 20 | from utils.transforms import get_affine_transform 21 | from utils.transforms import affine_transform 22 | from utils.transforms import fliplr_joints 23 | from imagecorruptions import corrupt, get_corruption_names 24 | from .advaug import MixCombine 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class JointsDataset(Dataset): 30 | def __init__(self, cfg, args, root, image_set, is_train, transform=None): 31 | 32 | self.args = args 33 | self.num_joints = 0 34 | self.pixel_std = 200 35 | self.flip_pairs = [] 36 | self.parent_ids = [] 37 | 38 | self.is_train = is_train 39 | self.root = root 40 | self.image_set = image_set 41 | 42 | self.output_path = cfg.OUTPUT_DIR 43 | self.data_format = cfg.DATASET.DATA_FORMAT 44 | 45 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 46 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 47 | self.flip = cfg.DATASET.FLIP 48 | self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY 49 | self.prob_half_body = cfg.DATASET.PROB_HALF_BODY 50 | self.color_rgb = cfg.DATASET.COLOR_RGB 51 | 52 | self.target_type = cfg.MODEL.TARGET_TYPE 53 | self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) 54 | self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) 55 | self.sigma = cfg.MODEL.SIGMA 56 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 57 | self.joints_weight = 1 58 | 59 | self.transform = transform 60 | self.get_varaug = MixCombine() 61 | self.db = [] 62 | 63 | def _get_db(self): 64 | raise NotImplementedError 65 | 66 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 67 | raise NotImplementedError 68 | 69 | def 
half_body_transform(self, joints, joints_vis): 70 | upper_joints = [] 71 | lower_joints = [] 72 | for joint_id in range(self.num_joints): 73 | if joints_vis[joint_id][0] > 0: 74 | if joint_id in self.upper_body_ids: 75 | upper_joints.append(joints[joint_id]) 76 | else: 77 | lower_joints.append(joints[joint_id]) 78 | 79 | if np.random.randn() < 0.5 and len(upper_joints) > 2: 80 | selected_joints = upper_joints 81 | else: 82 | selected_joints = lower_joints \ 83 | if len(lower_joints) > 2 else upper_joints 84 | 85 | if len(selected_joints) < 2: 86 | return None, None 87 | 88 | selected_joints = np.array(selected_joints, dtype=np.float32) 89 | center = selected_joints.mean(axis=0)[:2] 90 | 91 | left_top = np.amin(selected_joints, axis=0) 92 | right_bottom = np.amax(selected_joints, axis=0) 93 | 94 | w = right_bottom[0] - left_top[0] 95 | h = right_bottom[1] - left_top[1] 96 | 97 | if w > self.aspect_ratio * h: 98 | h = w * 1.0 / self.aspect_ratio 99 | elif w < self.aspect_ratio * h: 100 | w = h * self.aspect_ratio 101 | 102 | scale = np.array( 103 | [ 104 | w * 1.0 / self.pixel_std, 105 | h * 1.0 / self.pixel_std 106 | ], 107 | dtype=np.float32 108 | ) 109 | 110 | scale = scale * 1.5 111 | return center, scale 112 | 113 | def __len__(self,): 114 | return len(self.db) 115 | 116 | 117 | def __getitem__(self, idx): 118 | if self.args.sample_times == 1 or not self.is_train: 119 | input, target, target_weight, meta = self.get_clean(idx) 120 | return input, target, target_weight, meta 121 | else: 122 | input_list, target_list, target_weight_list, meta_list = [],[],[],[] 123 | meta, input_base = self.get_base(idx) 124 | get_cleans = ['clean', 'autoaug', 'gridmask'] 125 | for sample in range(len(get_cleans)): 126 | get_clean = get_cleans[sample] 127 | input, target, target_weight, meta = self.get_var(meta, input_base, get_clean=get_clean) 128 | input_list.append(input) 129 | target_list.append(target) 130 | target_weight_list.append(target_weight) 131 | meta_list.append(meta) 132 | 133 | return input_list, target_list, target_weight_list, meta_list 134 | 135 | def get_base(self, idx): 136 | db_rec = copy.deepcopy(self.db[idx]) 137 | 138 | image_file = db_rec['image'] 139 | filename = db_rec['filename'] if 'filename' in db_rec else '' 140 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 141 | 142 | if self.data_format == 'zip': 143 | from utils import zipreader 144 | data_numpy = zipreader.imread( 145 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 146 | ) 147 | else: 148 | data_numpy = cv2.imread( 149 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 150 | ) 151 | 152 | if self.color_rgb: 153 | data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) 154 | 155 | if data_numpy is None: 156 | logger.error('=> fail to read {}'.format(image_file)) 157 | raise ValueError('Fail to read {}'.format(image_file)) 158 | 159 | joints = db_rec['joints_3d'] 160 | joints_vis = db_rec['joints_3d_vis'] 161 | 162 | c = db_rec['center'] 163 | s = db_rec['scale'] 164 | score = db_rec['score'] if 'score' in db_rec else 1 165 | r = 0 166 | 167 | if self.is_train: 168 | if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body 169 | and np.random.rand() < self.prob_half_body): 170 | c_half_body, s_half_body = self.half_body_transform( 171 | joints, joints_vis 172 | ) 173 | 174 | if c_half_body is not None and s_half_body is not None: 175 | c, s = c_half_body, s_half_body 176 | 177 | sf = self.scale_factor 178 | rf = self.rotation_factor 179 | s = s * np.clip(np.random.randn()*sf + 1, 1 - 
sf, 1 + sf) 180 | 181 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 182 | if random.random() <= 0.6 else 0 183 | 184 | if self.flip and random.random() <= 0.5: 185 | data_numpy = data_numpy[:, ::-1, :] 186 | joints, joints_vis = fliplr_joints( 187 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 188 | c[0] = data_numpy.shape[1] - c[0] - 1 189 | 190 | trans = get_affine_transform(c, s, r, self.image_size) 191 | input = cv2.warpAffine( 192 | data_numpy, 193 | trans, 194 | (int(self.image_size[0]), int(self.image_size[1])), 195 | flags=cv2.INTER_LINEAR) 196 | 197 | for i in range(self.num_joints): 198 | if joints_vis[i, 0] > 0.0: 199 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 200 | 201 | if 'style' in filename: 202 | datasetname = 'style' 203 | else: 204 | datasetname = 'clean' 205 | 206 | if 'instance_index' not in db_rec: 207 | db_rec['instance_index'] = -1 208 | 209 | meta = { 210 | 'image': image_file, 211 | 'filename': filename, 212 | 'imgnum': imgnum, 213 | 'joints': joints, 214 | 'joints_vis': joints_vis, 215 | 'center': c, 216 | 'scale': s, 217 | 'rotation': r, 218 | 'score': score, 219 | 'dataset': datasetname, 220 | 'instance_index':db_rec['instance_index'] 221 | } 222 | 223 | return meta, input 224 | 225 | def get_var(self, meta, input, get_clean=False): 226 | joints = meta['joints'] 227 | joints_vis = meta['joints_vis'] 228 | 229 | inputs = { 230 | 'data_numpy':input, 231 | 'img_label':1, 232 | 'joints_3d':joints, 233 | 'joints_3d_vis':joints_vis, 234 | 'dataset':meta['dataset'] 235 | } 236 | 237 | inputs, _ = self.get_varaug((inputs, get_clean, self.args), self.transform) 238 | 239 | input = inputs['data_numpy'] 240 | joints = inputs['joints_3d'] 241 | joints_vis = inputs['joints_3d_vis'] 242 | 243 | target, target_weight = self.generate_target(joints, joints_vis) 244 | 245 | if isinstance(target, list): 246 | for index in range(len(target)): 247 | target[index] = torch.from_numpy(target[index]) 248 | else: 249 | target = torch.from_numpy(target) 250 | 251 | target_weight = torch.from_numpy(target_weight) 252 | 253 | if isinstance(target, list): 254 | return input, target[0], target_weight, meta 255 | else: 256 | return input, target, target_weight, meta 257 | 258 | def get_clean(self, idx): 259 | corruptions = [ 260 | 'gaussian_noise', 'shot_noise', 'impulse_noise', 261 | 'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur', 262 | 'snow', 'frost', 'fog', 'brightness', 263 | 'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression', 264 | ] 265 | db_rec = copy.deepcopy(self.db[idx]) 266 | 267 | image_file = db_rec['image'] 268 | filename = db_rec['filename'] if 'filename' in db_rec else '' 269 | imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' 270 | 271 | if self.data_format == 'zip': 272 | from utils import zipreader 273 | data_numpy = zipreader.imread( 274 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 275 | ) 276 | else: 277 | data_numpy = cv2.imread( 278 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION 279 | ) 280 | 281 | if self.color_rgb: 282 | data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) 283 | 284 | if self.args.random_corruption: 285 | print('=> random augmentation') 286 | data_numpy = corrupt(data_numpy, corruption_name=random.choice(corruptions), severity=random.randint(1,5)) 287 | 288 | if data_numpy is None: 289 | logger.error('=> fail to read {}'.format(image_file)) 290 | raise ValueError('Fail to read {}'.format(image_file)) 291 | 292 | joints = db_rec['joints_3d'] 293 | 
joints_vis = db_rec['joints_3d_vis'] 294 | 295 | c = db_rec['center'] 296 | s = db_rec['scale'] 297 | score = db_rec['score'] if 'score' in db_rec else 1 298 | r = 0 299 | 300 | if self.is_train: 301 | if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body 302 | and np.random.rand() < self.prob_half_body): 303 | c_half_body, s_half_body = self.half_body_transform( 304 | joints, joints_vis 305 | ) 306 | 307 | if c_half_body is not None and s_half_body is not None: 308 | c, s = c_half_body, s_half_body 309 | 310 | sf = self.scale_factor 311 | rf = self.rotation_factor 312 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 313 | 314 | r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ 315 | if random.random() <= 0.6 else 0 316 | 317 | if self.flip and random.random() <= 0.5: 318 | data_numpy = data_numpy[:, ::-1, :] 319 | joints, joints_vis = fliplr_joints( 320 | joints, joints_vis, data_numpy.shape[1], self.flip_pairs) 321 | c[0] = data_numpy.shape[1] - c[0] - 1 322 | 323 | 324 | trans = get_affine_transform(c, s, r, self.image_size) 325 | input = cv2.warpAffine( 326 | data_numpy, 327 | trans, 328 | (int(self.image_size[0]), int(self.image_size[1])), 329 | flags=cv2.INTER_LINEAR) 330 | 331 | if self.transform: 332 | input = self.transform(input) 333 | 334 | for i in range(self.num_joints): 335 | if joints_vis[i, 0] > 0.0: 336 | joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) 337 | 338 | target, target_weight = self.generate_target(joints, joints_vis) 339 | 340 | if isinstance(target, list): 341 | for index in range(len(target)): 342 | target[index] = torch.from_numpy(target[index]) 343 | else: 344 | target = torch.from_numpy(target) 345 | 346 | target_weight = torch.from_numpy(target_weight) 347 | 348 | if 'instance_index' not in db_rec: 349 | db_rec['instance_index'] = -1 350 | 351 | meta = { 352 | 'image': image_file, 353 | 'filename': filename, 354 | 'imgnum': imgnum, 355 | 'joints': joints, 356 | 'joints_vis': joints_vis, 357 | 'center': c, 358 | 'scale': s, 359 | 'rotation': r, 360 | 'score': score, 361 | 'instance_index':db_rec['instance_index'] 362 | } 363 | 364 | return input, target, target_weight, meta 365 | 366 | def select_data(self, db): 367 | db_selected = [] 368 | for rec in db: 369 | num_vis = 0 370 | joints_x = 0.0 371 | joints_y = 0.0 372 | for joint, joint_vis in zip( 373 | rec['joints_3d'], rec['joints_3d_vis']): 374 | if joint_vis[0] <= 0: 375 | continue 376 | num_vis += 1 377 | 378 | joints_x += joint[0] 379 | joints_y += joint[1] 380 | if num_vis == 0: 381 | continue 382 | 383 | joints_x, joints_y = joints_x / num_vis, joints_y / num_vis 384 | 385 | area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) 386 | 387 | joints_center = np.array([joints_x, joints_y]) 388 | bbox_center = np.array(rec['center']) 389 | diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) 390 | ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) 391 | 392 | metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 393 | 394 | if ks > metric: 395 | db_selected.append(rec) 396 | 397 | logger.info('=> num db: {}'.format(len(db))) 398 | logger.info('=> num selected db: {}'.format(len(db_selected))) 399 | return db_selected 400 | 401 | def get_forground_image(self, img): 402 | mask = np.zeros(img.shape[:2],np.uint8) 403 | bgdModel = np.zeros((1,65),np.float64) 404 | fgdModel = np.zeros((1,65),np.float64) 405 | rect = (0, 0, img.shape[1] - 1, img.shape[0] - 1) 406 | cv2.grabCut(img,mask,rect,bgdModel,fgdModel,5,cv2.GC_INIT_WITH_RECT) 407 | mask2 = 
np.where((mask==2)|(mask==0),0,1).astype('uint8') 408 | img = img*mask2[:,:,np.newaxis] 409 | cv2.imwrite(os.path.dirname((img)) + '_mask' + '/' + os.path.basename(img), img) 410 | return img 411 | 412 | def generate_target(self, joints, joints_vis): 413 | ''' 414 | :param joints: [num_joints, 3] 415 | :param joints_vis: [num_joints, 3] 416 | :return: target, target_weight(1: visible, 0: invisible) 417 | ''' 418 | target_weight = np.ones((self.num_joints, 1), dtype=np.float32) 419 | target_weight[:, 0] = joints_vis[:, 0] 420 | 421 | assert self.target_type == 'gaussian', \ 422 | 'Only support gaussian map now!' 423 | 424 | if self.target_type == 'gaussian': 425 | target = [np.zeros((self.num_joints, 426 | self.heatmap_size[1], 427 | self.heatmap_size[0]), 428 | dtype=np.float32), np.zeros((self.num_joints, 2), dtype=np.float32)] 429 | tmp_size = self.sigma * 3 430 | if False: 431 | tmp_hm = self.heatmap_size 432 | for joint_id in range(self.num_joints): 433 | heatmap_vis = joints_vis[joint_id, 0] 434 | target_weight[joint_id] = heatmap_vis 435 | feat_stride = self.image_size / tmp_hm 436 | mu_x = joints[joint_id][0] / feat_stride[0] 437 | mu_y = joints[joint_id][1] / feat_stride[1] 438 | ul = [mu_x - tmp_size, mu_y - tmp_size] 439 | br = [mu_x + tmp_size + 1, mu_y + tmp_size + 1] 440 | if ul[0] >= tmp_hm[0] or ul[1] >= tmp_hm[1] or br[0] < 0 or br[1] < 0: 441 | target_weight[joint_id] = 0 442 | 443 | if target_weight[joint_id] == 0: 444 | continue 445 | 446 | x = np.arange(0, tmp_hm[0], 1, np.float32) 447 | y = np.arange(0, tmp_hm[1], 1, np.float32) 448 | y = y[:, np.newaxis] 449 | 450 | v = target_weight[joint_id] 451 | if v > 0.5: 452 | target[joint_id] = np.exp(- ((x - mu_x) ** 2 + (y - mu_y) ** 2) / (2 * self.sigma ** 2)) 453 | 454 | else: 455 | for joint_id in range(self.num_joints): 456 | feat_stride = self.image_size / self.heatmap_size 457 | mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) 458 | mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) 459 | # Check that any part of the gaussian is in-bounds 460 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 461 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 462 | 463 | if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ 464 | or br[0] < 0 or br[1] < 0: 465 | target_weight[joint_id] = 0 466 | continue 467 | 468 | # Generate gaussian 469 | size = 2 * tmp_size + 1 470 | x = np.arange(0, size, 1, np.float32) 471 | y = x[:, np.newaxis] 472 | x0 = y0 = size // 2 473 | # The gaussian is not normalized, we want the center value to equal 1 474 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) 475 | 476 | g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 477 | g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 478 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 479 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 480 | 481 | v = target_weight[joint_id] 482 | if v > 0.5: 483 | target[0][joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ 484 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 485 | 486 | target[1][joint_id] = np.array([mu_x, mu_y], dtype=np.float32) 487 | 488 | if self.use_different_joints_weight: 489 | target_weight = np.multiply(target_weight, self.joints_weight) 490 | 491 | return target, target_weight 492 | 493 | 494 | 495 | 496 | 497 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from .mpii import MPIIDataset as mpii
12 | from .coco import COCODataset as coco
13 |
--------------------------------------------------------------------------------
/lib/dataset/advaug.py:
--------------------------------------------------------------------------------
1 | from PIL import Image, ImageEnhance, ImageOps
2 | import numpy as np
3 | import random
4 | import torch
5 | import logging
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class ImageNetPolicy(object):
11 |     """Randomly choose one of the 12 sub-policies below (adapted from the AutoAugment ImageNet policy).
12 |     Example:
13 |     >>> policy = ImageNetPolicy()
14 |     >>> transformed = policy(image)
15 |     Example as a PyTorch Transform:
16 |     >>> transform=transforms.Compose([
17 |     >>>     transforms.Resize(256),
18 |     >>>     ImageNetPolicy(),
19 |     >>>     transforms.ToTensor()])
20 |     """
21 |     def __init__(self, fillcolor=(128, 128, 128)):
22 |         self.policies = [
23 |             SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor),
24 |             SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor),
25 |             SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
26 |             SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor),
27 |             SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor),
28 |             SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor),
29 |             SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor),
30 |             SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
31 |             SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor),
32 |             SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
33 |             SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
34 |             SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor)
35 |         ]
36 |
37 |     def __call__(self, img):
38 |         policy_idx = random.randint(0, len(self.policies) - 1)
39 |         return self.policies[policy_idx](img)
40 |
41 |     def __repr__(self):
42 |         return "AutoAugment ImageNet Policy"
43 |
44 |
45 |
46 | class SubPolicy(object):
47 |     def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)):
48 |         ranges = {
49 |             "shearX": np.linspace(0, 0.3, 10),
50 |             "shearY": np.linspace(0, 0.3, 10),
51 |             "translateX": np.linspace(0, 150 / 331, 10),
52 |             "translateY": np.linspace(0, 150 / 331, 10),
53 |             "rotate": np.linspace(0, 30, 10),
54 |             "color": np.linspace(0.0, 0.9, 10),
55 |             "posterize": np.round(np.linspace(8, 4, 10), 0).astype(int),
56 |             "solarize": np.linspace(256, 0, 10),
57 |             "contrast": np.linspace(0.0, 0.9, 10),
58 |             "sharpness": np.linspace(0.0, 0.9, 10),
59 |             "brightness": np.linspace(0.0, 0.9, 10),
60 |             "autocontrast": [0] * 10,
61 |             "equalize": [0] * 10,
62 |             "invert": [0] * 10
63 |         }
64 |
65 |         def rotate_with_fill(img, magnitude):
66 |             rot = img.convert("RGBA").rotate(magnitude)
67 |             return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(img.mode)
68 |
69 |         func = {
70 |             "shearX": lambda img, magnitude: img.transform(
71 |                 img.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0),
72 |                 Image.BICUBIC, fillcolor=fillcolor),
73 |             "shearY": lambda img, magnitude: img.transform(
74 | 
img.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0), 75 | Image.BICUBIC, fillcolor=fillcolor), 76 | "translateX": lambda img, magnitude: img.transform( 77 | img.size, Image.AFFINE, (1, 0, magnitude * img.size[0] * random.choice([-1, 1]), 0, 1, 0), 78 | fillcolor=fillcolor), 79 | "translateY": lambda img, magnitude: img.transform( 80 | img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * img.size[1] * random.choice([-1, 1])), 81 | fillcolor=fillcolor), 82 | "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude), 83 | "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])), 84 | "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude), 85 | "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude), 86 | "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance( 87 | 1 + magnitude * random.choice([-1, 1])), 88 | "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance( 89 | 1 + magnitude * random.choice([-1, 1])), 90 | "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance( 91 | 1 + magnitude * random.choice([-1, 1])), 92 | "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img), 93 | "equalize": lambda img, magnitude: ImageOps.equalize(img), 94 | "invert": lambda img, magnitude: ImageOps.invert(img) 95 | } 96 | self.p1 = p1 97 | self.operation1 = func[operation1] 98 | self.magnitude1 = ranges[operation1][magnitude_idx1] 99 | self.p2 = p2 100 | self.operation2 = func[operation2] 101 | self.magnitude2 = ranges[operation2][magnitude_idx2] 102 | 103 | 104 | def __call__(self, img): 105 | if random.random() < self.p1: img = self.operation1(img, self.magnitude1) 106 | if random.random() < self.p2: img = self.operation2(img, self.magnitude2) 107 | return img 108 | 109 | 110 | 111 | def grid_aug(cfg, img, joints, joints_vis, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1., db_rec=None): 112 | if np.random.rand() > prob: 113 | return img, joints, joints_vis,db_rec 114 | db_rec['img_label'] = 1 115 | h = img.size(1) 116 | w = img.size(2) 117 | d1 = 2 118 | d2 = min(h, w) 119 | hh = int(1.5*h) 120 | ww = int(1.5*w) 121 | d = np.random.randint(d1, d2) 122 | if ratio == 1: 123 | l = np.random.randint(1, d) 124 | else: 125 | l = min(max(int(d*ratio+0.5),1),d-1) 126 | mask = np.ones((hh, ww), np.float32) 127 | st_h = np.random.randint(d) 128 | st_w = np.random.randint(d) 129 | if use_h: 130 | for i in range(hh//d): 131 | s = d*i + st_h 132 | t = min(s+l, hh) 133 | mask[s:t,:] *= 0 134 | if use_w: 135 | for i in range(ww//d): 136 | s = d*i + st_w 137 | t = min(s+l, ww) 138 | mask[:,s:t] *= 0 139 | 140 | r = np.random.randint(rotate) 141 | mask = Image.fromarray(np.uint8(mask)) 142 | mask = mask.rotate(r) 143 | mask = np.asarray(mask) 144 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 145 | 146 | mask = torch.from_numpy(mask).float() 147 | if mode == 1: 148 | mask = 1-mask 149 | 150 | tmp_mask = mask 151 | mask = mask.expand_as(img) 152 | if offset: 153 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 154 | offset = (1 - mask) * offset 155 | img = img * mask + offset 156 | else: 157 | img = img * mask 158 | 159 | for joint_id in range(cfg.joints_num): 160 | joint = joints[joint_id][:2] 161 | tmp_x = min(int(joint[0]), tmp_mask.shape[1] - 1) 162 | tmp_x = max(tmp_x, 0) 163 | tmp_y = min(int(joint[1]), tmp_mask.shape[0] - 1) 164 | tmp_y = max(tmp_y, 0) 165 | 166 | if tmp_mask[tmp_y, tmp_x] == 0: 
167 | joints_vis[joint_id][0] = 0 168 | joints_vis[joint_id][1] = 0 169 | 170 | return img, joints, joints_vis,db_rec 171 | 172 | 173 | class MixCombine(object): 174 | def __init__(self, to_float32=False): 175 | self.to_float32 = to_float32 176 | self.autoaug = ImageNetPolicy(fillcolor=(128, 128, 128)) 177 | def __call__(self, inputs, transform): 178 | db_rec, get_clean, cfg = inputs 179 | if get_clean != 'clean': 180 | if get_clean == 'autoaug': 181 | if db_rec['dataset'] != 'style' or not cfg.sp_style: 182 | db_rec['img_label'] = 1 183 | data_numpy = db_rec['data_numpy'] 184 | tmp_img = Image.fromarray(data_numpy.astype(np.uint8)) 185 | data_numpy = self.autoaug(tmp_img) 186 | db_rec['data_numpy'] = np.array(data_numpy) 187 | db_rec['data_numpy'] = transform(db_rec['data_numpy']) 188 | 189 | elif get_clean == 'gridmask': 190 | db_rec['data_numpy'] = transform(db_rec['data_numpy']) 191 | if db_rec['dataset'] != 'style' or not cfg.sp_style: 192 | rotate = 1 193 | offset=False 194 | ratio = 0.5 195 | mode=1 196 | prob = 0.7 197 | self.st_prob = prob 198 | data_numpy = db_rec['data_numpy'] 199 | joints = db_rec['joints_3d'] 200 | joints_vis = db_rec['joints_3d_vis'] 201 | 202 | data_numpy, joints, joints_vis, db_rec = grid_aug(cfg, data_numpy, joints, joints_vis, True, True, rotate, offset, ratio, mode, prob, db_rec) 203 | 204 | db_rec['data_numpy'] = data_numpy 205 | db_rec['joints_3d'] = joints 206 | db_rec['joints_3d_vis'] = joints_vis 207 | 208 | else: 209 | db_rec['data_numpy'] = transform(db_rec['data_numpy']) 210 | 211 | return db_rec, cfg 212 | 213 | def set_prob(self, epoch, max_epoch): 214 | self.prob = self.st_prob * epoch / max_epoch 215 | -------------------------------------------------------------------------------- /lib/dataset/coco.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from collections import defaultdict 12 | from collections import OrderedDict 13 | import logging 14 | import os, cv2 15 | 16 | from pycocotools.coco import COCO 17 | from pycocotools.cocoeval import COCOeval 18 | import json_tricks as json 19 | import numpy as np 20 | 21 | from dataset.JointsDataset import JointsDataset 22 | from nms.nms import oks_nms 23 | from nms.nms import soft_oks_nms 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class COCODataset(JointsDataset): 30 | def __init__(self, cfg, args ,root, image_set, is_train, transform=None): 31 | super().__init__(cfg, args, root, image_set, is_train, transform) 32 | self.args = args 33 | self.nms_thre = cfg.TEST.NMS_THRE 34 | self.image_thre = cfg.TEST.IMAGE_THRE 35 | self.soft_nms = cfg.TEST.SOFT_NMS 36 | self.oks_thre = cfg.TEST.OKS_THRE 37 | self.in_vis_thre = cfg.TEST.IN_VIS_THRE 38 | self.bbox_file = cfg.TEST.COCO_BBOX_FILE 39 | self.use_gt_bbox = cfg.TEST.USE_GT_BBOX 40 | self.image_width = cfg.MODEL.IMAGE_SIZE[0] 41 | self.image_height = cfg.MODEL.IMAGE_SIZE[1] 42 | self.aspect_ratio = self.image_width * 1.0 / self.image_height 43 | self.pixel_std = 200 44 | 45 | # add paramters for test robustness 46 | self.test_robust = cfg.TEST.TEST_ROBUST 47 | self.corruption_type = cfg.TEST.CORRUPTION_TYPE 48 | self.root_c = cfg.DATASET.ROOT_C 49 | self.severity = cfg.TEST.SEVERITY 50 | self.mini_coco = cfg.DATASET.MINI_COCO 51 | self.coco = COCO(self._get_ann_file_keypoint()) 52 | 53 | cats = [cat['name'] 54 | for cat in self.coco.loadCats(self.coco.getCatIds())] 55 | self.classes = ['__background__'] + cats 56 | logger.info('=> classes: {}'.format(self.classes)) 57 | self.num_classes = len(self.classes) 58 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 59 | self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) 60 | self._coco_ind_to_class_ind = dict( 61 | [ 62 | (self._class_to_coco_ind[cls], self._class_to_ind[cls]) 63 | for cls in self.classes[1:] 64 | ] 65 | ) 66 | self.image_set_index = self._load_image_set_index() 67 | self.num_images = len(self.image_set_index) 68 | logger.info('=> num_images: {}'.format(self.num_images)) 69 | 70 | self.num_joints = 17 71 | self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], 72 | [9, 10], [11, 12], [13, 14], [15, 16]] 73 | self.parent_ids = None 74 | self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 75 | self.lower_body_ids = (11, 12, 13, 14, 15, 16) 76 | 77 | self.joints_weight = np.array( 78 | [ 79 | 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 80 | 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5 81 | ], 82 | dtype=np.float32 83 | ).reshape((self.num_joints, 1)) 84 | 85 | self.db = self._get_db() 86 | 87 | if is_train and cfg.DATASET.SELECT_DATA: 88 | self.db = self.select_data(self.db) 89 | 90 | logger.info('=> load {} samples'.format(len(self.db))) 91 | 92 | def _get_ann_file_keypoint(self): 93 | """ self.root / annotations / person_keypoints_train2017.json """ 94 | prefix = 'person_keypoints' \ 95 | if 'test' not in self.image_set else 'image_info' 96 | 97 | json_path = os.path.join( 98 | self.root, 99 | 'annotations', 100 | prefix + '_' + self.image_set + '.json' 101 | ) 102 | if not os.path.exists(json_path): 103 | json_path = os.path.join( 104 | 'data/coco', 105 | 'annotations', 
106 | prefix + '_' + self.image_set + '.json' 107 | ) 108 | return json_path 109 | 110 | def _load_image_set_index(self): 111 | """ image id: int """ 112 | if self.mini_coco: 113 | image_ids = self.coco.getImgIds()[:200] 114 | else: 115 | image_ids = self.coco.getImgIds() 116 | return image_ids 117 | 118 | def _get_db(self): 119 | if self.is_train or self.use_gt_bbox: 120 | gt_db = self._load_coco_keypoint_annotations() 121 | else: 122 | if self.mini_coco: 123 | gt_db = self._load_coco_keypoint_annotations() 124 | else: 125 | gt_db = self._load_coco_person_detection_results() 126 | return gt_db 127 | 128 | def _load_coco_keypoint_annotations(self): 129 | """ ground truth bbox and keypoints """ 130 | gt_db = [] 131 | for index in self.image_set_index: 132 | gt_db.extend(self._load_coco_keypoint_annotation_kernal(index)) 133 | return gt_db 134 | 135 | def _load_coco_keypoint_annotation_kernal(self, index): 136 | """ 137 | coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] 138 | iscrowd: 139 | crowd instances are handled by marking their overlaps with all categories to -1 140 | and later excluded in training 141 | bbox: 142 | [x1, y1, w, h] 143 | :param index: coco image id 144 | :return: db entry 145 | """ 146 | im_ann = self.coco.loadImgs(index)[0] 147 | width = im_ann['width'] 148 | height = im_ann['height'] 149 | 150 | annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False) 151 | objs = self.coco.loadAnns(annIds) 152 | 153 | valid_objs = [] 154 | for obj in objs: 155 | x, y, w, h = obj['bbox'] 156 | x1 = np.max((0, x)) 157 | y1 = np.max((0, y)) 158 | x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) 159 | y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) 160 | if obj['area'] > 0 and x2 >= x1 and y2 >= y1: 161 | obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1] 162 | valid_objs.append(obj) 163 | objs = valid_objs 164 | 165 | rec = [] 166 | for kobj, obj in enumerate(objs): 167 | cls = self._coco_ind_to_class_ind[obj['category_id']] 168 | if cls != 1: 169 | continue 170 | 171 | if max(obj['keypoints']) == 0: 172 | continue 173 | 174 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 175 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 176 | for ipt in range(self.num_joints): 177 | joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0] 178 | joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1] 179 | joints_3d[ipt, 2] = 0 180 | t_vis = obj['keypoints'][ipt * 3 + 2] 181 | if t_vis > 1: 182 | t_vis = 1 183 | joints_3d_vis[ipt, 0] = t_vis 184 | joints_3d_vis[ipt, 1] = t_vis 185 | joints_3d_vis[ipt, 2] = 0 186 | 187 | center, scale = self._box2cs(obj['clean_bbox'][:4]) 188 | rec.append({ 189 | 'image': self.image_path_from_index(index), 190 | 'center': center, 191 | 'scale': scale, 192 | 'joints_3d': joints_3d, 193 | 'joints_3d_vis': joints_3d_vis, 194 | 'filename': '', 195 | 'imgnum': 0, 196 | 'instance_index':str(index) + '_' + str(kobj) 197 | }) 198 | 199 | return rec 200 | 201 | def _box2cs(self, box): 202 | x, y, w, h = box[:4] 203 | return self._xywh2cs(x, y, w, h) 204 | 205 | def _xywh2cs(self, x, y, w, h): 206 | center = np.zeros((2), dtype=np.float32) 207 | center[0] = x + w * 0.5 208 | center[1] = y + h * 0.5 209 | 210 | if w > self.aspect_ratio * h: 211 | h = w * 1.0 / self.aspect_ratio 212 | elif w < self.aspect_ratio * h: 213 | w = h * self.aspect_ratio 214 | scale = np.array( 215 | [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], 216 | dtype=np.float32) 217 | 218 | if center[0] != -1: 219 | scale = scale * 1.25 220 | 
return center, scale 221 | 222 | def image_path_from_index(self, index): 223 | """ example: images / train2017 / 000000119993.jpg """ 224 | # testing the robustnss 225 | if self.test_robust and self.corruption_type != 'clean': 226 | file_name = self.corruption_type + '/' + str(self.severity) + '/' + '%012d.jpg' % index 227 | else: 228 | file_name = '%012d.jpg' % index 229 | 230 | if '2014' in self.image_set: 231 | file_name = 'COCO_%s_' % self.image_set + file_name 232 | 233 | prefix = 'test2017' if 'test' in self.image_set else self.image_set 234 | 235 | data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix 236 | 237 | if self.test_robust and self.corruption_type != 'clean': 238 | image_path = os.path.join( 239 | self.root_c, file_name) 240 | else: 241 | if 'stylize_image' in self.root: 242 | image_path = os.path.join( 243 | self.root, file_name) 244 | else: 245 | image_path = os.path.join( 246 | self.root, data_name, file_name) 247 | return image_path 248 | 249 | def _load_coco_person_detection_results(self): 250 | all_boxes = None 251 | with open(self.bbox_file, 'r') as f: 252 | all_boxes = json.load(f) 253 | 254 | if not all_boxes: 255 | logger.error('=> Load %s fail!' % self.bbox_file) 256 | return None 257 | 258 | logger.info('=> Total boxes: {}'.format(len(all_boxes))) 259 | 260 | kpt_db = [] 261 | num_boxes = 0 262 | if self.mini_coco: 263 | all_boxes = all_boxes[:100] 264 | 265 | for n_img in range(0, len(all_boxes)): 266 | det_res = all_boxes[n_img] 267 | if det_res['category_id'] != 1: 268 | continue 269 | img_name = self.image_path_from_index(det_res['image_id']) 270 | box = det_res['bbox'] 271 | score = det_res['score'] 272 | 273 | if score < self.image_thre: 274 | continue 275 | 276 | num_boxes = num_boxes + 1 277 | 278 | center, scale = self._box2cs(box) 279 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 280 | joints_3d_vis = np.ones( 281 | (self.num_joints, 3), dtype=np.float) 282 | kpt_db.append({ 283 | 'image': img_name, 284 | 'center': center, 285 | 'scale': scale, 286 | 'score': score, 287 | 'joints_3d': joints_3d, 288 | 'joints_3d_vis': joints_3d_vis, 289 | }) 290 | 291 | logger.info('=> Total boxes after fliter low score@{}: {}'.format( 292 | self.image_thre, num_boxes)) 293 | return kpt_db 294 | 295 | def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, 296 | *args, **kwargs): 297 | rank = cfg.RANK 298 | 299 | res_folder = os.path.join(output_dir, 'results') 300 | if not os.path.exists(res_folder): 301 | try: 302 | os.makedirs(res_folder) 303 | except Exception: 304 | logger.error('Fail to make {}'.format(res_folder)) 305 | 306 | if self.test_robust and self.corruption_type != 'clean': 307 | 308 | res_file = os.path.join( 309 | res_folder, 'keypoints_{}_results_{}_{}_{}.json'.format( 310 | self.image_set, rank, self.corruption_type, self.severity) 311 | ) 312 | 313 | else: 314 | res_file = os.path.join( 315 | res_folder, 'keypoints_{}_results_{}.json'.format( 316 | self.image_set, rank) 317 | ) 318 | 319 | _kpts = [] 320 | 321 | for idx, kpt in enumerate(preds): 322 | image_idx = int(img_path[idx][-16:-4]) 323 | _kpts.append({ 324 | 'keypoints': kpt, 325 | 'center': all_boxes[idx][0:2], 326 | 'scale': all_boxes[idx][2:4], 327 | 'area': all_boxes[idx][4], 328 | 'score': all_boxes[idx][5], 329 | 'image': image_idx 330 | }) 331 | kpts = defaultdict(list) 332 | for kpt in _kpts: 333 | kpts[kpt['image']].append(kpt) 334 | 335 | num_joints = self.num_joints 336 | in_vis_thre = self.in_vis_thre 337 | oks_thre = self.oks_thre 338 | 
oks_nmsed_kpts = [] 339 | for img in kpts.keys(): 340 | img_kpts = kpts[img] 341 | for n_p in img_kpts: 342 | box_score = n_p['score'] 343 | kpt_score = 0 344 | valid_num = 0 345 | for n_jt in range(0, num_joints): 346 | t_s = n_p['keypoints'][n_jt][2] 347 | if t_s > in_vis_thre: 348 | kpt_score = kpt_score + t_s 349 | valid_num = valid_num + 1 350 | if valid_num != 0: 351 | kpt_score = kpt_score / valid_num 352 | 353 | n_p['score'] = kpt_score * box_score 354 | 355 | if self.soft_nms: 356 | keep = soft_oks_nms( 357 | [img_kpts[i] for i in range(len(img_kpts))], 358 | oks_thre 359 | ) 360 | else: 361 | keep = oks_nms( 362 | [img_kpts[i] for i in range(len(img_kpts))], 363 | oks_thre 364 | ) 365 | 366 | if len(keep) == 0: 367 | oks_nmsed_kpts.append(img_kpts) 368 | else: 369 | oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) 370 | 371 | self._write_coco_keypoint_results( 372 | oks_nmsed_kpts, res_file) 373 | if 'test' not in self.image_set: 374 | info_str = self._do_python_keypoint_eval( 375 | res_file, res_folder) 376 | name_value = OrderedDict(info_str) 377 | return name_value, name_value['AP'] 378 | else: 379 | return {'Null': 0}, 0 380 | 381 | def _write_coco_keypoint_results(self, keypoints, res_file): 382 | data_pack = [ 383 | { 384 | 'cat_id': self._class_to_coco_ind[cls], 385 | 'cls_ind': cls_ind, 386 | 'cls': cls, 387 | 'ann_type': 'keypoints', 388 | 'keypoints': keypoints 389 | } 390 | for cls_ind, cls in enumerate(self.classes) if not cls == '__background__' 391 | ] 392 | results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) 393 | logger.info('=> writing results json to %s' % res_file) 394 | with open(res_file, 'w') as f: 395 | json.dump(results, f, sort_keys=True, indent=4) 396 | try: 397 | json.load(open(res_file)) 398 | except Exception: 399 | content = [] 400 | with open(res_file, 'r') as f: 401 | for line in f: 402 | content.append(line) 403 | content[-1] = ']' 404 | with open(res_file, 'w') as f: 405 | for c in content: 406 | f.write(c) 407 | 408 | def _coco_keypoint_results_one_category_kernel(self, data_pack): 409 | cat_id = data_pack['cat_id'] 410 | keypoints = data_pack['keypoints'] 411 | cat_results = [] 412 | for img_kpts in keypoints: 413 | if len(img_kpts) == 0: 414 | continue 415 | 416 | _key_points = np.array([img_kpts[k]['keypoints'] 417 | for k in range(len(img_kpts))]) 418 | key_points = np.zeros( 419 | (_key_points.shape[0], self.num_joints * 3), dtype=np.float 420 | ) 421 | for ipt in range(self.num_joints): 422 | key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] 423 | key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] 424 | key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] 425 | 426 | result = [ 427 | { 428 | 'image_id': img_kpts[k]['image'], 429 | 'category_id': cat_id, 430 | 'keypoints': list(key_points[k]), 431 | 'score': img_kpts[k]['score'], 432 | 'center': list(img_kpts[k]['center']), 433 | 'scale': list(img_kpts[k]['scale']) 434 | } 435 | for k in range(len(img_kpts)) 436 | ] 437 | cat_results.extend(result) 438 | 439 | return cat_results 440 | 441 | def _do_python_keypoint_eval(self, res_file, res_folder): 442 | coco_dt = self.coco.loadRes(res_file) 443 | coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') 444 | coco_eval.params.useSegm = None 445 | coco_eval.evaluate() 446 | coco_eval.accumulate() 447 | coco_eval.summarize() 448 | 449 | stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] 450 | info_str = [] 451 | for ind, name in enumerate(stats_names): 452 | 
info_str.append((name, coco_eval.stats[ind])) 453 | 454 | return info_str 455 | -------------------------------------------------------------------------------- /lib/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | import os 13 | import json_tricks as json 14 | from collections import OrderedDict 15 | 16 | import numpy as np 17 | from scipy.io import loadmat, savemat 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class MPIIDataset(JointsDataset): 26 | def __init__(self, cfg, args, root, image_set, is_train, transform=None): 27 | super().__init__(cfg, args, root, image_set, is_train, transform) 28 | 29 | # add paramters for test robustness 30 | self.args = args 31 | self.test_robust = cfg.TEST.TEST_ROBUST 32 | self.corruption_type = cfg.TEST.CORRUPTION_TYPE 33 | self.root_c = cfg.DATASET.ROOT_C 34 | self.severity = cfg.TEST.SEVERITY 35 | 36 | self.num_joints = 16 37 | self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] 38 | self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] 39 | 40 | self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15) 41 | self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6) 42 | 43 | self.db = self._get_db() 44 | 45 | if is_train and cfg.DATASET.SELECT_DATA: 46 | self.db = self.select_data(self.db) 47 | 48 | logger.info('=> load {} samples'.format(len(self.db))) 49 | 50 | def _get_db(self): 51 | 52 | file_name = os.path.join( 53 | self.root, 'annot', self.image_set+'.json' 54 | ) 55 | with open(file_name) as anno_file: 56 | anno = json.load(anno_file) 57 | 58 | gt_db = [] 59 | for a in anno: 60 | 61 | if self.test_robust and self.corruption_type != 'clean': 62 | image_name = self.corruption_type + '/' + str(self.severity) + '/' + a['image'] 63 | else: 64 | image_name = a['image'] 65 | 66 | c = np.array(a['center'], dtype=np.float) 67 | s = np.array([a['scale'], a['scale']], dtype=np.float) 68 | 69 | # Adjust center/scale slightly to avoid cropping limbs 70 | if c[0] != -1: 71 | c[1] = c[1] + 15 * s[1] 72 | s = s * 1.25 73 | 74 | # MPII uses matlab format, index is based 1, 75 | # we should first convert to 0-based index 76 | c = c - 1 77 | 78 | joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) 79 | joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) 80 | if self.image_set != 'test': 81 | joints = np.array(a['joints']) 82 | joints[:, 0:2] = joints[:, 0:2] - 1 83 | joints_vis = np.array(a['joints_vis']) 84 | assert len(joints) == self.num_joints, \ 85 | 'joint num diff: {} vs {}'.format(len(joints), 86 | self.num_joints) 87 | 88 | joints_3d[:, 0:2] = joints[:, 0:2] 89 | joints_3d_vis[:, 0] = joints_vis[:] 90 | joints_3d_vis[:, 1] = joints_vis[:] 91 | 92 | image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' 93 | 94 | if self.test_robust and self.corruption_type != 'clean': 95 | image_path = os.path.join( 96 | self.root_c, image_name) 97 | else: 98 | image_path = os.path.join(self.root, image_dir, image_name) 99 | 100 | gt_db.append( 101 | { 102 
| 'image': image_path, 103 | 'center': c, 104 | 'scale': s, 105 | 'joints_3d': joints_3d, 106 | 'joints_3d_vis': joints_3d_vis, 107 | 'filename': '', 108 | 'imgnum': 0, 109 | } 110 | ) 111 | 112 | return gt_db 113 | 114 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 115 | # convert 0-based index to 1-based index 116 | preds = preds[:, :, 0:2] + 1.0 117 | 118 | if output_dir: 119 | if self.test_robust: 120 | pred_file = os.path.join(output_dir, '{}_{}_pred.mat'.format(self.corruption_type, self.severity)) 121 | savemat(pred_file, mdict={'preds': preds}) 122 | else: 123 | pred_file = os.path.join(output_dir, 'pred.mat') 124 | savemat(pred_file, mdict={'preds': preds}) 125 | 126 | if 'test' in cfg.DATASET.TEST_SET: 127 | return {'Null': 0.0}, 0.0 128 | 129 | SC_BIAS = 0.6 130 | threshold = 0.5 131 | 132 | gt_file = os.path.join(cfg.DATASET.ROOT, 133 | 'annot', 134 | 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) 135 | gt_dict = loadmat(gt_file) 136 | dataset_joints = gt_dict['dataset_joints'] 137 | jnt_missing = gt_dict['jnt_missing'] 138 | pos_gt_src = gt_dict['pos_gt_src'] 139 | headboxes_src = gt_dict['headboxes_src'] 140 | 141 | pos_pred_src = np.transpose(preds, [1, 2, 0]) 142 | 143 | head = np.where(dataset_joints == 'head')[1][0] 144 | lsho = np.where(dataset_joints == 'lsho')[1][0] 145 | lelb = np.where(dataset_joints == 'lelb')[1][0] 146 | lwri = np.where(dataset_joints == 'lwri')[1][0] 147 | lhip = np.where(dataset_joints == 'lhip')[1][0] 148 | lkne = np.where(dataset_joints == 'lkne')[1][0] 149 | lank = np.where(dataset_joints == 'lank')[1][0] 150 | 151 | rsho = np.where(dataset_joints == 'rsho')[1][0] 152 | relb = np.where(dataset_joints == 'relb')[1][0] 153 | rwri = np.where(dataset_joints == 'rwri')[1][0] 154 | rkne = np.where(dataset_joints == 'rkne')[1][0] 155 | rank = np.where(dataset_joints == 'rank')[1][0] 156 | rhip = np.where(dataset_joints == 'rhip')[1][0] 157 | 158 | jnt_visible = 1 - jnt_missing 159 | uv_error = pos_pred_src - pos_gt_src 160 | uv_err = np.linalg.norm(uv_error, axis=1) 161 | headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] 162 | headsizes = np.linalg.norm(headsizes, axis=0) 163 | headsizes *= SC_BIAS 164 | scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) 165 | scaled_uv_err = np.divide(uv_err, scale) 166 | scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) 167 | jnt_count = np.sum(jnt_visible, axis=1) 168 | less_than_threshold = np.multiply((scaled_uv_err <= threshold), 169 | jnt_visible) 170 | PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) 171 | 172 | rng = np.arange(0, 0.5+0.01, 0.01) 173 | pckAll = np.zeros((len(rng), 16)) 174 | 175 | for r in range(len(rng)): 176 | threshold = rng[r] 177 | less_than_threshold = np.multiply(scaled_uv_err <= threshold, 178 | jnt_visible) 179 | pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), 180 | jnt_count) 181 | 182 | PCKh = np.ma.array(PCKh, mask=False) 183 | PCKh.mask[6:8] = True 184 | 185 | jnt_count = np.ma.array(jnt_count, mask=False) 186 | jnt_count.mask[6:8] = True 187 | jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) 188 | 189 | name_value = [ 190 | ('Head', PCKh[head]), 191 | ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), 192 | ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), 193 | ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), 194 | ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), 195 | ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), 196 | ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), 197 | ('Mean', np.sum(PCKh * jnt_ratio)), 198 | 
('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) 199 | ] 200 | name_value = OrderedDict(name_value) 201 | 202 | return name_value, name_value['Mean'] -------------------------------------------------------------------------------- /lib/models/Unet_generator.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import logging 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | import functools 12 | 13 | class UnetBlock(nn.Module): 14 | """Defines the Unet submodule with skip connection. 15 | X -------------------identity---------------------- 16 | |-- downsampling -- |submodule| -- upsampling --| 17 | """ 18 | def __init__(self, outer_nc, inner_nc, input_nc=None, 19 | submodule=None, outermost=False, innermost=False, norm_layer=nn.InstanceNorm2d, use_dropout=False, with_tanh=True): 20 | """Construct a Unet submodule with skip connections. 21 | 22 | Parameters: 23 | outer_nc (int) -- the number of filters in the outer conv layer 24 | inner_nc (int) -- the number of filters in the inner conv layer 25 | input_nc (int) -- the number of channels in input images/features 26 | submodule (UnetSkipConnectionBlock) -- previously defined submodules 27 | outermost (bool) -- if this module is the outermost module 28 | innermost (bool) -- if this module is the innermost module 29 | norm_layer -- normalization layer 30 | user_dropout (bool) -- if use dropout layers. 31 | """ 32 | super(UnetBlock, self).__init__() 33 | self.outermost = outermost 34 | if type(norm_layer) == functools.partial: 35 | use_bias = norm_layer.func == nn.InstanceNorm2d 36 | else: 37 | use_bias = norm_layer == nn.InstanceNorm2d 38 | if input_nc is None: 39 | input_nc = outer_nc 40 | downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, 41 | stride=2, padding=1, bias=use_bias) 42 | downrelu = nn.LeakyReLU(0.2, True) 43 | downnorm = norm_layer(inner_nc) 44 | uprelu = nn.ReLU(True) 45 | upnorm = norm_layer(outer_nc) 46 | 47 | if outermost: 48 | upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, 49 | kernel_size=4, stride=2, 50 | padding=1) 51 | down = [downconv] 52 | if with_tanh: 53 | up = [uprelu, upconv, nn.Tanh()] 54 | else: 55 | up = [uprelu, upconv] 56 | model = down + [submodule] + up 57 | elif innermost: 58 | upconv = nn.ConvTranspose2d(inner_nc, outer_nc, 59 | kernel_size=4, stride=2, 60 | padding=1, bias=use_bias) 61 | down = [downrelu, downconv] 62 | 63 | up = [uprelu, upconv, upnorm] 64 | model = down + up 65 | else: 66 | upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, 67 | kernel_size=4, stride=2, 68 | padding=1, bias=use_bias) 69 | down = [downrelu, downconv, downnorm] 70 | up = [uprelu, upconv, upnorm] 71 | 72 | if use_dropout: 73 | model = down + [submodule] + up + [nn.Dropout(0.5)] 74 | else: 75 | model = down + [submodule] + up 76 | 77 | self.model = nn.Sequential(*model) 78 | 79 | def forward(self, x): 80 | if self.outermost: 81 | return self.model(x) 82 | else: 83 | return torch.cat([x, self.model(x)], 1) 84 | 85 | class UnetGenerator(nn.Module): 86 | """Create a Unet-based generator""" 87 | 88 | def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.InstanceNorm2d, use_dropout=False, with_tanh=False): 89 | """Construct a Unet generator 90 | Parameters: 91 | input_nc (int) -- the number of channels in input images 92 | output_nc (int) -- the number of channels in output images 93 | num_downs (int) -- the number of 
downsamplings in UNet. For example, if |num_downs| == 7,
94 |                                 an image of size 128x128 will become of size 1x1 at the bottleneck
95 |             ngf (int) -- the number of filters in the last conv layer
96 |             norm_layer -- normalization layer
97 |
98 |         We construct the U-Net from the innermost layer to the outermost layer.
99 |         It is a recursive process.
100 |         """
101 |
102 |         super(UnetGenerator, self).__init__()
103 |         unet_block = UnetBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True, with_tanh=with_tanh)  # add the innermost layer
104 |         for i in range(num_downs - 5):
105 |             unet_block = UnetBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout, with_tanh=with_tanh)
106 |         unet_block = UnetBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, with_tanh=with_tanh)
107 |         unet_block = UnetBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer, with_tanh=with_tanh)
108 |         unet_block = UnetBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer, with_tanh=with_tanh)
109 |         self.model = UnetBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer, with_tanh=with_tanh)  # add the outermost layer
110 |     def forward(self, input):
111 |         """Standard forward"""
112 |         return self.model(input)
113 |
--------------------------------------------------------------------------------
/lib/models/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import models.pose_resnet
12 | import models.pose_hrnet
13 | import models.Unet_generator
--------------------------------------------------------------------------------
/lib/models/pose_hrnet.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | BN_MOMENTUM = 0.1 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=1, bias=False) 26 | 27 | 28 | class BasicBlock(nn.Module): 29 | expansion = 1 30 | 31 | def __init__(self, inplanes, planes, stride=1, downsample=None): 32 | super(BasicBlock, self).__init__() 33 | self.conv1 = conv3x3(inplanes, planes, stride) 34 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 35 | self.relu = nn.ReLU(inplace=True) 36 | self.conv2 = conv3x3(planes, planes) 37 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 38 | self.downsample = downsample 39 | self.stride = stride 40 | 41 | def forward(self, x): 42 | residual = x 43 | 44 | out = self.conv1(x) 45 | out = self.bn1(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | 54 | out += residual 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class Bottleneck(nn.Module): 61 | expansion = 4 62 | 63 | def __init__(self, inplanes, planes, stride=1, downsample=None): 64 | super(Bottleneck, self).__init__() 65 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 66 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 67 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 68 | padding=1, bias=False) 69 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 70 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 71 | bias=False) 72 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 73 | momentum=BN_MOMENTUM) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.downsample = downsample 76 | self.stride = stride 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | out += residual 96 | out = self.relu(out) 97 | 98 | return out 99 | 100 | 101 | class HighResolutionModule(nn.Module): 102 | def __init__(self, num_branches, blocks, num_blocks, num_inchannels, 103 | num_channels, fuse_method, multi_scale_output=True): 104 | super(HighResolutionModule, self).__init__() 105 | self._check_branches( 106 | num_branches, blocks, num_blocks, num_inchannels, num_channels) 107 | 108 | self.num_inchannels = num_inchannels 109 | self.fuse_method = fuse_method 110 | self.num_branches = num_branches 111 | 112 | self.multi_scale_output = multi_scale_output 113 | 114 | self.branches = self._make_branches( 115 | num_branches, blocks, num_blocks, num_channels) 116 | self.fuse_layers = self._make_fuse_layers() 117 | self.relu = nn.ReLU(True) 118 | 119 | def _check_branches(self, num_branches, blocks, num_blocks, 120 | num_inchannels, num_channels): 121 | if num_branches != len(num_blocks): 122 | error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( 
123 | num_branches, len(num_blocks)) 124 | logger.error(error_msg) 125 | raise ValueError(error_msg) 126 | 127 | if num_branches != len(num_channels): 128 | error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( 129 | num_branches, len(num_channels)) 130 | logger.error(error_msg) 131 | raise ValueError(error_msg) 132 | 133 | if num_branches != len(num_inchannels): 134 | error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( 135 | num_branches, len(num_inchannels)) 136 | logger.error(error_msg) 137 | raise ValueError(error_msg) 138 | 139 | def _make_one_branch(self, branch_index, block, num_blocks, num_channels, 140 | stride=1): 141 | downsample = None 142 | if stride != 1 or \ 143 | self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: 144 | downsample = nn.Sequential( 145 | nn.Conv2d( 146 | self.num_inchannels[branch_index], 147 | num_channels[branch_index] * block.expansion, 148 | kernel_size=1, stride=stride, bias=False 149 | ), 150 | nn.BatchNorm2d( 151 | num_channels[branch_index] * block.expansion, 152 | momentum=BN_MOMENTUM 153 | ), 154 | ) 155 | 156 | layers = [] 157 | layers.append( 158 | block( 159 | self.num_inchannels[branch_index], 160 | num_channels[branch_index], 161 | stride, 162 | downsample 163 | ) 164 | ) 165 | self.num_inchannels[branch_index] = \ 166 | num_channels[branch_index] * block.expansion 167 | for i in range(1, num_blocks[branch_index]): 168 | layers.append( 169 | block( 170 | self.num_inchannels[branch_index], 171 | num_channels[branch_index] 172 | ) 173 | ) 174 | 175 | return nn.Sequential(*layers) 176 | 177 | def _make_branches(self, num_branches, block, num_blocks, num_channels): 178 | branches = [] 179 | 180 | for i in range(num_branches): 181 | branches.append( 182 | self._make_one_branch(i, block, num_blocks, num_channels) 183 | ) 184 | 185 | return nn.ModuleList(branches) 186 | 187 | def _make_fuse_layers(self): 188 | if self.num_branches == 1: 189 | return None 190 | 191 | num_branches = self.num_branches 192 | num_inchannels = self.num_inchannels 193 | fuse_layers = [] 194 | for i in range(num_branches if self.multi_scale_output else 1): 195 | fuse_layer = [] 196 | for j in range(num_branches): 197 | if j > i: 198 | fuse_layer.append( 199 | nn.Sequential( 200 | nn.Conv2d( 201 | num_inchannels[j], 202 | num_inchannels[i], 203 | 1, 1, 0, bias=False 204 | ), 205 | nn.BatchNorm2d(num_inchannels[i]), 206 | nn.Upsample(scale_factor=2**(j-i), mode='nearest') 207 | ) 208 | ) 209 | elif j == i: 210 | fuse_layer.append(None) 211 | else: 212 | conv3x3s = [] 213 | for k in range(i-j): 214 | if k == i - j - 1: 215 | num_outchannels_conv3x3 = num_inchannels[i] 216 | conv3x3s.append( 217 | nn.Sequential( 218 | nn.Conv2d( 219 | num_inchannels[j], 220 | num_outchannels_conv3x3, 221 | 3, 2, 1, bias=False 222 | ), 223 | nn.BatchNorm2d(num_outchannels_conv3x3) 224 | ) 225 | ) 226 | else: 227 | num_outchannels_conv3x3 = num_inchannels[j] 228 | conv3x3s.append( 229 | nn.Sequential( 230 | nn.Conv2d( 231 | num_inchannels[j], 232 | num_outchannels_conv3x3, 233 | 3, 2, 1, bias=False 234 | ), 235 | nn.BatchNorm2d(num_outchannels_conv3x3), 236 | nn.ReLU(True) 237 | ) 238 | ) 239 | fuse_layer.append(nn.Sequential(*conv3x3s)) 240 | fuse_layers.append(nn.ModuleList(fuse_layer)) 241 | 242 | return nn.ModuleList(fuse_layers) 243 | 244 | def get_num_inchannels(self): 245 | return self.num_inchannels 246 | 247 | def forward(self, x): 248 | if self.num_branches == 1: 249 | return [self.branches[0](x[0])] 250 | 251 | for i in 
range(self.num_branches): 252 | x[i] = self.branches[i](x[i]) 253 | 254 | x_fuse = [] 255 | 256 | for i in range(len(self.fuse_layers)): 257 | y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) 258 | for j in range(1, self.num_branches): 259 | if i == j: 260 | y = y + x[j] 261 | else: 262 | y = y + self.fuse_layers[i][j](x[j]) 263 | x_fuse.append(self.relu(y)) 264 | 265 | return x_fuse 266 | 267 | 268 | blocks_dict = { 269 | 'BASIC': BasicBlock, 270 | 'BOTTLENECK': Bottleneck 271 | } 272 | 273 | 274 | class PoseHighResolutionNet(nn.Module): 275 | 276 | def __init__(self, cfg, **kwargs): 277 | self.inplanes = 64 278 | extra = cfg['MODEL']['EXTRA'] 279 | super(PoseHighResolutionNet, self).__init__() 280 | 281 | # stem net 282 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, 283 | bias=False) 284 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 285 | self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, 286 | bias=False) 287 | self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 288 | self.relu = nn.ReLU(inplace=True) 289 | self.layer1 = self._make_layer(Bottleneck, 64, 4) 290 | 291 | self.stage2_cfg = extra['STAGE2'] 292 | num_channels = self.stage2_cfg['NUM_CHANNELS'] 293 | block = blocks_dict[self.stage2_cfg['BLOCK']] 294 | num_channels = [ 295 | num_channels[i] * block.expansion for i in range(len(num_channels)) 296 | ] 297 | self.transition1 = self._make_transition_layer([256], num_channels) 298 | self.stage2, pre_stage_channels = self._make_stage( 299 | self.stage2_cfg, num_channels) 300 | 301 | self.stage3_cfg = extra['STAGE3'] 302 | num_channels = self.stage3_cfg['NUM_CHANNELS'] 303 | block = blocks_dict[self.stage3_cfg['BLOCK']] 304 | num_channels = [ 305 | num_channels[i] * block.expansion for i in range(len(num_channels)) 306 | ] 307 | self.transition2 = self._make_transition_layer( 308 | pre_stage_channels, num_channels) 309 | self.stage3, pre_stage_channels = self._make_stage( 310 | self.stage3_cfg, num_channels) 311 | 312 | self.stage4_cfg = extra['STAGE4'] 313 | num_channels = self.stage4_cfg['NUM_CHANNELS'] 314 | block = blocks_dict[self.stage4_cfg['BLOCK']] 315 | num_channels = [ 316 | num_channels[i] * block.expansion for i in range(len(num_channels)) 317 | ] 318 | self.transition3 = self._make_transition_layer( 319 | pre_stage_channels, num_channels) 320 | self.stage4, pre_stage_channels = self._make_stage( 321 | self.stage4_cfg, num_channels, multi_scale_output=False) 322 | 323 | self.final_layer = nn.Conv2d( 324 | in_channels=pre_stage_channels[0], 325 | out_channels=cfg['MODEL']['NUM_JOINTS'], 326 | kernel_size=extra['FINAL_CONV_KERNEL'], 327 | stride=1, 328 | padding=1 if extra['FINAL_CONV_KERNEL'] == 3 else 0 329 | ) 330 | 331 | self.pretrained_layers = extra['PRETRAINED_LAYERS'] 332 | 333 | def _make_transition_layer( 334 | self, num_channels_pre_layer, num_channels_cur_layer): 335 | num_branches_cur = len(num_channels_cur_layer) 336 | num_branches_pre = len(num_channels_pre_layer) 337 | 338 | transition_layers = [] 339 | for i in range(num_branches_cur): 340 | if i < num_branches_pre: 341 | if num_channels_cur_layer[i] != num_channels_pre_layer[i]: 342 | transition_layers.append( 343 | nn.Sequential( 344 | nn.Conv2d( 345 | num_channels_pre_layer[i], 346 | num_channels_cur_layer[i], 347 | 3, 1, 1, bias=False 348 | ), 349 | nn.BatchNorm2d(num_channels_cur_layer[i]), 350 | nn.ReLU(inplace=True) 351 | ) 352 | ) 353 | else: 354 | transition_layers.append(None) 355 | else: 356 | conv3x3s = [] 357 | for j in range(i+1-num_branches_pre): 
358 | inchannels = num_channels_pre_layer[-1] 359 | outchannels = num_channels_cur_layer[i] \ 360 | if j == i-num_branches_pre else inchannels 361 | conv3x3s.append( 362 | nn.Sequential( 363 | nn.Conv2d( 364 | inchannels, outchannels, 3, 2, 1, bias=False 365 | ), 366 | nn.BatchNorm2d(outchannels), 367 | nn.ReLU(inplace=True) 368 | ) 369 | ) 370 | transition_layers.append(nn.Sequential(*conv3x3s)) 371 | 372 | return nn.ModuleList(transition_layers) 373 | 374 | def _make_layer(self, block, planes, blocks, stride=1): 375 | downsample = None 376 | if stride != 1 or self.inplanes != planes * block.expansion: 377 | downsample = nn.Sequential( 378 | nn.Conv2d( 379 | self.inplanes, planes * block.expansion, 380 | kernel_size=1, stride=stride, bias=False 381 | ), 382 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 383 | ) 384 | 385 | layers = [] 386 | layers.append(block(self.inplanes, planes, stride, downsample)) 387 | self.inplanes = planes * block.expansion 388 | for i in range(1, blocks): 389 | layers.append(block(self.inplanes, planes)) 390 | 391 | return nn.Sequential(*layers) 392 | 393 | def _make_stage(self, layer_config, num_inchannels, 394 | multi_scale_output=True): 395 | num_modules = layer_config['NUM_MODULES'] 396 | num_branches = layer_config['NUM_BRANCHES'] 397 | num_blocks = layer_config['NUM_BLOCKS'] 398 | num_channels = layer_config['NUM_CHANNELS'] 399 | block = blocks_dict[layer_config['BLOCK']] 400 | fuse_method = layer_config['FUSE_METHOD'] 401 | 402 | modules = [] 403 | for i in range(num_modules): 404 | # multi_scale_output is only used last module 405 | if not multi_scale_output and i == num_modules - 1: 406 | reset_multi_scale_output = False 407 | else: 408 | reset_multi_scale_output = True 409 | 410 | modules.append( 411 | HighResolutionModule( 412 | num_branches, 413 | block, 414 | num_blocks, 415 | num_inchannels, 416 | num_channels, 417 | fuse_method, 418 | reset_multi_scale_output 419 | ) 420 | ) 421 | num_inchannels = modules[-1].get_num_inchannels() 422 | 423 | return nn.Sequential(*modules), num_inchannels 424 | 425 | def forward(self, x): 426 | x = self.conv1(x) 427 | x = self.bn1(x) 428 | x = self.relu(x) 429 | x = self.conv2(x) 430 | x = self.bn2(x) 431 | x = self.relu(x) 432 | x = self.layer1(x) 433 | 434 | x_list = [] 435 | for i in range(self.stage2_cfg['NUM_BRANCHES']): 436 | if self.transition1[i] is not None: 437 | x_list.append(self.transition1[i](x)) 438 | else: 439 | x_list.append(x) 440 | y_list = self.stage2(x_list) 441 | 442 | x_list = [] 443 | for i in range(self.stage3_cfg['NUM_BRANCHES']): 444 | if self.transition2[i] is not None: 445 | x_list.append(self.transition2[i](y_list[-1])) 446 | else: 447 | x_list.append(y_list[i]) 448 | y_list = self.stage3(x_list) 449 | 450 | x_list = [] 451 | for i in range(self.stage4_cfg['NUM_BRANCHES']): 452 | if self.transition3[i] is not None: 453 | x_list.append(self.transition3[i](y_list[-1])) 454 | else: 455 | x_list.append(y_list[i]) 456 | y_list = self.stage4(x_list) 457 | 458 | x = self.final_layer(y_list[0]) 459 | 460 | return x 461 | 462 | def init_weights(self, pretrained=''): 463 | logger.info('=> init weights from normal distribution') 464 | for m in self.modules(): 465 | if isinstance(m, nn.Conv2d): 466 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 467 | nn.init.normal_(m.weight, std=0.001) 468 | for name, _ in m.named_parameters(): 469 | if name in ['bias']: 470 | nn.init.constant_(m.bias, 0) 471 | elif isinstance(m, nn.BatchNorm2d): 472 | 
nn.init.constant_(m.weight, 1) 473 | nn.init.constant_(m.bias, 0) 474 | elif isinstance(m, nn.ConvTranspose2d): 475 | nn.init.normal_(m.weight, std=0.001) 476 | for name, _ in m.named_parameters(): 477 | if name in ['bias']: 478 | nn.init.constant_(m.bias, 0) 479 | 480 | if os.path.isfile(pretrained): 481 | pretrained_state_dict = torch.load(pretrained) 482 | logger.info('=> loading pretrained model {}'.format(pretrained)) 483 | 484 | need_init_state_dict = {} 485 | for name, m in pretrained_state_dict.items(): 486 | if name.split('.')[0] in self.pretrained_layers \ 487 | or self.pretrained_layers[0] is '*': 488 | need_init_state_dict[name] = m 489 | self.load_state_dict(need_init_state_dict, strict=False) 490 | elif pretrained: 491 | logger.error('=> please download pre-trained models first!') 492 | raise ValueError('{} is not exist!'.format(pretrained)) 493 | 494 | 495 | def get_pose_net(cfg, is_train, **kwargs): 496 | model = PoseHighResolutionNet(cfg, **kwargs) 497 | 498 | if is_train and cfg['MODEL']['INIT_WEIGHTS']: 499 | model.init_weights(cfg['MODEL']['PRETRAINED']) 500 | 501 | return model -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | BN_MOMENTUM = 0.1 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d( 25 | in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False 27 | ) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 72 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 73 | bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * 
self.expansion, 75 | momentum=BN_MOMENTUM) 76 | self.relu = nn.ReLU(inplace=True) 77 | self.downsample = downsample 78 | self.stride = stride 79 | 80 | def forward(self, x): 81 | residual = x 82 | 83 | out = self.conv1(x) 84 | out = self.bn1(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv2(out) 88 | out = self.bn2(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv3(out) 92 | out = self.bn3(out) 93 | 94 | if self.downsample is not None: 95 | residual = self.downsample(x) 96 | 97 | out += residual 98 | out = self.relu(out) 99 | 100 | return out 101 | 102 | 103 | class PoseResNet(nn.Module): 104 | 105 | def __init__(self, block, layers, cfg, **kwargs): 106 | self.inplanes = 64 107 | extra = cfg.MODEL.EXTRA 108 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 109 | 110 | super(PoseResNet, self).__init__() 111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 112 | bias=False) 113 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 114 | self.relu = nn.ReLU(inplace=True) 115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 116 | self.layer1 = self._make_layer(block, 64, layers[0]) 117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 119 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 120 | 121 | # used for deconv layers 122 | self.deconv_layers = self._make_deconv_layer( 123 | extra.NUM_DECONV_LAYERS, 124 | extra.NUM_DECONV_FILTERS, 125 | extra.NUM_DECONV_KERNELS, 126 | ) 127 | 128 | self.final_layer = nn.Conv2d( 129 | in_channels=extra.NUM_DECONV_FILTERS[-1], 130 | out_channels=cfg.MODEL.NUM_JOINTS, 131 | kernel_size=extra.FINAL_CONV_KERNEL, 132 | stride=1, 133 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 134 | ) 135 | 136 | def _make_layer(self, block, planes, blocks, stride=1): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * block.expansion: 139 | downsample = nn.Sequential( 140 | nn.Conv2d(self.inplanes, planes * block.expansion, 141 | kernel_size=1, stride=stride, bias=False), 142 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 143 | ) 144 | 145 | layers = [] 146 | layers.append(block(self.inplanes, planes, stride, downsample)) 147 | self.inplanes = planes * block.expansion 148 | for i in range(1, blocks): 149 | layers.append(block(self.inplanes, planes)) 150 | 151 | return nn.Sequential(*layers) 152 | 153 | def _get_deconv_cfg(self, deconv_kernel, index): 154 | if deconv_kernel == 4: 155 | padding = 1 156 | output_padding = 0 157 | elif deconv_kernel == 3: 158 | padding = 1 159 | output_padding = 1 160 | elif deconv_kernel == 2: 161 | padding = 0 162 | output_padding = 0 163 | 164 | return deconv_kernel, padding, output_padding 165 | 166 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 167 | assert num_layers == len(num_filters), \ 168 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 169 | assert num_layers == len(num_kernels), \ 170 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 171 | 172 | layers = [] 173 | for i in range(num_layers): 174 | kernel, padding, output_padding = \ 175 | self._get_deconv_cfg(num_kernels[i], i) 176 | 177 | planes = num_filters[i] 178 | layers.append( 179 | nn.ConvTranspose2d( 180 | in_channels=self.inplanes, 181 | out_channels=planes, 182 | kernel_size=kernel, 183 | stride=2, 184 | padding=padding, 185 | output_padding=output_padding, 186 | bias=self.deconv_with_bias)) 187 | 
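            # each deconv stage doubles the spatial resolution (stride-2
            # transposed conv) and is followed by BatchNorm + ReLU;
            # self.inplanes is updated so the next stage chains correctly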
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 188 | layers.append(nn.ReLU(inplace=True)) 189 | self.inplanes = planes 190 | 191 | return nn.Sequential(*layers) 192 | 193 | def forward(self, x): 194 | x = self.conv1(x) 195 | x = self.bn1(x) 196 | x = self.relu(x) 197 | x = self.maxpool(x) 198 | 199 | x = self.layer1(x) 200 | x = self.layer2(x) 201 | x = self.layer3(x) 202 | x = self.layer4(x) 203 | 204 | x = self.deconv_layers(x) 205 | x = self.final_layer(x) 206 | 207 | return x 208 | 209 | def init_weights(self, pretrained=''): 210 | if os.path.isfile(pretrained): 211 | logger.info('=> init deconv weights from normal distribution') 212 | for name, m in self.deconv_layers.named_modules(): 213 | if isinstance(m, nn.ConvTranspose2d): 214 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 215 | logger.info('=> init {}.bias as 0'.format(name)) 216 | nn.init.normal_(m.weight, std=0.001) 217 | if self.deconv_with_bias: 218 | nn.init.constant_(m.bias, 0) 219 | elif isinstance(m, nn.BatchNorm2d): 220 | logger.info('=> init {}.weight as 1'.format(name)) 221 | logger.info('=> init {}.bias as 0'.format(name)) 222 | nn.init.constant_(m.weight, 1) 223 | nn.init.constant_(m.bias, 0) 224 | logger.info('=> init final conv weights from normal distribution') 225 | for m in self.final_layer.modules(): 226 | if isinstance(m, nn.Conv2d): 227 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 228 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 229 | logger.info('=> init {}.bias as 0'.format(name)) 230 | nn.init.normal_(m.weight, std=0.001) 231 | nn.init.constant_(m.bias, 0) 232 | 233 | pretrained_state_dict = torch.load(pretrained) 234 | logger.info('=> loading pretrained model {}'.format(pretrained)) 235 | self.load_state_dict(pretrained_state_dict, strict=False) 236 | else: 237 | logger.info('=> init weights from normal distribution') 238 | for m in self.modules(): 239 | if isinstance(m, nn.Conv2d): 240 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 241 | nn.init.normal_(m.weight, std=0.001) 242 | # nn.init.constant_(m.bias, 0) 243 | elif isinstance(m, nn.BatchNorm2d): 244 | nn.init.constant_(m.weight, 1) 245 | nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.ConvTranspose2d): 247 | nn.init.normal_(m.weight, std=0.001) 248 | if self.deconv_with_bias: 249 | nn.init.constant_(m.bias, 0) 250 | 251 | 252 | resnet_spec = { 253 | 18: (BasicBlock, [2, 2, 2, 2]), 254 | 34: (BasicBlock, [3, 4, 6, 3]), 255 | 50: (Bottleneck, [3, 4, 6, 3]), 256 | 101: (Bottleneck, [3, 4, 23, 3]), 257 | 152: (Bottleneck, [3, 8, 36, 3]) 258 | } 259 | 260 | 261 | def get_pose_net(cfg, is_train, **kwargs): 262 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 263 | 264 | block_class, layers = resnet_spec[num_layers] 265 | 266 | model = PoseResNet(block_class, layers, cfg, **kwargs) 267 | 268 | if is_train and cfg.MODEL.INIT_WEIGHTS: 269 | model.init_weights(cfg.MODEL.PRETRAINED) 270 | 271 | return model -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIprogrammer/AdvMix/f619fa279d9419eb452d228762c3872691e42e7d/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 15 | return a if a >= b else b 16 | 17 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 18 | return a if a <= b else b 19 | 20 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 21 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 22 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 23 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 24 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 25 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 26 | 27 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 28 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 29 | 30 | cdef int ndets = dets.shape[0] 31 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 32 | np.zeros((ndets), dtype=np.int) 33 | 34 | # nominal indices 35 | cdef int _i, _j 36 | # sorted indices 37 | cdef int i, j 38 | # temp variables for box i's (the box currently under consideration) 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | # variables for computing overlap with box j (lower scoring box) 41 | cdef np.float32_t xx1, yy1, xx2, yy2 42 | cdef np.float32_t w, h 43 | cdef np.float32_t inter, ovr 44 | 45 | keep = [] 46 | for _i in range(ndets): 47 | i = order[_i] 48 | if suppressed[i] == 1: 49 | continue 50 | keep.append(i) 51 | ix1 = x1[i] 52 | iy1 = y1[i] 53 | ix2 = x2[i] 54 | iy2 = y2[i] 55 | iarea = areas[i] 56 | for _j in range(_i + 1, ndets): 57 | j = order[_j] 58 | if suppressed[j] == 1: 59 | continue 60 | xx1 = max(ix1, x1[j]) 61 | yy1 = max(iy1, y1[j]) 62 | xx2 = min(ix2, x2[j]) 63 | yy2 = min(iy2, y2[j]) 64 | w = max(0.0, xx2 - xx1 + 1) 65 | h = max(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (iarea + areas[j] - inter) 68 | if ovr >= thresh: 69 | suppressed[j] = 1 70 | 71 | return keep 72 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | assert sizeof(int) == sizeof(np.int32_t) 15 | 16 | cdef extern from "gpu_nms.hpp": 17 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 18 | 19 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 20 | np.int32_t device_id=0): 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 4] 28 | cdef np.ndarray[np.int32_t, ndim=1] \ 29 | order = scores.argsort()[::-1].astype(np.int32) 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 33 | keep = keep[:num_out] 34 | return list(order[keep]) 35 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | from .cpu_nms import cpu_nms 14 | from .gpu_nms import gpu_nms 15 | 16 | 17 | def py_nms_wrapper(thresh): 18 | def _nms(dets): 19 | return nms(dets, thresh) 20 | return _nms 21 | 22 | 23 | def cpu_nms_wrapper(thresh): 24 | def _nms(dets): 25 | return cpu_nms(dets, thresh) 26 | return _nms 27 | 28 | 29 | def gpu_nms_wrapper(thresh, device_id): 30 | def _nms(dets): 31 | return gpu_nms(dets, thresh, device_id) 32 | return _nms 33 | 34 | 35 | def nms(dets, thresh): 36 | """ 37 | greedily select boxes with high confidence and overlap with current maximum <= thresh 38 | rule out overlap >= thresh 39 | :param dets: [[x1, y1, x2, y2 score]] 40 | :param thresh: retain overlap < thresh 41 | :return: indexes to keep 42 | """ 43 | if dets.shape[0] == 0: 44 | return [] 45 | 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | 75 | def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None): 76 | if not isinstance(sigmas, np.ndarray): 77 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) 
/ 10.0 78 | vars = (sigmas * 2) ** 2 79 | xg = g[0::3] 80 | yg = g[1::3] 81 | vg = g[2::3] 82 | ious = np.zeros((d.shape[0])) 83 | for n_d in range(0, d.shape[0]): 84 | xd = d[n_d, 0::3] 85 | yd = d[n_d, 1::3] 86 | vd = d[n_d, 2::3] 87 | dx = xd - xg 88 | dy = yd - yg 89 | e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 90 | if in_vis_thre is not None: 91 | ind = list(vg > in_vis_thre) and list(vd > in_vis_thre) 92 | e = e[ind] 93 | ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0 94 | return ious 95 | 96 | 97 | def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 98 | """ 99 | greedily select boxes with high confidence and overlap with current maximum <= thresh 100 | rule out overlap >= thresh, overlap = oks 101 | :param kpts_db 102 | :param thresh: retain overlap < thresh 103 | :return: indexes to keep 104 | """ 105 | if len(kpts_db) == 0: 106 | return [] 107 | 108 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 109 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 110 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 111 | 112 | order = scores.argsort()[::-1] 113 | 114 | keep = [] 115 | while order.size > 0: 116 | i = order[0] 117 | keep.append(i) 118 | 119 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 120 | 121 | inds = np.where(oks_ovr <= thresh)[0] 122 | order = order[inds + 1] 123 | 124 | return keep 125 | 126 | 127 | def rescore(overlap, scores, thresh, type='gaussian'): 128 | assert overlap.shape[0] == scores.shape[0] 129 | if type == 'linear': 130 | inds = np.where(overlap >= thresh)[0] 131 | scores[inds] = scores[inds] * (1 - overlap[inds]) 132 | else: 133 | scores = scores * np.exp(- overlap**2 / thresh) 134 | 135 | return scores 136 | 137 | 138 | def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None): 139 | """ 140 | greedily select boxes with high confidence and overlap with current maximum <= thresh 141 | rule out overlap >= thresh, overlap = oks 142 | :param kpts_db 143 | :param thresh: retain overlap < thresh 144 | :return: indexes to keep 145 | """ 146 | if len(kpts_db) == 0: 147 | return [] 148 | 149 | scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))]) 150 | kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))]) 151 | areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))]) 152 | 153 | order = scores.argsort()[::-1] 154 | scores = scores[order] 155 | 156 | # max_dets = order.size 157 | max_dets = 20 158 | keep = np.zeros(max_dets, dtype=np.intp) 159 | keep_cnt = 0 160 | while order.size > 0 and keep_cnt < max_dets: 161 | i = order[0] 162 | 163 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre) 164 | 165 | order = order[1:] 166 | scores = rescore(oks_ovr, scores[1:], thresh) 167 | 168 | tmp = scores.argsort()[::-1] 169 | order = order[tmp] 170 | scores = scores[tmp] 171 | 172 | keep[keep_cnt] = i 173 | keep_cnt += 1 174 | 175 | keep = keep[:keep_cnt] 176 | 177 | return keep 178 | # kpts_db = kpts_db[:keep_cnt] 179 | 180 | # return kpts_db 181 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Copyright (c) Microsoft 3 | // Licensed under The MIT License 4 | // 
Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 5 | // ------------------------------------------------------------------ 6 | 7 | #include "gpu_nms.hpp" 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 5 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 49 | block_boxes[threadIdx.x * 5 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 51 | block_boxes[threadIdx.x * 5 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 53 | block_boxes[threadIdx.x * 5 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 55 | block_boxes[threadIdx.x * 5 + 4] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 57 | } 58 | __syncthreads(); 59 | 60 | if (threadIdx.x < row_size) { 61 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 62 | const float *cur_box = dev_boxes + cur_box_idx * 5; 63 | int i = 0; 64 | unsigned long long t = 0; 65 | int start = 0; 66 | if (row_start == col_start) { 67 | start = threadIdx.x + 1; 68 | } 69 | for (i = start; i < col_size; i++) { 70 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 71 | t |= 1ULL << i; 72 | } 73 | } 74 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 75 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 76 | } 77 | } 78 | 79 | void _set_device(int device_id) { 80 | int current_device; 81 | CUDA_CHECK(cudaGetDevice(¤t_device)); 82 | if (current_device == device_id) { 83 | return; 84 | } 85 | // The call to cudaSetDevice must come before any calls to Get, which 86 | // may perform initialization using the GPU. 
87 | CUDA_CHECK(cudaSetDevice(device_id)); 88 | } 89 | 90 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 91 | int boxes_dim, float nms_overlap_thresh, int device_id) { 92 | _set_device(device_id); 93 | 94 | float* boxes_dev = NULL; 95 | unsigned long long* mask_dev = NULL; 96 | 97 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 98 | 99 | CUDA_CHECK(cudaMalloc(&boxes_dev, 100 | boxes_num * boxes_dim * sizeof(float))); 101 | CUDA_CHECK(cudaMemcpy(boxes_dev, 102 | boxes_host, 103 | boxes_num * boxes_dim * sizeof(float), 104 | cudaMemcpyHostToDevice)); 105 | 106 | CUDA_CHECK(cudaMalloc(&mask_dev, 107 | boxes_num * col_blocks * sizeof(unsigned long long))); 108 | 109 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 110 | DIVUP(boxes_num, threadsPerBlock)); 111 | dim3 threads(threadsPerBlock); 112 | nms_kernel<<>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | int num_to_keep = 0; 127 | for (int i = 0; i < boxes_num; i++) { 128 | int nblock = i / threadsPerBlock; 129 | int inblock = i % threadsPerBlock; 130 | 131 | if (!(remv[nblock] & (1ULL << inblock))) { 132 | keep_out[num_to_keep++] = i; 133 | unsigned long long *p = &mask_host[0] + i * col_blocks; 134 | for (int j = nblock; j < col_blocks; j++) { 135 | remv[j] |= p[j]; 136 | } 137 | } 138 | } 139 | *num_out = num_to_keep; 140 | 141 | CUDA_CHECK(cudaFree(boxes_dev)); 142 | CUDA_CHECK(cudaFree(mask_dev)); 143 | } 144 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose.gluon 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 
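    Raises EnvironmentError if nvcc cannot be located, or if any of the
    resolved CUDA paths (home, nvcc, include, lib64) does not exist.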
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.items(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIprogrammer/AdvMix/f619fa279d9419eb452d228762c3872691e42e7d/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import cv2 13 | import torch 14 | 15 | 16 | def flip_back(output_flipped, matched_parts, args=None, cfg=None, dim=None): 17 | ''' 18 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 19 | ''' 20 | assert output_flipped.ndim == 4 or output_flipped.ndim == 3,\ 21 | 'output_flipped should be [batch_size, num_joints, height, width] or [batch_size, num_joints, 2]' 22 | 23 | if output_flipped.ndim == 4: 24 | output_flipped = output_flipped[..., ::-1] 25 | 26 | # flip x,y 27 | else: 28 | output_flipped = inv_coord_norm(output_flipped, cfg, args) 29 | if args.reg_coord: 30 | output_flipped[...,0] = cfg.MODEL.IMAGE_SIZE[0] - 1 - output_flipped[...,0] 31 | else: 32 | output_flipped[...,0] = dim[3] - 1 - output_flipped[...,0] 33 | 34 | output_flipped = coord_norm(output_flipped, cfg, args).clone().cpu().numpy() 35 | 36 | for pair in matched_parts: 37 | tmp = output_flipped[:, pair[0], ...].copy() 38 | output_flipped[:, pair[0], ...] = output_flipped[:, pair[1], ...] 39 | output_flipped[:, pair[1], ...] = tmp 40 | 41 | return output_flipped 42 | 43 | 44 | def fliplr_joints(joints, joints_vis, width, matched_parts): 45 | """ 46 | flip coords 47 | """ 48 | # Flip horizontal 49 | joints[:, 0] = width - joints[:, 0] - 1 50 | 51 | # Change left-right parts; point index 52 | for pair in matched_parts: 53 | joints[pair[0], :], joints[pair[1], :] = \ 54 | joints[pair[1], :], joints[pair[0], :].copy() 55 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 56 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 57 | 58 | return joints*joints_vis, joints_vis 59 | 60 | 61 | def transform_preds(coords, center, scale, output_size): 62 | target_coords = np.zeros(coords.shape) 63 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 64 | for p in range(coords.shape[0]): 65 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 66 | return target_coords 67 | 68 | 69 | def get_affine_transform( 70 | center, scale, rot, output_size, 71 | shift=np.array([0, 0], dtype=np.float32), inv=0 72 | ): 73 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 74 | print(scale) 75 | scale = np.array([scale, scale]) 76 | 77 | scale_tmp = scale * 200.0 78 | src_w = scale_tmp[0] 79 | dst_w = output_size[0] 80 | dst_h = output_size[1] 81 | 82 | rot_rad = np.pi * rot / 180 83 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 84 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 85 | 86 | src = np.zeros((3, 2), dtype=np.float32) 87 | dst = np.zeros((3, 2), dtype=np.float32) 88 | src[0, :] = center + scale_tmp * shift 89 | src[1, :] = center + src_dir + scale_tmp * shift 90 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 91 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 92 | 93 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 94 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 95 | 96 | if inv: 97 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 98 | else: 99 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 100 | 101 | return trans 102 | 103 | 104 | def affine_transform(pt, t): 105 | new_pt = np.array([pt[0], pt[1], 1.]).T 106 | new_pt = np.dot(t, new_pt) 107 | return new_pt[:2] 108 | 109 | # the same rule: [1,0] [3,0] ==> [3,-2] square 
triangle 110 | def get_3rd_point(a, b): 111 | direct = a - b 112 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 113 | 114 | # rotation coordination 115 | def get_dir(src_point, rot_rad): 116 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 117 | 118 | src_result = [0, 0] 119 | src_result[0] = src_point[0] * cs - src_point[1] * sn 120 | src_result[1] = src_point[0] * sn + src_point[1] * cs 121 | 122 | return src_result 123 | 124 | 125 | def crop(img, center, scale, output_size, rot=0): 126 | trans = get_affine_transform(center, scale, rot, output_size) 127 | 128 | dst_img = cv2.warpAffine( 129 | img, trans, (int(output_size[0]), int(output_size[1])), 130 | flags=cv2.INTER_LINEAR 131 | ) 132 | 133 | return dst_img 134 | 135 | 136 | def tofloat(x): 137 | 138 | if isinstance(x, np.ndarray): 139 | return torch.Tensor(x).cuda() 140 | 141 | if x.dtype == torch.float64 or x.dtype == torch.double: 142 | x = x.float() 143 | return x 144 | 145 | def coord_norm(gt, cfg, args): 146 | 147 | gt = tofloat(gt) 148 | 149 | if args.reg_coord: 150 | image_size = [cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]] 151 | else: 152 | image_size = [cfg.MODEL.HEATMAP_SIZE[0], cfg.MODEL.HEATMAP_SIZE[1]] 153 | 154 | gt = (gt * 2 + 1) / torch.Tensor(image_size).cuda() - 1 155 | return gt 156 | 157 | def inv_coord_norm(gt_norm, cfg, args): 158 | 159 | gt_norm = tofloat(gt_norm) 160 | 161 | if args.reg_coord: 162 | image_size = [cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[1]] 163 | else: 164 | image_size = [cfg.MODEL.HEATMAP_SIZE[0], cfg.MODEL.HEATMAP_SIZE[1]] 165 | 166 | gt = ((gt_norm + 1) * torch.Tensor(image_size).cuda() - 1) / 2 167 | return gt 168 | 169 | def _tocuda(t): 170 | if isinstance(t, list): 171 | for index in range(len(t)): 172 | t[index] = _tocuda(t[index]) 173 | else: 174 | if isinstance(t, np.ndarray): 175 | t = torch.from_numpy(t.copy()).cuda() 176 | elif t.is_cuda: 177 | return t 178 | else: 179 | t = torch.Tensor(t.clone()).cuda() 180 | return t 181 | 182 | def _tocopy(t): 183 | if isinstance(t, list): 184 | for index in range(len(list)): 185 | t[index] = _tocopy(t[index]) 186 | else: 187 | if isinstance(t, np.ndarray): 188 | t = torch.from_numpy(t.copy()).cuda() 189 | else: 190 | t = torch.Tensor(t.clone()).cuda() 191 | return t -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | 17 | import torch 18 | import torch.optim as optim 19 | import torch.nn as nn 20 | 21 | 22 | def create_logger(args, cfg, cfg_name, phase='train'): 23 | root_output_dir = Path(cfg.OUTPUT_DIR) 24 | # set up logger 25 | if not root_output_dir.exists(): 26 | print('=> creating {}'.format(root_output_dir)) 27 | root_output_dir.mkdir() 28 | 29 | dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ 30 | if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET 31 | dataset = dataset.replace(':', '_') 32 | model = cfg.MODEL.NAME 33 | 34 | cfg_name = os.path.basename(cfg_name).split('.')[0] 35 | cfg_name = args.save_suffix if args.save_suffix is not '' else cfg_name 36 | 37 | ### log output dir 38 | if args.test_robust: 39 | root_output_dir = Path('output_robustness') 40 | final_output_dir = root_output_dir / dataset / model / cfg_name / 'test_corruption' 41 | else: 42 | final_output_dir = root_output_dir / dataset / model / cfg_name 43 | 44 | print('=> creating {}'.format(final_output_dir)) 45 | final_output_dir.mkdir(parents=True, exist_ok=True) 46 | 47 | time_str = time.strftime('%Y-%m-%d-%H-%M') 48 | 49 | if args.test_robust: 50 | log_file = '{}_{}.log'.format(cfg_name, phase) 51 | else: 52 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 53 | 54 | final_log_file = final_output_dir / log_file 55 | head = '%(asctime)-15s %(message)s' 56 | logging.basicConfig(filename=str(final_log_file), 57 | format=head) 58 | logger = logging.getLogger() 59 | 60 | logger.setLevel(logging.INFO) 61 | 62 | console = logging.StreamHandler() 63 | logging.getLogger('').addHandler(console) 64 | 65 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 66 | (cfg_name + '_' + time_str) 67 | 68 | if args.test_robust: 69 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / 'test_robustness' / \ 70 | (cfg_name + '_' + time_str) / args.corruption_type / str(args.severity) 71 | 72 | print('=> creating {}'.format(tensorboard_log_dir)) 73 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 74 | 75 | return logger, str(final_output_dir), str(tensorboard_log_dir) 76 | 77 | 78 | def get_optimizer(cfg, model): 79 | optimizer = None 80 | if cfg.TRAIN.OPTIMIZER == 'sgd': 81 | optimizer = optim.SGD( 82 | model.parameters(), 83 | lr=cfg.TRAIN.LR, 84 | momentum=cfg.TRAIN.MOMENTUM, 85 | weight_decay=cfg.TRAIN.WD, 86 | nesterov=cfg.TRAIN.NESTEROV 87 | ) 88 | elif cfg.TRAIN.OPTIMIZER == 'adam': 89 | optimizer = optim.Adam( 90 | model.parameters(), 91 | lr=cfg.TRAIN.LR 92 | ) 93 | 94 | return optimizer 95 | 96 | 97 | def save_checkpoint(states, is_best, output_dir, 98 | filename='checkpoint.pth', suffix=""): 99 | if suffix != "": 100 | torch.save(states, os.path.join(output_dir, filename[:-4] + '_' + suffix + ".pth")) 101 | if is_best and 'state_dict' in states: 102 | torch.save(states['best_state_dict'], 103 | os.path.join(output_dir, 'model_best_{}.pth'.format(suffix))) 104 | else: 105 | torch.save(states, os.path.join(output_dir, filename)) 106 | if is_best and 'state_dict' in states: 107 | torch.save(states['best_state_dict'], 108 | os.path.join(output_dir, 'model_best.pth')) 109 | 110 | def 
get_model_summary(model, *input_tensors, item_length=26, verbose=False, return_all=False): 111 | """ 112 | :param model: 113 | :param input_tensors: 114 | :param item_length: 115 | :return: 116 | """ 117 | 118 | summary = [] 119 | 120 | ModuleDetails = namedtuple( 121 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds", "memory_access_cost"]) 122 | hooks = [] 123 | layer_instances = {} 124 | 125 | def add_hooks(module): 126 | 127 | def hook(module, input, output): 128 | class_name = str(module.__class__.__name__) 129 | instance_index = 1 130 | if class_name not in layer_instances: 131 | layer_instances[class_name] = instance_index 132 | else: 133 | instance_index = layer_instances[class_name] + 1 134 | layer_instances[class_name] = instance_index 135 | 136 | layer_name = class_name + "_" + str(instance_index) 137 | 138 | params = 0 139 | mac = 0 140 | mac_input = 0 141 | mac_output = 0 142 | mac_module = 0 143 | if class_name.find("Conv2d") != -1 or class_name.find("BatchNorm") != -1 or \ 144 | class_name.find("Linear") != -1: 145 | for param_ in module.parameters(): 146 | params += param_.view(-1).size(0) 147 | mac_module += param_.view(-1).size(0) 148 | 149 | flops = "Not Available" 150 | if class_name.find("Conv2d") != -1 and hasattr(module, "weight"): 151 | flops = ( 152 | torch.prod( 153 | torch.LongTensor(list(module.weight.data.size()))) * 154 | torch.prod( 155 | torch.LongTensor(list(output.size())[2:]))).item() 156 | elif isinstance(module, nn.Linear): 157 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 158 | * input[0].size(1)).item() 159 | 160 | if isinstance(input[0], list): 161 | input = input[0] 162 | if isinstance(output, list): 163 | output = output[-1] 164 | 165 | mac_input = input[0].view(-1).size(0)# input shape 166 | mac_output = output.view(-1).size(0) 167 | mac += (mac_input + mac_module + mac_output) 168 | 169 | # 与 batch 无关 170 | summary.append( 171 | ModuleDetails( 172 | name=layer_name, 173 | input_size=list(input[0].size()), 174 | output_size=list(output.size()), 175 | num_parameters=params, 176 | multiply_adds=flops, 177 | memory_access_cost=mac) 178 | ) 179 | if not isinstance(module, nn.ModuleList) \ 180 | and not isinstance(module, nn.Sequential) \ 181 | and module != model: 182 | hooks.append(module.register_forward_hook(hook)) 183 | 184 | model.eval() 185 | model.apply(add_hooks) 186 | 187 | space_len = item_length 188 | 189 | model(*input_tensors) 190 | for hook in hooks: 191 | hook.remove() 192 | 193 | details = '' 194 | if verbose: 195 | details = "Model Summary" + \ 196 | os.linesep + \ 197 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}MAC(memory access cost){}".format( 198 | ' ' * (space_len - len("Name")), 199 | ' ' * (space_len - len("Input Size")), 200 | ' ' * (space_len - len("Output Size")), 201 | ' ' * (space_len - len("Parameters")), 202 | ' ' * (space_len - len("Multiply Adds (Flops)")), \ 203 | ' ' * (space_len - len("MAC(memory access cost)"))) \ 204 | + os.linesep + '-' * space_len * 5 + os.linesep 205 | 206 | params_sum = 0 207 | flops_sum = 0 208 | mac_sum = 0 209 | for layer in summary: 210 | params_sum += layer.num_parameters 211 | mac_sum += layer.memory_access_cost 212 | if layer.multiply_adds != "Not Available": 213 | flops_sum += layer.multiply_adds 214 | if verbose: 215 | details += "{}{}{}{}{}{}{}{}{}{}".format( 216 | layer.name, 217 | ' ' * (space_len - len(layer.name)), 218 | layer.input_size, 219 | ' ' * (space_len - len(str(layer.input_size))), 220 | 
layer.output_size, 221 | ' ' * (space_len - len(str(layer.output_size))), 222 | layer.num_parameters, 223 | ' ' * (space_len - len(str(layer.num_parameters))), 224 | layer.multiply_adds, 225 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 226 | + os.linesep + '-' * space_len * 5 + os.linesep 227 | 228 | details += os.linesep \ 229 | + "Total Parameters: {:,}".format(params_sum) \ 230 | + os.linesep + '-' * space_len * 5 + os.linesep 231 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 232 | + os.linesep + '-' * space_len * 5 + os.linesep 233 | details += "Memory Access Cost : {:,}".format(mac_sum) \ 234 | + os.linesep + '-' * space_len * 5 + os.linesep 235 | 236 | details += "Number of Layers" + os.linesep 237 | for layer in layer_instances: 238 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 239 | 240 | if return_all: 241 | return [details, flops_sum, mac_sum, params_sum] 242 | return details 243 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import math 12 | 13 | import numpy as np 14 | import torchvision 15 | import cv2 16 | import torch 17 | import torch.nn as nn 18 | 19 | from core.inference import get_max_preds 20 | 21 | 22 | def tensor2im(input_image, imtype=np.uint8): 23 | """ 24 | Args: 25 | input_image (tensor) 26 | imtype (type) 27 | """ 28 | mean = [0.485,0.456,0.406] 29 | std = [0.229,0.224,0.225] 30 | if not isinstance(input_image, np.ndarray): 31 | if isinstance(input_image, torch.Tensor): 32 | image_tensor = input_image.data 33 | else: 34 | return input_image 35 | image_numpy = image_tensor.cpu().float().numpy() 36 | if image_numpy.shape[0] == 1: 37 | image_numpy = np.tile(image_numpy, (3, 1, 1)) 38 | for i in range(len(mean)): 39 | image_numpy[i] = image_numpy[i] * std[i] + mean[i] 40 | image_numpy = image_numpy * 255 41 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) 42 | else: 43 | image_numpy = input_image 44 | return image_numpy.astype(imtype) 45 | 46 | 47 | def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, 48 | file_name, nrow=8, padding=2): 49 | 50 | """ 51 | Args: 52 | batch_image: [batch_size, channel, height, width] 53 | batch_joints: [batch_size, num_joints, 3], 54 | batch_joints_vis: [batch_size, num_joints, 1], 55 | """ 56 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, False) 57 | ndarr = tensor2im(grid) 58 | ndarr = ndarr.copy() 59 | 60 | nmaps = batch_image.size(0) 61 | xmaps = min(nrow, nmaps) 62 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 63 | height = int(batch_image.size(2) + padding) 64 | width = int(batch_image.size(3) + padding) 65 | k = 0 66 | for y in range(ymaps): 67 | for x in range(xmaps): 68 | if k >= nmaps: 69 | break 70 | joints = batch_joints[k] 71 | joints_vis = batch_joints_vis[k] 72 | 73 | for joint, joint_vis in zip(joints, joints_vis): 74 | joint[0] = x * width + padding + joint[0] 75 | joint[1] = y * height + padding + joint[1] 76 | if 
joint_vis[0]: 77 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) 78 | k = k + 1 79 | cv2.imwrite(file_name, ndarr) 80 | 81 | 82 | def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, 83 | normalize=True, coord=None): 84 | """ 85 | Args: 86 | batch_image: [batch_size, channel, height, width] 87 | batch_heatmaps: ['batch_size, num_joints, height, width] 88 | file_name: saved file name 89 | """ 90 | if normalize: 91 | batch_image = batch_image.clone() 92 | min = float(batch_image.min()) 93 | max = float(batch_image.max()) 94 | 95 | batch_image.add_(-min).div_(max - min + 1e-5) 96 | 97 | batch_size = batch_heatmaps.size(0) 98 | num_joints = batch_heatmaps.size(1) 99 | heatmap_height = batch_heatmaps.size(2) 100 | heatmap_width = batch_heatmaps.size(3) 101 | 102 | grid_image = np.zeros((batch_size*heatmap_height, 103 | (num_joints+1)*heatmap_width, 104 | 3), 105 | dtype=np.uint8) 106 | if coord is None: 107 | preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) 108 | else: 109 | preds = coord 110 | 111 | for i in range(batch_size): 112 | image = batch_image[i].mul(255)\ 113 | .clamp(0, 255)\ 114 | .byte()\ 115 | .permute(1, 2, 0)\ 116 | .cpu().numpy() 117 | heatmaps = batch_heatmaps[i].mul(255)\ 118 | .clamp(0, 255)\ 119 | .byte()\ 120 | .cpu().numpy() 121 | 122 | resized_image = cv2.resize(image, 123 | (int(heatmap_width), int(heatmap_height))) 124 | 125 | height_begin = heatmap_height * i 126 | height_end = heatmap_height * (i + 1) 127 | for j in range(num_joints): 128 | cv2.circle(resized_image, 129 | (int(preds[i][j][0]), int(preds[i][j][1])), 130 | 1, [0, 0, 255], 1) 131 | heatmap = heatmaps[j, :, :] 132 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 133 | masked_image = colored_heatmap*0.7 + resized_image*0.3 134 | cv2.circle(masked_image, 135 | (int(preds[i][j][0]), int(preds[i][j][1])), 136 | 1, [0, 0, 255], 1) 137 | 138 | width_begin = heatmap_width * (j+1) 139 | width_end = heatmap_width * (j+2) 140 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 141 | masked_image 142 | 143 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 144 | 145 | cv2.imwrite(file_name, grid_image) 146 | 147 | 148 | 149 | def save_debug_images(config, input, meta, target, joints_pred, output, 150 | prefix, coord=None): 151 | if not config.DEBUG.DEBUG: 152 | return 153 | 154 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 155 | save_batch_image_with_joints( 156 | input, meta['joints'], meta['joints_vis'], 157 | '{}_gt.jpg'.format(prefix) 158 | ) 159 | if config.DEBUG.SAVE_BATCH_IMAGES_PRED: 160 | save_batch_image_with_joints( 161 | input, joints_pred, meta['joints_vis'], 162 | '{}_pred.jpg'.format(prefix) 163 | ) 164 | if config.DEBUG.SAVE_HEATMAPS_GT: 165 | save_batch_heatmaps( 166 | input, target, '{}_hm_gt.jpg'.format(prefix) 167 | ) 168 | # normalized 169 | if config.DEBUG.SAVE_HEATMAPS_PRED: 170 | if isinstance(output, list) and len(output) > 1: 171 | save_batch_heatmaps( 172 | input, output[1], '{}_hm_pred.jpg'.format(prefix), normalize=False, coord=coord 173 | ) 174 | save_batch_heatmaps( 175 | input, output[0], '{}_hm_unnorm_pred.jpg'.format(prefix), coord=coord 176 | ) 177 | elif isinstance(output, list) and len(output) == 1: 178 | try: 179 | save_batch_heatmaps( 180 | input, output[0], '{}_hm_pred.jpg'.format(prefix), normalize=False, coord=coord 181 | ) 182 | except: 183 | print('do not support') 184 | else: 185 | save_batch_heatmaps( 186 | input, output, '{}_hm_pred.jpg'.format(prefix), 
normalize=False, coord=coord 187 | ) 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.index('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 2:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.index('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in xrange(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | scipy 3 | pandas 4 | pyyaml 5 | json_tricks 6 | yacs>=0.1.5 7 | tensorboardX==1.6 8 | tqdm 9 | glob 10 | numpy 11 | Pillow 12 | imagecorruptions 13 | torch>=1.0.0 -------------------------------------------------------------------------------- /scripts/make_datasets.sh: -------------------------------------------------------------------------------- 1 | # coco 2 | python tools/make_datasets.py --dataset coco --root_dir data --data_dir coco/val2017 3 | # mpii 4 | python tools/make_datasets.py --dataset mpii --root_dir data --data_dir mpii/images -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | which_dataset=$1 2 | 3 | if [ $which_dataset == coco ]; then 4 | exp_ID=GT_test_COCO_res50_256x192_advmix 5 | 
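    # the checkpoints below are AdvMix-trained pose-network weights; save_checkpoint
    # writes them as model_best_D.pth, where the 'D' suffix presumably marks the pose
    # network as opposed to the generator ('G', cf. --load_from_D/--load_from_G)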
cfg_file=experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3_advmix.yaml 6 | checkpoint=output/coco/pose_resnet/MPII_res50_256x256_advmix/model_best_D.pth 7 | elif [ $which_dataset == mpii ]; then 8 | exp_ID=GT_test_MPII_res50_256x256_advmix 9 | cfg_file=experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3_advmix.yaml 10 | checkpoint=output/coco/pose_resnet/COCO_res50_256x192_advmix/model_best_D.pth 11 | fi 12 | 13 | 14 | python tools/test_corruption.py \ 15 | --cfg $cfg_file \ 16 | --test_robust \ 17 | --exp_id $exp_ID \ 18 | --save_suffix $exp_ID \ 19 | TEST.MODEL_FILE $checkpoint \ 20 | TEST.USE_GT_BBOX False -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | which_dataset=$1 2 | 3 | if [ $which_dataset == coco ]; then 4 | num_joints=17 5 | exp_ID=COCO_res50_256x192_advmix 6 | cfg_file=experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3_advmix.yaml 7 | checkpoint=models/pytorch/pose_coco/pose_resnet_50_256x192.pth 8 | elif [ $which_dataset == mpii ]; then 9 | num_joints=16 10 | exp_ID=MPII_res50_256x256_advmix 11 | cfg_file=experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3_advmix.yaml 12 | checkpoint=models/pytorch/pose_mpii/pose_resnet_50_256x256.pth.tar 13 | fi 14 | echo 'Start training :'$which_dataset 15 | 16 | python tools/train.py \ 17 | --cfg $cfg_file \ 18 | --exp_id $exp_ID \ 19 | --save_suffix $exp_ID \ 20 | --load_from_D $checkpoint \ 21 | --advmix \ 22 | --sample_times 3 \ 23 | --joints_num $num_joints \ 24 | --kd_mseloss \ 25 | --alpha 0.1 \ 26 | TEST.MODEL_FILE $checkpoint \ 27 | TEST.USE_GT_BBOX True -------------------------------------------------------------------------------- /tools/_init_parse.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import os 7 | import numpy as np 8 | import pprint 9 | import shutil 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description='Train keypoints network') 14 | # general 15 | parser.add_argument('--cfg', 16 | help='experiment configure file name', 17 | required=True, 18 | type=str) 19 | 20 | parser.add_argument('opts', 21 | help="Modify config options using the command-line", 22 | default=None, 23 | nargs=argparse.REMAINDER) 24 | 25 | # philly 26 | parser.add_argument('--modelDir', 27 | help='model directory', 28 | type=str, 29 | default='') 30 | parser.add_argument('--logDir', 31 | help='log directory', 32 | type=str, 33 | default='') 34 | parser.add_argument('--dataDir', 35 | help='data directory', 36 | type=str, 37 | default='') 38 | parser.add_argument('--prevModelDir', 39 | help='prev Model directory', 40 | type=str, 41 | default='') 42 | parser.add_argument('--save_suffix', 43 | help='model output dir suffix', 44 | type=str, 45 | default='') 46 | 47 | ### test robustness 48 | parser.add_argument('--test_robust', 49 | help='normal test or test robustness', 50 | default=False, 51 | action='store_true') 52 | 53 | parser.add_argument('--corruption_type', 54 | help='type of corruption', 55 | type=str, 56 | default='') 57 | 58 | parser.add_argument('--severity', 59 | help='severity of corruption', 60 | type=int, 61 | default=0) 62 | 63 | # INPUT TYPE 64 | parser.add_argument('--dataset_root', 65 | help='data directory, if only dataset root is provided, then all the images 
are processed', 66 | type=str, 67 | default='', 68 | ) 69 | parser.add_argument('--load_json_file', 70 | help='load json file. The dataset root should also be given.', 71 | type=str, 72 | default='') 73 | # OUTPUT ROOT: 74 | parser.add_argument('--out_root', 75 | help='data directory', 76 | type=str, 77 | default='/mnt/lustre/share/jinsheng/res_crop') 78 | 79 | parser.add_argument('--out_file', 80 | help='data directory', 81 | type=str, 82 | default='res') 83 | 84 | # test & train 85 | parser.add_argument('--exp_id', 86 | type=str, 87 | default='') 88 | parser.add_argument('--load_from_G', 89 | type=str, 90 | default='') 91 | parser.add_argument('--load_from_D', 92 | type=str, 93 | default='') 94 | 95 | 96 | parser.add_argument('--sample_times', 97 | type=int, 98 | default=1) 99 | 100 | parser.add_argument('--adv_loss_weight', 101 | type=float, 102 | default=1) 103 | parser.add_argument('--combine_prob', 104 | type=float, 105 | default=0.2) 106 | parser.add_argument('--perturb_joint', 107 | type=float, 108 | default=0.2) 109 | parser.add_argument('--perturb_range', 110 | type=int, 111 | default=5) 112 | parser.add_argument('--sp_style', 113 | type=float, 114 | default=0) 115 | parser.add_argument('--advmix', 116 | default=False, 117 | action='store_true') 118 | 119 | parser.add_argument('--stylize_image', 120 | default=False, 121 | action='store_true') 122 | 123 | parser.add_argument('--joints_num', 124 | type=int, 125 | default=17) 126 | 127 | # generator 128 | parser.add_argument('--gen_input_chn', 129 | type=int, 130 | default=9) 131 | 132 | parser.add_argument('--downsamples', 133 | type=int, 134 | default=6) 135 | 136 | # knowledge distillation 137 | parser.add_argument('--kd_mseloss', 138 | default=False, 139 | action='store_true') 140 | 141 | parser.add_argument('--kd_klloss', 142 | default=False, 143 | action='store_true') 144 | 145 | parser.add_argument('--alpha', 146 | type=float, 147 | default=0.1) 148 | 149 | # random corruption 150 | parser.add_argument('--random_corruption', 151 | default=False, 152 | action='store_true') 153 | 154 | args = parser.parse_args() 155 | 156 | return args -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os.path as osp 13 | import sys 14 | 15 | 16 | def add_path(path): 17 | if path not in sys.path: 18 | sys.path.insert(0, path) 19 | 20 | 21 | this_dir = osp.dirname(__file__) 22 | 23 | lib_path = osp.join(this_dir, '..', 'lib') 24 | add_path(lib_path) 25 | 26 | mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') 27 | add_path(mm_path) 28 | 29 | # mmdectection 30 | lib_path = osp.join(this_dir, '..', 'mmdetection') 31 | 32 | lib_path = osp.join(this_dir, '..', 'Synchronized_BatchNorm') 33 | add_path(lib_path) 34 | 35 | if __name__ == '__main__': 36 | import os.path as osp 37 | this_dir = osp.dirname(__file__) 38 | print(this_dir) 39 | ### only current path and sys append path 
--------------------------------------------------------------------------------
/tools/make_datasets.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import numpy as np
3 | import time, os
4 | from glob import glob
5 | from tqdm import tqdm
6 | import argparse
7 | import torch
8 | from torch.utils.data import Dataset
9 | from imagecorruptions import corrupt, get_corruption_names
10 | 
11 | 
12 | parser = argparse.ArgumentParser(description='Apply different corruption types to the official validation datasets, e.g., COCO, MPII, OCHuman, etc.',
13 |                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
14 | parser.add_argument('--batch_size', type=int, default=64, help='Batch size for processing images.')
15 | parser.add_argument('--num_workers', type=int, default=8, help='Number of worker processes for data loading.')
16 | parser.add_argument('--root_dir', type=str, default="./data", help='Root directory of data.')
17 | parser.add_argument('--data_dir', type=str, help='Directory of images.')
18 | parser.add_argument('--dataset', type=str, default="COCO", help='Dataset to process.')
19 | args = parser.parse_args()
20 | 
21 | class make_data(Dataset):
22 |     def __init__(self, root_dir, data_dir, dataset):
23 |         self.root_dir = root_dir
24 |         self.data_dir = data_dir
25 |         self.dataset = dataset
26 |         self.imglist = glob(self.root_dir + '/' + self.data_dir + '/*.jpg')
27 | 
28 |     def __getitem__(self, index):
29 |         img = self.imglist[index]
30 |         self.process(img)
31 |         return 0
32 | 
33 |     def __len__(self):
34 |         return len(self.imglist)
35 | 
36 |     def process(self, img):
37 |         image = np.asarray(Image.open(img))
38 |         for corruption in get_corruption_names('all'):
39 |             for severity in range(5):
40 |                 np.random.seed(1)
41 |                 corrupted = corrupt(image, corruption_name=corruption, severity=severity+1)
42 |                 corrupted_path = os.path.join(self.root_dir, self.dataset + '-C', corruption, str(severity), os.path.basename(img))
43 |                 if not os.path.exists(os.path.dirname(corrupted_path)):
44 |                     os.makedirs(os.path.dirname(corrupted_path))
45 |                 Image.fromarray(corrupted).save(corrupted_path)
46 | 
47 | if __name__ == '__main__':
48 |     root_dir = args.root_dir
49 |     data_dir = args.data_dir
50 |     which_dataset = args.dataset
51 |     d_dataset = make_data(root_dir, data_dir, which_dataset)
52 |     print("To process {} images.
".format(len(d_dataset))) 53 | distorted_dataset_loader = torch.utils.data.DataLoader( 54 | d_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) 55 | 56 | for _ in tqdm(distorted_dataset_loader): continue 57 | -------------------------------------------------------------------------------- /tools/test_corruption.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # pose.pytorch 3 | # Copyright (c) 2018-present Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import argparse 13 | import os 14 | import pprint 15 | 16 | import torch 17 | import torch.nn.parallel 18 | import torch.backends.cudnn as cudnn 19 | import torch.optim 20 | import torch.utils.data 21 | import torch.utils.data.distributed 22 | import torchvision.transforms as transforms 23 | 24 | import _init_paths 25 | from config import cfg 26 | from config import update_config 27 | from core.loss import JointsMSELoss 28 | from core.function import validate 29 | from utils.utils import create_logger 30 | 31 | import dataset 32 | import models 33 | 34 | import collections 35 | from _init_parse import parse_args 36 | import pandas as pd 37 | 38 | def val_model_init(): 39 | 40 | # adjust the gpu_ids 41 | args = parse_args() 42 | update_config(cfg, args) 43 | 44 | # cudnn related setting 45 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 46 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 47 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 48 | 49 | model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( 50 | cfg, is_train=False 51 | ) 52 | 53 | if cfg.TEST.MODEL_FILE: 54 | model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) 55 | 56 | model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() 57 | 58 | return model 59 | 60 | def val(distortion_name, severity, model): 61 | args = parse_args() 62 | args.corruption_type = distortion_name 63 | args.severity = severity 64 | 65 | update_config(cfg, args) 66 | 67 | exp_id = args.exp_id 68 | which_dataset = cfg.DATASET.DATASET 69 | 70 | logger, final_output_dir, tb_log_dir = create_logger(args, 71 | cfg, args.cfg, 'valid') 72 | 73 | logger.info(pprint.pformat(args)) 74 | 75 | if cfg.TEST.MODEL_FILE: 76 | logger.info('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) 77 | 78 | else: 79 | model_state_file = os.path.join( 80 | final_output_dir, 'final_state.pth' 81 | ) 82 | logger.info('=> loading model from {}'.format(model_state_file)) 83 | model.load_state_dict(torch.load(model_state_file)) 84 | 85 | # define loss function (criterion) and optimizer 86 | criterion = JointsMSELoss( 87 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 88 | ).cuda() 89 | 90 | # Data loading code 91 | normalize = transforms.Normalize( 92 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 93 | ) 94 | 95 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 96 | cfg, args, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 97 | transforms.Compose([ 98 | transforms.ToTensor(), 99 | normalize, 100 | ]) 101 | ) 102 | 103 | valid_loader = torch.utils.data.DataLoader( 104 | valid_dataset, 105 | 
batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS),
106 |         shuffle=False,
107 |         num_workers=cfg.WORKERS,
108 |         pin_memory=True
109 |     )
110 | 
111 |     # evaluate on validation set
112 |     name_values, perf_indicator = validate(cfg, args, valid_loader, valid_dataset, model, criterion,
113 |                                            final_output_dir, tb_log_dir)
114 | 
115 | 
116 |     # recording overall results
117 |     overall_dir = os.path.join(final_output_dir, 'robust_C.val')
118 |     record = open(overall_dir, 'a')
119 |     record.write(distortion_name + '_' + str(severity) + ':' + '\t')
120 |     for keys, values in name_values.items():
121 |         record.write(keys + ' = ' + str(values) + '\t')
122 |     record.write('\n')
123 |     record.close()
124 |     return name_values, perf_indicator, final_output_dir, exp_id, which_dataset, overall_dir
125 | 
126 | def get_corruption_results():
127 |     distortions = [
128 |         'gaussian_noise', 'shot_noise', 'impulse_noise',
129 |         'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur',
130 |         'snow', 'frost', 'fog', 'brightness',
131 |         'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression',
132 |         'speckle_noise', 'gaussian_blur', 'spatter', 'saturate'
133 |     ]
134 |     res = []
135 |     model = val_model_init()
136 |     name_values, perf_indicator, final_output_dir, exp_id, which_dataset, overall_dir = val('clean', 0, model)
137 |     res.append(perf_indicator)
138 | 
139 |     for distortion_name in distortions:  # run all 19 corruptions; only the first 15 count towards mPC below
140 |         for severity in range(5):
141 |             name_values, perf_indicator, final_output_dir, exp_id, which_dataset, overall_dir = val(distortion_name, severity, model)
142 |             res.append(perf_indicator)
143 | 
144 |     if which_dataset == 'mpii':
145 |         get_final_results_mpii(res, distortions, final_output_dir, exp_id, mode='td')
146 |     else:
147 |         mode = 'bu' if cfg.model.type == 'BottomUp' else 'td'
148 |         get_final_results(res, distortions, final_output_dir, exp_id, mode=mode)
149 | 
150 | def get_final_results(mAP, distortions, final_output_dir, exp_id, mode='td'):
151 |     dic = {}
152 |     assert len(mAP) == 96, 'Expected 1 clean result + 19 corruptions x 5 severities'
153 |     dic['clean_mAP'] = [mAP.pop(0)]
154 |     print(mAP)
155 | 
156 |     all_tmp = 0
157 |     for dis in distortions:
158 |         tmp = []
159 |         for i in range(5):
160 |             tmp.append(mAP[distortions.index(dis) * 5 + i])
161 |         dic[dis] = [sum(tmp) / len(tmp)]
162 |         if dis in distortions[:15]:
163 |             all_tmp += dic[dis][0]
164 | 
165 |     dic['mean_corrupted_AP'] = [all_tmp / 15]
166 |     dic['rAP'] = dic['mean_corrupted_AP'][0] / dic['clean_mAP'][0]
167 | 
168 |     dataframe = pd.DataFrame(dic)
169 |     columns = ['clean_mAP', 'mean_corrupted_AP', 'rAP'] + distortions
170 |     dataframe.to_csv(final_output_dir + '/' + exp_id + ".csv", index=False, sep=',', columns=columns)
171 | 
172 | def get_final_results_mpii(mean, distortions, final_output_dir, exp_id, mode='td'):
173 |     dic = {}
174 |     assert len(mean) == 96, 'Expected 1 clean result + 19 corruptions x 5 severities'
175 |     dic['clean_mean'] = [round(mean.pop(0), 3)]
176 |     print(mean)
177 | 
178 |     all_tmp = 0
179 |     for dis in distortions:
180 |         tmp = []
181 |         for i in range(5):
182 |             tmp.append(mean[distortions.index(dis) * 5 + i])
183 |         dic[dis] = [round(sum(tmp) / len(tmp), 3)]
184 |         if dis in distortions[:15]:
185 |             all_tmp += dic[dis][0]
186 | 
187 |     dic['mean_corrupted_mean'] = [round(all_tmp / 15, 3)]
188 |     dic['rmean'] = round(dic['mean_corrupted_mean'][0] / dic['clean_mean'][0], 3)
189 | 
190 |     dataframe = pd.DataFrame(dic)
191 |     columns = ['clean_mean', 'mean_corrupted_mean', 'rmean'] + distortions
192 |     dataframe.to_csv(final_output_dir + '/' + exp_id + ".csv", index=False, sep=',', columns=columns)
193 | 
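# Illustrative sketch (added annotation, not part of the original script, and never
# called): the aggregation performed by get_final_results() above is an average over
# the 5 severity levels for each corruption, followed by an average of the first 15
# (standard) corruption scores to obtain mPC, with rPC = mPC / clean AP.
def _example_mpc_rpc(clean_ap, per_run_ap):
    # per_run_ap holds 5 consecutive AP values per corruption, in evaluation order:
    # [c0_s0, ..., c0_s4, c1_s0, ...]
    per_corruption = [sum(per_run_ap[i * 5:(i + 1) * 5]) / 5.0
                      for i in range(len(per_run_ap) // 5)]
    mpc = sum(per_corruption[:15]) / 15.0   # mean performance under corruption
    rpc = mpc / clean_ap                    # relative performance under corruption
    return mpc, rpc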
if __name__ == '__main__':
195 |     get_corruption_results()
196 | 
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # ------------------------------------------------------------------------------
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | import argparse
12 | import os
13 | import pprint
14 | import shutil
15 | import copy
16 | 
17 | import torch
18 | import torch.nn.parallel
19 | import torch.backends.cudnn as cudnn
20 | import torch.optim
21 | import torch.utils.data
22 | import torch.utils.data.distributed
23 | import torchvision.transforms as transforms
24 | from tensorboardX import SummaryWriter
25 | import sys
26 | import _init_paths
27 | from _init_parse import parse_args
28 | 
29 | from config import cfg
30 | from config import update_config
31 | from core.loss import JointsMSELoss
32 | from core.function import train, train_advmix
33 | from core.function import validate
34 | from utils.utils import get_optimizer
35 | from utils.utils import save_checkpoint
36 | from utils.utils import create_logger
37 | from utils.utils import get_model_summary
38 | from torch.utils.data.dataset import ConcatDataset
39 | 
40 | 
41 | import dataset
42 | import models
43 | 
44 | 
45 | def main():
46 |     args = parse_args()
47 |     update_config(cfg, args)
48 | 
49 |     logger, final_output_dir, tb_log_dir = create_logger(
50 |         args, cfg, args.cfg, 'train')
51 | 
52 |     logger.info(pprint.pformat(args))
53 |     logger.info(cfg)
54 | 
55 |     # cudnn related setting
56 |     cudnn.benchmark = cfg.CUDNN.BENCHMARK
57 |     torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
58 |     torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
59 | 
60 |     model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
61 |         cfg, is_train=True
62 |     )
63 | 
64 |     if args.advmix:
65 |         model_teacher = copy.deepcopy(model)
66 |         print('=> Training adversarially.')
67 |         model_G = models.Unet_generator.UnetGenerator(args.gen_input_chn, 3, args.downsamples)
68 |         print("=> UNet generator: {} input channels; {} downsample times".format(args.gen_input_chn, args.downsamples))
69 |         model_G = torch.nn.DataParallel(model_G, device_ids=cfg.GPUS).cuda()
70 | 
71 |     # copy model file
72 |     this_dir = os.path.dirname(__file__)
73 |     shutil.copy2(
74 |         os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
75 |         final_output_dir)
76 | 
77 |     shutil.copy2(
78 |         args.cfg,
79 |         final_output_dir)
80 | 
81 |     shutil.copy2(
82 |         'tools/train.py',
83 |         final_output_dir)
84 | 
85 |     # logger.info(pprint.pformat(model))
86 | 
87 |     writer_dict = {
88 |         'writer': SummaryWriter(log_dir=tb_log_dir),
89 |         'train_global_steps': 0,
90 |         'valid_global_steps': 0,
91 |     }
92 | 
93 |     dump_input = torch.rand(
94 |         (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
95 |     )
96 | 
97 |     try:
98 |         writer_dict['writer'].add_graph(model, (dump_input, ))
99 |     except:
100 |         pass
101 |     try:
102 |         logger.info(get_model_summary(model, dump_input))
103 |     except:
104 |         pass
105 | 
106 |     model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
107 | 
108 |     if args.advmix:
109 |         model_teacher = torch.nn.DataParallel(model_teacher, device_ids=cfg.GPUS).cuda()
110 | 
111 |     criterion =
JointsMSELoss( 112 | use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT 113 | ).cuda() 114 | 115 | # Data loading code 116 | normalize = transforms.Normalize( 117 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 118 | ) 119 | 120 | train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 121 | cfg, args, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True, 122 | transforms.Compose([ 123 | transforms.ToTensor(), 124 | normalize, 125 | ]) 126 | ) 127 | if cfg.DATASET.MINI_COCO: 128 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 129 | cfg, args, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, False, 130 | transforms.Compose([ 131 | transforms.ToTensor(), 132 | normalize, 133 | ]) 134 | ) 135 | else: 136 | valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 137 | cfg, args, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, 138 | transforms.Compose([ 139 | transforms.ToTensor(), 140 | normalize, 141 | ]) 142 | ) 143 | 144 | 145 | if args.advmix: 146 | if args.stylize_image: 147 | if cfg.DATASET.DATASET == 'mpii': 148 | style_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 149 | cfg, args, 'data/stylize_image/output_mpii', cfg.DATASET.TRAIN_SET, True, 150 | transforms.Compose([ 151 | transforms.ToTensor(), 152 | normalize, 153 | ]) 154 | ) 155 | else: 156 | style_dataset = eval('dataset.'+cfg.DATASET.DATASET)( 157 | cfg, args, 'data/stylize_image/output', cfg.DATASET.TRAIN_SET, True, 158 | transforms.Compose([ 159 | transforms.ToTensor(), 160 | normalize, 161 | ]) 162 | ) 163 | train_dataset = ConcatDataset([train_dataset, style_dataset]) 164 | 165 | train_loader = torch.utils.data.DataLoader( 166 | train_dataset, 167 | batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 168 | shuffle=cfg.TRAIN.SHUFFLE, 169 | num_workers=cfg.WORKERS, 170 | pin_memory=cfg.PIN_MEMORY 171 | ) 172 | valid_loader = torch.utils.data.DataLoader( 173 | valid_dataset, 174 | batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), 175 | shuffle=False, 176 | num_workers=cfg.WORKERS, 177 | pin_memory=cfg.PIN_MEMORY 178 | ) 179 | 180 | best_perf = 0.0 181 | best_model = False 182 | last_epoch = -1 183 | last_epoch_G = -1 184 | optimizer = get_optimizer(cfg, model) 185 | if args.advmix: 186 | optimizer_G = get_optimizer(cfg, model_G) 187 | 188 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 189 | checkpoint_file = os.path.join( 190 | final_output_dir, 'checkpoint_D.pth' 191 | ) 192 | 193 | checkpoint_file_G = os.path.join( 194 | final_output_dir, 'checkpoint_G.pth' 195 | ) 196 | 197 | 198 | if os.path.exists(args.load_from_D): 199 | logger.info('=> Fine tuning by loading pretrained model: {}'.format(args.load_from_D)) 200 | pretrained_dict = torch.load(args.load_from_D) 201 | pretrained_dict = {'module.' + k:v for k,v in pretrained_dict.items()} 202 | share_state = {} 203 | model_state = model.state_dict() 204 | for k, v in pretrained_dict.items(): 205 | if k in model_state and v.size() == model_state[k].size(): 206 | share_state[k] = v 207 | 208 | logger.info('Model dict :{}; shared dict :{}'.format(len(model_state), len(share_state))) 209 | model_state.update(share_state) 210 | model.load_state_dict(model_state) 211 | 212 | if os.path.exists(args.load_from_D) and args.advmix: 213 | logger.info('=> Build teacher model by loading pretrained model: {}'.format(args.load_from_D)) 214 | pretrained_dict = torch.load(args.load_from_D) 215 | pretrained_dict = {'module.' 
+ k:v for k,v in pretrained_dict.items()}
216 |         share_state = {}
217 |         model_state = model_teacher.state_dict()
218 |         for k, v in pretrained_dict.items():
219 |             if k in model_state and v.size() == model_state[k].size():
220 |                 share_state[k] = v
221 |         logger.info('Model dict :{}; shared dict :{}'.format(len(model_state), len(share_state)))
222 |         model_state.update(share_state)
223 |         model_teacher.load_state_dict(model_state)
224 | 
225 |     if os.path.exists(args.load_from_G):
226 |         pretrained_dict = torch.load(args.load_from_G)
227 |         pretrained_dict = {'module.' + k:v for k,v in pretrained_dict.items()}
228 |         share_state = {}
229 |         model_state = model_G.state_dict()
230 |         for k, v in pretrained_dict.items():
231 |             if k in model_state and v.size() == model_state[k].size():
232 |                 share_state[k] = v
233 | 
234 |         model_state.update(share_state)
235 |         model_G.load_state_dict(model_state)
236 | 
237 | 
238 |     if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
239 |         logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
240 |         checkpoint = torch.load(checkpoint_file)
241 |         begin_epoch = checkpoint['epoch']
242 |         best_perf = checkpoint['perf']
243 |         last_epoch = checkpoint['epoch']
244 |         model.load_state_dict(checkpoint['state_dict'])
245 | 
246 |         optimizer.load_state_dict(checkpoint['optimizer'])
247 |         logger.info("=> loaded checkpoint '{}' (epoch {})".format(
248 |             checkpoint_file, checkpoint['epoch']))
249 | 
250 |     if cfg.AUTO_RESUME and os.path.exists(checkpoint_file) and args.advmix:
251 |         logger.info("=> loading checkpoint of teacher model '{}'".format(checkpoint_file))
252 |         checkpoint = torch.load(checkpoint_file)
253 |         begin_epoch = checkpoint['epoch']
254 |         best_perf = checkpoint['perf']
255 |         last_epoch = checkpoint['epoch']
256 |         model_teacher.load_state_dict(checkpoint['state_dict'])
257 | 
258 | 
259 |     if cfg.AUTO_RESUME and os.path.exists(checkpoint_file_G) and args.advmix:
260 |         logger.info("=> loading checkpoint of generator '{}'".format(checkpoint_file_G))
261 |         checkpoint_G = torch.load(checkpoint_file_G)
262 |         begin_epoch_G = checkpoint_G['epoch']
263 |         best_perf_G = checkpoint_G['perf']
264 |         last_epoch_G = checkpoint_G['epoch']
265 |         model_G.load_state_dict(checkpoint_G['state_dict'])
266 | 
267 |         optimizer_G.load_state_dict(checkpoint_G['optimizer'])
268 |         logger.info("=> loaded checkpoint '{}' (epoch {})".format(
269 |             checkpoint_file_G, checkpoint_G['epoch']))
270 | 
271 | 
272 |     lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
273 |         optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
274 |         last_epoch=last_epoch
275 |     )
276 | 
277 |     if args.advmix:
278 |         lr_scheduler_G = torch.optim.lr_scheduler.MultiStepLR(
279 |             optimizer_G, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
280 |             last_epoch=last_epoch_G
281 |         )
282 | 
283 |     for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
284 |         lr_scheduler.step()
285 |         if args.advmix:
286 |             lr_scheduler_G.step()
287 | 
288 |         print('=> The learning rate is: ', optimizer.param_groups[0]['lr'], optimizer_G.param_groups[0]['lr'] if args.advmix else '')  # optimizer_G only exists when --advmix is set
289 | 
290 |         if args.advmix:
291 |             train_advmix(cfg, args, train_loader, [model, model_G, model_teacher], criterion, [optimizer, optimizer_G], epoch,
292 |                          final_output_dir, tb_log_dir, writer_dict)
293 |         else:
294 |             print('=> Normal training ...')
295 |             train(cfg, args, train_loader, model, criterion, optimizer, epoch,
296 |                   final_output_dir, tb_log_dir, writer_dict)
297 | 
298 |         name_values, perf_indicator = validate(
299 |             cfg, args, valid_loader, valid_dataset, model, criterion,
300 |             final_output_dir, tb_log_dir, writer_dict
301 | ) 302 | 303 | if perf_indicator >= best_perf: 304 | best_perf = perf_indicator 305 | best_model = True 306 | else: 307 | best_model = False 308 | 309 | logger.info("==> best mAP is {}".format(best_perf)) 310 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 311 | save_checkpoint({ 312 | 'epoch': epoch + 1, 313 | 'model': cfg.MODEL.NAME, 314 | 'state_dict': model.state_dict(), 315 | 'best_state_dict': model.module.state_dict(), 316 | 'perf': perf_indicator, 317 | 'optimizer': optimizer.state_dict(), 318 | }, best_model, final_output_dir, suffix="D") 319 | 320 | if args.advmix: 321 | save_checkpoint({ 322 | 'epoch': epoch + 1, 323 | 'model': cfg.MODEL.NAME, 324 | 'state_dict': model_G.state_dict(), 325 | 'best_state_dict': model_G.module.state_dict(), 326 | 'perf': perf_indicator, 327 | 'optimizer': optimizer_G.state_dict(), 328 | }, best_model, final_output_dir, suffix = "G") 329 | 330 | 331 | final_model_state_file = os.path.join( 332 | final_output_dir, 'final_state.pth' 333 | ) 334 | logger.info('=> saving final model state to {}'.format( 335 | final_model_state_file) 336 | ) 337 | torch.save(model.module.state_dict(), final_model_state_file) 338 | writer_dict['writer'].close() 339 | 340 | 341 | if __name__ == '__main__': 342 | main() 343 | --------------------------------------------------------------------------------
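A note on the AdvMix-specific pieces of tools/train.py: train_advmix receives the pose network, the UNet generator, and a frozen teacher copy, together with both optimizers and the --kd_mseloss / --alpha flags, but its body lives in lib/core/function.py, which is not reproduced in this listing. The snippet below is only a hedged sketch of how a knowledge-distillation term of the kind suggested by those flags could be combined with the ordinary heatmap loss; the MSE form, the detach on the teacher output, and the helper name kd_pose_loss are illustrative assumptions, not the repository's verified implementation.

    import torch.nn.functional as F

    def kd_pose_loss(criterion, student_heatmaps, teacher_heatmaps, target, target_weight, alpha=0.1):
        # supervised heatmap loss against the ground-truth targets (JointsMSELoss above)
        pose_loss = criterion(student_heatmaps, target, target_weight)
        # distillation term: keep the student close to the frozen teacher's prediction
        kd_loss = F.mse_loss(student_heatmaps, teacher_heatmaps.detach())
        return pose_loss + alpha * kd_loss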