├── LICENSE ├── README.md ├── exlpose.yaml └── pytorch-cpn ├── 256.192.model ├── Annotations │ ├── ExLPose-OCN │ │ ├── ExLPose-OC_test_A7M3.json │ │ ├── ExLPose-OC_test_A7M3_trans.json │ │ ├── ExLPose-OC_test_RICOH3.json │ │ └── ExLPose-OC_test_RICOH3_trans.json │ └── ExLPose │ │ ├── ExLPose_test_LL-All.json │ │ ├── ExLPose_test_LL-All_trans.json │ │ ├── ExLPose_test_LL-Extreme.json │ │ ├── ExLPose_test_LL-Extreme_trans.json │ │ ├── ExLPose_test_LL-Hard.json │ │ ├── ExLPose_test_LL-Hard_trans.json │ │ ├── ExLPose_test_LL-Normal.json │ │ ├── ExLPose_test_LL-Normal_trans.json │ │ └── ExLPose_train_trans.json ├── test.py ├── test_a7m3.py ├── test_config_a7m3.py ├── test_config_all.py ├── test_config_extreme.py ├── test_config_hard.py ├── test_config_normal.py ├── test_config_ricoh3.py ├── test_ricoh3.py ├── train.py └── train_config.py ├── cocoapi ├── .gitignore ├── .travis.yml ├── LuaAPI │ ├── CocoApi.lua │ ├── MaskApi.lua │ ├── cocoDemo.lua │ ├── env.lua │ ├── init.lua │ └── rocks │ │ └── coco-scm-1.rockspec ├── MatlabAPI │ ├── CocoApi.m │ ├── CocoEval.m │ ├── CocoUtils.m │ ├── MaskApi.m │ ├── cocoDemo.m │ ├── evalDemo.m │ ├── gason.m │ └── private │ │ ├── gasonMex.cpp │ │ ├── gasonMex.mexa64 │ │ ├── gasonMex.mexmaci64 │ │ └── getPrmDflt.m ├── PythonAPI │ ├── Makefile │ ├── pycocoDemo.ipynb │ ├── pycocoEvalDemo.ipynb │ ├── pycocotools │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── coco.cpython-37.pyc │ │ │ ├── coco_custom.cpython-36.pyc │ │ │ ├── coco_custom.cpython-37.pyc │ │ │ ├── cocoeval.cpython-37.pyc │ │ │ ├── cocoeval_custom.cpython-36.pyc │ │ │ ├── cocoeval_custom.cpython-37.pyc │ │ │ ├── mask.cpython-36.pyc │ │ │ └── mask.cpython-37.pyc │ │ ├── _mask.cpython-36m-x86_64-linux-gnu.so │ │ ├── _mask.cpython-37m-x86_64-linux-gnu.so │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── coco_custom.py │ │ ├── cocoeval.py │ │ ├── cocoeval_custom.py │ │ └── mask.py │ └── setup.py ├── README.txt ├── common │ ├── gason.cpp │ ├── gason.h │ ├── maskApi.c │ └── maskApi.h └── license.txt ├── dataloader ├── loader_ExLPoseOC.py ├── loader_eval_LL.py ├── loader_eval_WL.py └── loader_training_pair.py ├── networks ├── __init__.py ├── globalNet.py ├── lsbn.py ├── network.py ├── refineNet.py ├── resnet.py └── resnetlsbn.py └── utils ├── __init__.py ├── evaluation.py ├── func.py ├── imutils.py ├── logger.py ├── loss.py ├── misc.py ├── osutils.py ├── serialization.py ├── transforms.py └── viz_segmask.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Sohyun Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Human Pose Estimation in Extremely Low-light Conditions 2 | 3 | ### [Project Page](http://cg.postech.ac.kr/research/ExLPose/) | [Paper](https://arxiv.org/abs/2303.15410) 4 | This repo is the official implementation of the [**CVPR 2023**] paper: "[Human Pose Estimation in Extremely Low-light Conditions](https://arxiv.org/abs/2303.15410)". 5 | 6 | > [Human Pose Estimation in Extremely Low-light Conditions](https://arxiv.org/abs/2303.15410) 7 | > [Sohyun Lee](https://sohyun-l.github.io)<sup>1*</sup>, Jaesung Rim<sup>1*</sup>, Boseung Jeong<sup>1</sup>, Geonu Kim<sup>1</sup>, Byungju Woo<sup>2</sup>, Haechan Lee<sup>1</sup>, [Sunghyun Cho](https://www.scho.pe.kr/)<sup>1</sup>, [Suha Kwak](http://cvlab.postech.ac.kr/~suhakwak/)<sup>1</sup>\ 8 | > POSTECH<sup>1</sup> ADD<sup>2</sup>\ 9 | > CVPR 2023 10 | 11 | 12 | ## Overview 13 | We study human pose estimation in extremely low-light images. This task is challenging due to the difficulty of collecting real low-light images with accurate labels, and due to the severely corrupted inputs that significantly degrade prediction quality. To address the first issue, we develop a dedicated camera system and build a new dataset of real low-light images with accurate pose labels. Thanks to our camera system, each low-light image in our dataset is coupled with an aligned well-lit image, which enables accurate pose labeling and is used as privileged information during training. We also propose a new model and a new training strategy that fully exploit the privileged information to learn representations insensitive to lighting conditions. Our method demonstrates outstanding performance on real extremely low-light images, and extensive analyses validate that both our model and our dataset contribute to this success.
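For reference, the "new model" above is the `LSBN_CPN50` network used throughout `pytorch-cpn`, which conditions its normalization layers on the lighting domain. The sketch below is a minimal, hypothetical simplification of that routing idea only (the actual module lives in `networks/lsbn.py` and may differ in detail): one set of batch-norm statistics is kept per lighting condition, and each batch is routed through the branch matching its condition index.

```python
# Hypothetical sketch of lighting-specific batch norm (cf. networks/lsbn.py).
# Illustrates only the routing idea, not the exact implementation.
import torch
import torch.nn as nn

class LightingSpecificBN2d(nn.Module):
    def __init__(self, num_features: int, num_conditions: int = 2):
        super().__init__()
        # One BatchNorm2d per lighting condition; all other network
        # weights are shared across conditions.
        self.bns = nn.ModuleList(
            nn.BatchNorm2d(num_features) for _ in range(num_conditions))

    def forward(self, x: torch.Tensor, condition: torch.Tensor) -> torch.Tensor:
        # The test scripts pass one condition per batch, e.g.
        # 0 * torch.ones(n, dtype=torch.long) for low-light inputs,
        # so a single index selects the branch for the whole batch.
        return self.bns[int(condition[0])](x)
```

This is why the test scripts below call the model as `model(inputs, 0 * torch.ones(n, dtype=torch.long).cuda())`: the zero-valued condition tensor selects the branch used for the low-light test inputs.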
14 | 15 | ## Citation 16 | If you find our code or paper useful, please consider citing our paper: 17 | 18 | ```BibTeX 19 | @inproceedings{lee2023human, 20 | title={Human pose estimation in extremely low-light conditions}, 21 | author={Lee, Sohyun and Rim, Jaesung and Jeong, Boseung and Kim, Geonu and Woo, Byungju and Lee, Haechan and Cho, Sunghyun and Kwak, Suha}, 22 | booktitle={Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition (CVPR)}, 23 | year={2023} 24 | } 25 | ``` 26 | 27 | ## Dataset 28 | The ExLPose dataset is available on [our project page](http://cg.postech.ac.kr/research/ExLPose/). 29 | 30 | ## Installation 31 | This repository is developed and tested on 32 | 33 | - Ubuntu 20.04 34 | - Conda 4.9.2 35 | - CUDA 11.4 36 | - Python 3.7.11 37 | - PyTorch 1.9.0 38 | 39 | ## Environment Setup 40 | * The required environment is specified in the `exlpose.yaml` file 41 | * Clone this repo 42 | ```bash 43 | ~$ git clone https://github.com/sohyun-l/ExLPose 44 | ~$ cd ExLPose 45 | ~/ExLPose$ conda env create --file exlpose.yaml 46 | ~/ExLPose$ conda activate exlpose 47 | ``` 48 | ## Training 49 | ```bash 50 | (exlpose) ~/ExLPose$ cd pytorch-cpn/256.192.model 51 | (exlpose) ~/ExLPose/pytorch-cpn/256.192.model$ python train.py 52 | ``` 53 | ## Our Model 54 | The best model checkpoint (`Final_model.pth.tar`) can be downloaded [here](https://drive.google.com/file/d/1kB9gypMxhnC2NIDk5InhrbTIIBdC9gdu/view?usp=sharing). 55 | 56 | 57 | -------------------------------------------------------------------------------- /exlpose.yaml: -------------------------------------------------------------------------------- 1 | name: exlpose 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - _openmp_mutex=5.1=1_gnu 10 | - _pytorch_select=0.2=gpu_0 11 | - blas=1.0=mkl 12 | - blessings=1.7=py36h06a4308_1002 13 | - brotlipy=0.7.0=py36h8f6f2f9_1001 14 | - bzip2=1.0.8=h7f98852_4 15 | - ca-certificates=2023.05.30=h06a4308_0 16 | - cairo=1.16.0=hb05425b_5 17 | - certifi=2021.5.30=py36h06a4308_0 18 | - cffi=1.14.6=py36h400218f_0 19 | - cloudpickle=2.0.0=pyhd3eb1b0_0 20 | - colorama=0.4.4=pyhd3eb1b0_0 21 | - cryptography=35.0.0=py36hb60f036_0 22 | - cudatoolkit=10.2.89=hfd86e86_1 23 | - cudnn=7.6.5=cuda10.2_0 24 | - cycler=0.11.0=pyhd3eb1b0_0 25 | - cytoolz=0.11.0=py36h7b6447c_0 26 | - dask-core=2021.3.1=pyhd3eb1b0_0 27 | - dataclasses=0.8=pyh4f3eec9_6 28 | - dbus=1.13.18=hb2f20db_0 29 | - decorator=5.1.1=pyhd3eb1b0_0 30 | - docker-pycreds=0.4.0=py_0 31 | - expat=2.4.9=h6a678d5_0 32 | - ffmpeg=4.0.2=ha0c5888_2 33 | - fontconfig=2.14.1=h52c9d5c_1 34 | - freeglut=3.0.0=hfc679d8_5 35 | - freetype=2.12.1=h4a9f257_0 36 | - glib=2.69.1=h4ff587b_1 37 | - gmp=6.1.2=hf484d3e_1000 38 | - gnutls=3.5.19=h2a4e5f8_1 39 | - gpustat=0.6.0=pyhd3eb1b0_1 40 | - graphite2=1.3.13=h58526e2_1001 41 | - gst-plugins-base=1.14.1=h6a678d5_1 42 | - gstreamer=1.14.1=h5eee18b_1 43 | - harfbuzz=1.8.8=hffaf4a1_0 44 | - hdf5=1.10.2=hc401514_3 45 | - icu=58.2=he6710b0_3 46 | - idna=3.4=pyhd8ed1ab_0 47 | - imageio=2.9.0=pyhd3eb1b0_0 48 | - intel-openmp=2022.1.0=h9e868ea_3769 49 | - jasper=2.0.14=habb8e15_1 50 | - jpeg=9e=h5eee18b_1 51 | - kiwisolver=1.3.1=py36h2531618_0 52 | - lcms2=2.12=h3be6417_0 53 | - ld_impl_linux-64=2.38=h1181459_1 54 | - lerc=3.0=h295c915_0 55 | - libdeflate=1.17=h5eee18b_0 56 | - libffi=3.3=he6710b0_2 57 | - libgcc-ng=11.2.0=h1234567_1 58 | - libgfortran=3.0.0=1 59 | - libgfortran-ng=7.5.0=ha8ba4b0_17 60 | - libgfortran4=7.5.0=ha8ba4b0_17 61 | - libglu=9.0.0=he1b5a44_1001 62 | - 
libgomp=11.2.0=h1234567_1 63 | - libiconv=1.17=h166bdaf_0 64 | - libopencv=3.4.2=hb342d67_1 65 | - libpng=1.6.39=h5eee18b_0 66 | - libprotobuf=3.18.0=h780b84a_1 67 | - libstdcxx-ng=11.2.0=h1234567_1 68 | - libtiff=4.5.0=h6a678d5_2 69 | - libuuid=1.41.5=h5eee18b_0 70 | - libuv=1.44.2=h5eee18b_0 71 | - libwebp-base=1.2.4=h5eee18b_1 72 | - libxcb=1.15=h7f8727e_0 73 | - libxml2=2.9.14=h74e7548_0 74 | - lz4-c=1.9.4=h6a678d5_0 75 | - matplotlib=3.3.4=py36h06a4308_0 76 | - matplotlib-base=3.3.4=py36h62a2d02_0 77 | - mkl=2020.2=256 78 | - mkl-service=2.3.0=py36he8ac12f_0 79 | - mkl_fft=1.3.0=py36h54f3939_0 80 | - mkl_random=1.1.1=py36h0573a6f_0 81 | - ncurses=6.4=h6a678d5_0 82 | - nettle=3.3=0 83 | - networkx=2.5=py_0 84 | - ninja=1.10.2=h06a4308_5 85 | - ninja-base=1.10.2=hd09550d_5 86 | - numpy=1.19.2=py36h54aff64_0 87 | - numpy-base=1.19.2=py36hfa32c7d_0 88 | - nvidia-ml=7.352.0=pyhd3eb1b0_0 89 | - olefile=0.46=py36_0 90 | - opencv=3.4.2=py36h6fd60c2_1 91 | - openh264=1.8.0=hdbcaa40_1000 92 | - openjpeg=2.4.0=h3ad879b_0 93 | - openssl=1.1.1u=h7f8727e_0 94 | - pcre=8.45=h295c915_0 95 | - pillow=8.3.1=py36h2c7a002_0 96 | - pixman=0.40.0=h36c2ea0_0 97 | - promise=2.3=py36h5fab9bb_4 98 | - py-opencv=3.4.2=py36hb342d67_1 99 | - pycparser=2.21=pyhd3eb1b0_0 100 | - pyopenssl=22.0.0=pyhd8ed1ab_1 101 | - pyparsing=3.0.4=pyhd3eb1b0_0 102 | - pyqt=5.9.2=py36h05f1152_2 103 | - pysocks=1.7.1=py36h5fab9bb_3 104 | - python=3.6.13=h12debd9_1 105 | - python-dateutil=2.8.2=pyhd3eb1b0_0 106 | - python_abi=3.6=2_cp36m 107 | - pytorch=1.9.0=py3.6_cuda10.2_cudnn7.6.5_0 108 | - pywavelets=1.1.1=py36h7b6447c_2 109 | - pyyaml=5.4.1=py36h27cfd23_1 110 | - qt=5.9.7=h5867ecd_1 111 | - readline=8.2=h5eee18b_0 112 | - scikit-image=0.17.2=py36hdf5156a_0 113 | - setuptools=58.0.4=py36h06a4308_0 114 | - shortuuid=1.0.11=pyhd8ed1ab_0 115 | - sip=4.19.8=py36hf484d3e_0 116 | - six=1.16.0=pyhd3eb1b0_1 117 | - sqlite=3.41.2=h5eee18b_0 118 | - tifffile=2020.10.1=py36hdd07704_2 119 | - tk=8.6.12=h1ccaba5_0 120 | - toolz=0.11.2=pyhd3eb1b0_0 121 | - torchvision=0.2.1=py36_0 122 | - tornado=6.1=py36h27cfd23_0 123 | - tqdm=4.63.0=pyhd3eb1b0_0 124 | - typing_extensions=4.1.1=pyh06a4308_0 125 | - wheel=0.37.1=pyhd3eb1b0_0 126 | - x264=1!152.20180806=h14c3975_0 127 | - xz=5.4.2=h5eee18b_0 128 | - yaml=0.2.5=h7b6447c_0 129 | - zlib=1.2.13=h5eee18b_0 130 | - zstd=1.5.5=hc292b87_0 131 | - pip: 132 | - appdirs==1.4.4 133 | - charset-normalizer==2.0.12 134 | - click==8.0.4 135 | - gitdb==4.0.9 136 | - gitpython==3.1.18 137 | - importlib-metadata==4.8.3 138 | - pathtools==0.1.2 139 | - pip==21.3.1 140 | - protobuf==3.19.6 141 | - psutil==5.9.5 142 | - requests==2.27.1 143 | - scipy==1.1.0 144 | - sentry-sdk==1.29.2 145 | - setproctitle==1.2.3 146 | - smmap==5.0.0 147 | - urllib3==1.26.16 148 | - wandb==0.15.8 149 | - zipp==3.6.0 150 | prefix: /home/2so/anaconda3/envs/exlpose 151 | -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_a7m3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import time 5 | # import matplotlib.pyplot as plt 6 | 7 | import torch 8 | import torch.nn.parallel 9 | import torch.backends.cudnn as cudnn 10 | import torch.optim 11 | import torchvision.datasets as datasets 12 | import cv2 13 | import json 14 | import numpy as np 15 | 16 | from test_config_a7m3 import cfg_test 17 | from pycocotools.coco_custom import COCO 18 | from pycocotools.cocoeval_custom import COCOeval 19 | 
from utils.logger import Logger 20 | from utils.evaluation import accuracy, AverageMeter, final_preds 21 | from utils.misc import save_model, adjust_learning_rate 22 | from utils.osutils import mkdir_p, isfile, isdir, join 23 | from utils.transforms import fliplr, flip_back 24 | from utils.imutils import im_to_numpy, im_to_torch 25 | from networks import network 26 | from dataloader.loader_ExLPoseOC import ExLPoseOC 27 | from tqdm import tqdm 28 | from datetime import datetime 29 | import wandb 30 | 31 | def main(args): 32 | # create model 33 | file_name = 'Ours_a7m3' 34 | 35 | wandb.init(project="dark_project", name=file_name, entity="poseindark") 36 | 37 | model = network.__dict__[cfg_test.model](cfg_test.output_shape, cfg_test.num_class, in_features=0, num_conditions=2, pretrained=True) 38 | model = torch.nn.DataParallel(model).cuda() 39 | 40 | # load training weights 41 | checkpoint_file = os.path.join(args.checkpoint, args.test+'.pth.tar') 42 | checkpoint = torch.load(checkpoint_file) 43 | model.load_state_dict(checkpoint['state_dict']) 44 | print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 45 | 46 | # change to evaluation mode 47 | model.eval() 48 | 49 | test_loader = torch.utils.data.DataLoader( 50 | ExLPoseOC(cfg_test, train=False), 51 | batch_size=128*args.num_gpus, shuffle=False, 52 | num_workers=args.workers, pin_memory=True) 53 | 54 | print('testing...') 55 | full_result = [] 56 | for i, (inputs, meta) in tqdm(enumerate(test_loader)): 57 | with torch.no_grad(): 58 | input_var = torch.autograd.Variable(inputs.cuda()) 59 | if args.flip: 60 | flip_inputs = inputs.clone() 61 | for i, finp in enumerate(flip_inputs): 62 | finp = im_to_numpy(finp) 63 | finp = cv2.flip(finp, 1) 64 | flip_inputs[i] = im_to_torch(finp) 65 | flip_input_var = torch.autograd.Variable(flip_inputs.cuda()) 66 | 67 | # compute output 68 | t0, t1, t2, t3, t4, global_outputs, refine_output = model(input_var, 0 * torch.ones(input_var.shape[0], dtype=torch.long).cuda()) 69 | score_map = refine_output.data.cpu() 70 | score_map = score_map.numpy() 71 | 72 | if args.flip: 73 | t0, t1, t2, t3, t4, flip_global_outputs, flip_output = model(flip_input_var, 0 * torch.ones(flip_input_var.shape[0], dtype=torch.long).cuda()) 74 | flip_score_map = flip_output.data.cpu() 75 | flip_score_map = flip_score_map.numpy() 76 | 77 | for i, fscore in enumerate(flip_score_map): 78 | fscore = fscore.transpose((1,2,0)) 79 | fscore = cv2.flip(fscore, 1) 80 | fscore = list(fscore.transpose((2,0,1))) 81 | for (q, w) in cfg_test.symmetry: 82 | fscore[q], fscore[w] = fscore[w], fscore[q] 83 | fscore = np.array(fscore) 84 | score_map[i] += fscore 85 | score_map[i] /= 2 86 | 87 | ids = meta['imgID'].numpy() 88 | det_scores = meta['det_scores'] 89 | for b in range(inputs.size(0)): 90 | details = meta['augmentation_details'] 91 | single_result_dict = {} 92 | single_result = [] 93 | 94 | single_map = score_map[b] 95 | r0 = single_map.copy() 96 | r0 /= 255 97 | r0 += 0.5 98 | v_score = np.zeros(14) 99 | for p in range(14): 100 | single_map[p] /= np.amax(single_map[p]) 101 | border = 10 102 | dr = np.zeros((cfg_test.output_shape[0] + 2*border, cfg_test.output_shape[1]+2*border)) 103 | dr[border:-border, border:-border] = single_map[p].copy() 104 | dr = cv2.GaussianBlur(dr, (21, 21), 0) 105 | lb = dr.argmax() 106 | y, x = np.unravel_index(lb, dr.shape) 107 | dr[y, x] = 0 108 | lb = dr.argmax() 109 | py, px = np.unravel_index(lb, dr.shape) 110 | y -= border 111 | x -= border 112 | py -= border + y 113 | px -= border + x 114 | ln = (px ** 2 + py ** 2) ** 0.5 115 | delta = 0.25 116 | if ln > 1e-3: 117 | x += delta * px / ln 118 | y += delta * py / ln 119 | x = max(0, min(x, cfg_test.output_shape[1] - 1)) 120 | y = max(0, min(y, cfg_test.output_shape[0] - 1)) 121 | resy = float((4 * y + 2) / cfg_test.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1]) 122 | resx = float((4 * x + 2) / cfg_test.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0]) 123 | v_score[p] = float(r0[p, int(round(y)+1e-10), int(round(x)+1e-10)]) 124 | single_result.append(resx) 125 | single_result.append(resy) 126 | single_result.append(1) 127 | if len(single_result) != 0: 128 | single_result_dict['image_id'] = int(ids[b]) 129 | single_result_dict['category_id'] = 1 130 | single_result_dict['keypoints'] = single_result 131 | single_result_dict['score'] = float(det_scores[b])*v_score.mean() 132 | full_result.append(single_result_dict) 133 | 134 | result_path = 'Qual_generalization_Ours' 135 | if not isdir(result_path): 136 | mkdir_p(result_path) 137 | result_file = os.path.join(result_path, 'Ours_a7m3.json') 138 | with open(result_file,'w') as wf: 139 | json.dump(full_result, wf) 140 | 141 | eval_gt = COCO(cfg_test.ori_gt_path) 142 | eval_dt = eval_gt.loadRes(result_file) 143 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints') 144 | cocoEval.evaluate() 145 | cocoEval.accumulate() 146 | result = cocoEval.summarize() 147 | 148 | 149 | wandb.log({"A7M3_AP": result[0]}) 150 | 151 | 152 | if __name__ == '__main__': 153 | parser = argparse.ArgumentParser(description='PyTorch CPN Test') 154 | parser.add_argument('-j', '--workers', default=0, type=int, metavar='N', 155 | help='number of data loading workers (default: 0)') 156 | parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N', 157 | help='number of GPUs to use (default: 1)') 158 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 159 | help='path to load checkpoint (default: checkpoint)') 160 | parser.add_argument('-f', '--flip', default=True, type=bool, 161 | help='flip input image during test (default: True)') 162 | parser.add_argument('-b', '--batch', default=128, type=int, 163 | help='test batch size (default: 128)') 164 | parser.add_argument('-t', '--test', default='CPN256x192', type=str, 165 | help='which checkpoint to test (default: CPN256x192)') 166 | parser.add_argument('-r', '--result', default='result', type=str, 167 | help='path to save result (default: result)') 168 | main(parser.parse_args()) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_config_a7m3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'LSBN_CPN50' 16 | 17 | num_class = 14 18 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark/PID_OTHER') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | 27 | use_GT_bbox = True 28 | # if use_GT_bbox: 
29 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose-OCN','ExLPose-OC_test_A7M3_trans.json') 30 | ori_gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose-OCN','ExLPose-OC_test_A7M3.json') 31 | 32 | cfg_test = Config() 33 | add_pypath(cfg_test.root_dir) 34 | add_pypath(os.path.join(cfg_test.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_config_all.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'LSBN_CPN50' 16 | 17 | num_class = 14 18 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | 27 | use_GT_bbox = True 28 | # if use_GT_bbox: 29 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-All_trans.json') 30 | ori_gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-All.json') 31 | 32 | cfg_test = Config() 33 | add_pypath(cfg_test.root_dir) 34 | add_pypath(os.path.join(cfg_test.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_config_extreme.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'LSBN_CPN50' 16 | 17 | num_class = 14 18 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | 27 | use_GT_bbox = True 28 | # if use_GT_bbox: 29 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-Extreme_trans.json') 30 | ori_gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-Extreme.json') 31 | 32 | cfg_test_extreme = Config() 33 | add_pypath(cfg_test_extreme.root_dir) 34 | add_pypath(os.path.join(cfg_test_extreme.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_config_hard.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 
| 15 | model = 'LSBN_CPN50' 16 | 17 | num_class = 14 18 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | 27 | use_GT_bbox = True 28 | # if use_GT_bbox: 29 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-Hard_trans.json') 30 | ori_gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-Hard.json') 31 | 32 | cfg_test_hard = Config() 33 | add_pypath(cfg_test_hard.root_dir) 34 | add_pypath(os.path.join(cfg_test_hard.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_config_normal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'LSBN_CPN50' 16 | 17 | num_class = 14 18 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | 27 | use_GT_bbox = True 28 | # if use_GT_bbox: 29 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-Normal_trans.json') 30 | ori_gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_test_LL-Normal.json') 31 | 32 | cfg_test_normal = Config() 33 | add_pypath(cfg_test_normal.root_dir) 34 | add_pypath(os.path.join(cfg_test_normal.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/test_config_ricoh3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'LSBN_CPN50' 16 | 17 | num_class = 14 18 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark/PID_OTHER') 19 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 20 | bbox_extend_factor = (0.1, 0.15) # x, y 21 | 22 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 23 | data_shape = (256, 192) 24 | output_shape = (64, 48) 25 | 26 | 27 | use_GT_bbox = True 28 | # if use_GT_bbox: 29 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose-OCN','ExLPose-OC_test_RICOH3_trans.json') 30 | ori_gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose-OCN','ExLPose-OC_test_RICOH3.json') 31 | 32 | cfg_test = Config() 33 | add_pypath(cfg_test.root_dir) 34 | add_pypath(os.path.join(cfg_test.root_dir, 'cocoapi/PythonAPI')) -------------------------------------------------------------------------------- 
/pytorch-cpn/256.192.model/test_ricoh3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import time 5 | # import matplotlib.pyplot as plt 6 | 7 | import torch 8 | import torch.nn.parallel 9 | import torch.backends.cudnn as cudnn 10 | import torch.optim 11 | import torchvision.datasets as datasets 12 | import cv2 13 | import json 14 | import numpy as np 15 | 16 | from test_config_ricoh3 import cfg_test 17 | from pycocotools.coco_custom import COCO 18 | from pycocotools.cocoeval_custom import COCOeval 19 | from utils.logger import Logger 20 | from utils.evaluation import accuracy, AverageMeter, final_preds 21 | from utils.misc import save_model, adjust_learning_rate 22 | from utils.osutils import mkdir_p, isfile, isdir, join 23 | from utils.transforms import fliplr, flip_back 24 | from utils.imutils import im_to_numpy, im_to_torch 25 | from networks import network 26 | from dataloader.loader_ExLPoseOC import ExLPoseOC 27 | from tqdm import tqdm 28 | from datetime import datetime 29 | import wandb 30 | 31 | def main(args): 32 | # create model 33 | file_name = 'Ours_ricoh3' 34 | wandb.init(project="dark_project", name=file_name, entity="poseindark") 35 | 36 | model = network.__dict__[cfg_test.model](cfg_test.output_shape, cfg_test.num_class, in_features=0, num_conditions=2, pretrained=True) 37 | model = torch.nn.DataParallel(model).cuda() 38 | 39 | # load training weights 40 | checkpoint_file = os.path.join(args.checkpoint, args.test+'.pth.tar') 41 | checkpoint = torch.load(checkpoint_file) 42 | model.load_state_dict(checkpoint['state_dict']) 43 | print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 44 | 45 | # change to evaluation mode 46 | model.eval() 47 | 48 | test_loader = torch.utils.data.DataLoader( 49 | ExLPoseOC(cfg_test, train=False), 50 | batch_size=128*args.num_gpus, shuffle=False, 51 | num_workers=args.workers, pin_memory=True) 52 | 53 | print('testing...') 54 | full_result = [] 55 | for i, (inputs, meta) in tqdm(enumerate(test_loader)): 56 | with torch.no_grad(): 57 | input_var = torch.autograd.Variable(inputs.cuda()) 58 | if args.flip: 59 | flip_inputs = inputs.clone() 60 | for i, finp in enumerate(flip_inputs): 61 | finp = im_to_numpy(finp) 62 | finp = cv2.flip(finp, 1) 63 | flip_inputs[i] = im_to_torch(finp) 64 | flip_input_var = torch.autograd.Variable(flip_inputs.cuda()) 65 | 66 | # compute output 67 | t0, t1, t2, t3, t4, global_outputs, refine_output = model(input_var, 0 * torch.ones(input_var.shape[0], dtype=torch.long).cuda()) 68 | score_map = refine_output.data.cpu() 69 | score_map = score_map.numpy() 70 | 71 | if args.flip: 72 | t0, t1, t2, t3, t4, flip_global_outputs, flip_output = model(flip_input_var, 0 * torch.ones(flip_input_var.shape[0], dtype=torch.long).cuda()) 73 | flip_score_map = flip_output.data.cpu() 74 | flip_score_map = flip_score_map.numpy() 75 | 76 | for i, fscore in enumerate(flip_score_map): 77 | fscore = fscore.transpose((1,2,0)) 78 | fscore = cv2.flip(fscore, 1) 79 | fscore = list(fscore.transpose((2,0,1))) 80 | for (q, w) in cfg_test.symmetry: 81 | fscore[q], fscore[w] = fscore[w], fscore[q] 82 | fscore = np.array(fscore) 83 | score_map[i] += fscore 84 | score_map[i] /= 2 85 | 86 | ids = meta['imgID'].numpy() 87 | det_scores = meta['det_scores'] 88 | for b in range(inputs.size(0)): 89 | details = meta['augmentation_details'] 90 | single_result_dict = {} 91 | single_result = [] 92 | 93 | single_map = score_map[b] 94 | r0 = single_map.copy() 95 | r0 /= 255 96 | r0 += 0.5 97 | v_score = np.zeros(14) 98 | for p in range(14): 99 | single_map[p] /= np.amax(single_map[p]) 100 | border = 10 101 | dr = np.zeros((cfg_test.output_shape[0] + 2*border, cfg_test.output_shape[1]+2*border)) 102 | dr[border:-border, border:-border] = single_map[p].copy() 103 | dr = cv2.GaussianBlur(dr, (21, 21), 0) 104 | lb = dr.argmax() 105 | y, x = np.unravel_index(lb, dr.shape) 106 | dr[y, x] = 0 107 | lb = dr.argmax() 108 | py, px = np.unravel_index(lb, dr.shape) 109 | y -= border 110 | x -= border 111 | py -= border + y 112 | px -= border + x 113 | ln = (px ** 2 + py ** 2) ** 0.5 114 | delta = 0.25 115 | if ln > 1e-3: 116 | x += delta * px / ln 117 | y += delta * py / ln 118 | x = max(0, min(x, cfg_test.output_shape[1] - 1)) 119 | y = max(0, min(y, cfg_test.output_shape[0] - 1)) 120 | resy = float((4 * y + 2) / cfg_test.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1]) 121 | resx = float((4 * x + 2) / cfg_test.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0]) 122 | v_score[p] = float(r0[p, int(round(y)+1e-10), int(round(x)+1e-10)]) 123 | single_result.append(resx) 124 | single_result.append(resy) 125 | single_result.append(1) 126 | if len(single_result) != 0: 127 | single_result_dict['image_id'] = int(ids[b]) 128 | single_result_dict['category_id'] = 1 129 | single_result_dict['keypoints'] = single_result 130 | single_result_dict['score'] = float(det_scores[b])*v_score.mean() 131 | full_result.append(single_result_dict) 132 | 133 | 134 | result_path = 'Qual_generalization_Ours' 135 | if not isdir(result_path): 136 | mkdir_p(result_path) 137 | result_file = os.path.join(result_path, 'Ours_ricoh3.json') 138 | with open(result_file,'w') as wf: 139 | json.dump(full_result, wf) 140 | 141 | eval_gt = COCO(cfg_test.ori_gt_path) 142 | eval_dt = eval_gt.loadRes(result_file) 143 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints') 144 | cocoEval.evaluate() 145 | cocoEval.accumulate() 146 | result = cocoEval.summarize() 147 | 148 | wandb.log({"RICOH3_AP": result[0]}) 149 | 150 | 151 | if __name__ == '__main__': 152 | parser = argparse.ArgumentParser(description='PyTorch CPN Test') 153 | parser.add_argument('-j', '--workers', default=0, type=int, metavar='N', 154 | help='number of data loading workers (default: 0)') 155 | parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N', 156 | help='number of GPUs to use (default: 1)') 157 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 158 | help='path to load checkpoint (default: checkpoint)') 159 | parser.add_argument('-f', '--flip', default=True, type=bool, 160 | help='flip input image during test (default: True)') 161 | parser.add_argument('-b', '--batch', default=128, type=int, 162 | help='test batch size (default: 128)') 163 | parser.add_argument('-t', '--test', default='CPN256x192', type=str, 164 | help='which checkpoint to test (default: CPN256x192)') 165 | parser.add_argument('-r', '--result', default='result', type=str, 166 | help='path to save result (default: result)') 167 | main(parser.parse_args()) -------------------------------------------------------------------------------- /pytorch-cpn/256.192.model/train_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import numpy as np 5 | 6 | def add_pypath(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9
| 10 | class Config: 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | this_dir_name = cur_dir.split('/')[-1] 13 | root_dir = os.path.join(cur_dir, '..') 14 | 15 | model = 'LSBN_CPN50' 16 | 17 | lr = 5e-4 18 | lr_gamma = 0.5 19 | lr_dec_epoch = list(range(6,40,6)) 20 | 21 | batch_size = 32 22 | weight_decay = 1e-5 23 | 24 | num_class = 14 25 | img_path = os.path.join(root_dir, '/data01/', 'PoseInTheDark') 26 | symmetry = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12)] 27 | bbox_extend_factor = (0.1, 0.15) # x, y 28 | 29 | # data augmentation setting 30 | scale_factor=(0.7, 1.35) 31 | rot_factor=45 32 | 33 | pixel_means = np.array([122.7717, 115.9465, 102.9801]) # RGB 34 | data_shape = (256, 192) 35 | output_shape = (64, 48) 36 | gaussain_kernel = (7, 7) 37 | 38 | gk15 = (15, 15) 39 | gk11 = (11, 11) 40 | gk9 = (9, 9) 41 | gk7 = (7, 7) 42 | 43 | gt_path = os.path.join(root_dir, '256.192.model/Annotations/ExLPose','ExLPose_train_trans.json') 44 | 45 | cfg = Config() 46 | add_pypath(cfg.root_dir) 47 | 48 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/.gitignore: -------------------------------------------------------------------------------- 1 | images/ 2 | annotations/ 3 | results/ 4 | external/ 5 | .DS_Store 6 | 7 | MatlabAPI/analyze*/ 8 | MatlabAPI/visualize*/ 9 | MatlabAPI/private/maskApiMex.* 10 | 11 | PythonAPI/pycocotools/__init__.pyc 12 | PythonAPI/pycocotools/_mask.c 13 | PythonAPI/pycocotools/_mask.so 14 | PythonAPI/pycocotools/coco.pyc 15 | PythonAPI/pycocotools/cocoeval.pyc 16 | PythonAPI/pycocotools/mask.pyc 17 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 2.7 6 | - 3.6 7 | install: 8 | - pip install --upgrade pip 9 | - pip install pycocotools 10 | script: 11 | - true 12 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/LuaAPI/CocoApi.lua: -------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for accessing the Common Objects in COntext (COCO) dataset. 4 | 5 | For an overview of the API please see http://mscoco.org/dataset/#download. 6 | CocoApi.lua (this file) is modeled after the Matlab CocoApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/CocoApi.m 8 | 9 | The following API functions are defined in the Lua API: 10 | CocoApi - Load COCO annotation file and prepare data structures. 11 | getAnnIds - Get ann ids that satisfy given filter conditions. 12 | getCatIds - Get cat ids that satisfy given filter conditions. 13 | getImgIds - Get img ids that satisfy given filter conditions. 14 | loadAnns - Load anns with the specified ids. 15 | loadCats - Load cats with the specified ids. 16 | loadImgs - Load imgs with the specified ids. 17 | showAnns - Display the specified annotations. 18 | Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 19 | For detailed usage information please see cocoDemo.lua. 20 | 21 | LIMITATIONS: the following API functions are NOT defined in the Lua API: 22 | loadRes - Load algorithm results and create API for accessing them. 23 | download - Download COCO images from mscoco.org server. 
24 | In addition, currently the getCatIds() and getImgIds() do not accept filters. 25 | getAnnIds() can be called using getAnnIds({imgId=id}) and getAnnIds({catId=id}). 26 | 27 | Note: loading COCO JSON annotations to Lua tables is quite slow. Hence, a call 28 | to CocoApi(annFile) converts the annotations to a custom 'flattened' format that 29 | is more efficient. The first time a COCO JSON is loaded, the conversion is 30 | invoked (this may take up to a minute). The converted data is then stored in a 31 | t7 file (the code must have write permission to the dir of the JSON file). 32 | Future calls of cocoApi=CocoApi(annFile) take a fraction of a second. To view the 33 | created data just inspect cocoApi.data of a created instance of the CocoApi. 34 | 35 | Common Objects in COntext (COCO) Toolbox. version 3.0 36 | Data, paper, and tutorials available at: http://mscoco.org/ 37 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 38 | Licensed under the Simplified BSD License [see coco/license.txt] 39 | 40 | ------------------------------------------------------------------------------]] 41 | 42 | local json = require 'cjson' 43 | local coco = require 'coco.env' 44 | 45 | local TensorTable = torch.class('TensorTable',coco) 46 | local CocoSeg = torch.class('CocoSeg',coco) 47 | local CocoApi = torch.class('CocoApi',coco) 48 | 49 | -------------------------------------------------------------------------------- 50 | 51 | --[[ TensorTable is a lightweight data structure for storing variable size 1D 52 | tensors. Tables of tensors are slow to save/load to disk. Instead, TensorTable 53 | stores all the data in a single long tensor (along with indices into the tensor) 54 | making serialization fast. A TensorTable may only contain 1D same-type torch 55 | tensors or strings. It supports only creation from a table and indexing. ]] 56 | 57 | function TensorTable:__init( T ) 58 | local n = #T; assert(n>0) 59 | local isStr = torch.type(T[1])=='string' 60 | assert(isStr or torch.isTensor(T[1])) 61 | local c=function(s) return torch.CharTensor(torch.CharStorage():string(s)) end 62 | if isStr then local S=T; T={}; for i=1,n do T[i]=c(S[i]) end end 63 | local ms, idx = torch.LongTensor(n), torch.LongTensor(n+1) 64 | for i=1,n do ms[i]=T[i]:numel() end 65 | idx[1]=1; idx:narrow(1,2,n):copy(ms); idx=idx:cumsum() 66 | local type = string.sub(torch.type(T[1]),7,-1) 67 | local data = torch[type](idx[n+1]-1) 68 | if isStr then type='string' end 69 | for i=1,n do if ms[i]>0 then data:sub(idx[i],idx[i+1]-1):copy(T[i]) end end 70 | if ms:eq(ms[1]):all() and ms[1]>0 then data=data:view(n,ms[1]); idx=nil end 71 | self.data, self.idx, self.type = data, idx, type 72 | end 73 | 74 | function TensorTable:__index__( i ) 75 | if torch.type(i)~='number' then return false end 76 | local d, idx, type = self.data, self.idx, self.type 77 | if idx and idx[i]==idx[i+1] then 78 | if type=='string' then d='' else d=torch[type]() end 79 | else 80 | if idx then d=d:sub(idx[i],idx[i+1]-1) else d=d[i] end 81 | if type=='string' then d=d:clone():storage():string() end 82 | end 83 | return d, true 84 | end 85 | 86 | -------------------------------------------------------------------------------- 87 | 88 | --[[ CocoSeg is an efficient data structure for storing COCO segmentations. 
]] 89 | 90 | function CocoSeg:__init( segs ) 91 | local polys, pIdx, sizes, rles, p, isStr = {}, {}, {}, {}, 0, 0 92 | for i,seg in pairs(segs) do if seg.size then isStr=seg.counts break end end 93 | isStr = torch.type(isStr)=='string' 94 | for i,seg in pairs(segs) do 95 | pIdx[i], sizes[i] = {}, {} 96 | if seg.size then 97 | sizes[i],rles[i] = seg.size,seg.counts 98 | else 99 | if isStr then rles[i]='' else rles[i]={} end 100 | for j=1,#seg do p=p+1; pIdx[i][j],polys[p] = p,seg[j] end 101 | end 102 | pIdx[i],sizes[i] = torch.LongTensor(pIdx[i]),torch.IntTensor(sizes[i]) 103 | if not isStr then rles[i]=torch.IntTensor(rles[i]) end 104 | end 105 | for i=1,p do polys[i]=torch.DoubleTensor(polys[i]) end 106 | self.polys, self.pIdx = coco.TensorTable(polys), coco.TensorTable(pIdx) 107 | self.sizes, self.rles = coco.TensorTable(sizes), coco.TensorTable(rles) 108 | end 109 | 110 | function CocoSeg:__index__( i ) 111 | if torch.type(i)~='number' then return false end 112 | if self.sizes[i]:numel()>0 then 113 | return {size=self.sizes[i],counts=self.rles[i]}, true 114 | else 115 | local ids, polys = self.pIdx[i], {} 116 | for i=1,ids:numel() do polys[i]=self.polys[ids[i]] end 117 | return polys, true 118 | end 119 | end 120 | 121 | -------------------------------------------------------------------------------- 122 | 123 | --[[ CocoApi is the API to the COCO dataset, see main comment for details. ]] 124 | 125 | function CocoApi:__init( annFile ) 126 | assert( string.sub(annFile,-4,-1)=='json' and paths.filep(annFile) ) 127 | local torchFile = string.sub(annFile,1,-6) .. '.t7' 128 | if not paths.filep(torchFile) then self:__convert(annFile,torchFile) end 129 | local data = torch.load(torchFile) 130 | self.data, self.inds = data, {} 131 | for k,v in pairs({images='img',categories='cat',annotations='ann'}) do 132 | local M = {}; self.inds[v..'IdsMap']=M 133 | if data[k] then for i=1,data[k].id:size(1) do M[data[k].id[i]]=i end end 134 | end 135 | end 136 | 137 | function CocoApi:__convert( annFile, torchFile ) 138 | print('convert: '..annFile..' 
--> .t7 [please be patient]') 139 | local tic = torch.tic() 140 | -- load data and decode json 141 | local data = torch.CharStorage(annFile):string() 142 | data = json.decode(data); collectgarbage() 143 | -- transpose and flatten each field in the coco data struct 144 | local convert = {images=true, categories=true, annotations=true} 145 | for field, d in pairs(data) do if convert[field] then 146 | print('converting: '..field) 147 | local n, out = #d, {} 148 | if n==0 then d,n={d},1 end 149 | for k,v in pairs(d[1]) do 150 | local t, isReg = torch.type(v), true 151 | for i=1,n do isReg=isReg and torch.type(d[i][k])==t end 152 | if t=='number' and isReg then 153 | out[k] = torch.DoubleTensor(n) 154 | for i=1,n do out[k][i]=d[i][k] end 155 | elseif t=='string' and isReg then 156 | out[k]={}; for i=1,n do out[k][i]=d[i][k] end 157 | out[k] = coco.TensorTable(out[k]) 158 | elseif t=='table' and isReg and torch.type(v[1])=='number' then 159 | out[k]={}; for i=1,n do out[k][i]=torch.DoubleTensor(d[i][k]) end 160 | out[k] = coco.TensorTable(out[k]) 161 | if not out[k].idx then out[k]=out[k].data end 162 | else 163 | out[k]={}; for i=1,n do out[k][i]=d[i][k] end 164 | if k=='segmentation' then out[k] = coco.CocoSeg(out[k]) end 165 | end 166 | collectgarbage() 167 | end 168 | if out.id then out.idx=torch.range(1,out.id:size(1)) end 169 | data[field] = out 170 | collectgarbage() 171 | end end 172 | -- create mapping from cat/img index to anns indices for that cat/img 173 | print('convert: building indices') 174 | local makeMap = function( type, type_id ) 175 | if not data[type] or not data.annotations then return nil end 176 | local invmap, n = {}, data[type].id:size(1) 177 | for i=1,n do invmap[data[type].id[i]]=i end 178 | local map = {}; for i=1,n do map[i]={} end 179 | data.annotations[type_id..'x'] = data.annotations[type_id]:clone() 180 | for i=1,data.annotations.id:size(1) do 181 | local id = invmap[data.annotations[type_id][i]] 182 | data.annotations[type_id..'x'][i] = id 183 | table.insert(map[id],data.annotations.id[i]) 184 | end 185 | for i=1,n do map[i]=torch.LongTensor(map[i]) end 186 | return coco.TensorTable(map) 187 | end 188 | data.annIdsPerImg = makeMap('images','image_id') 189 | data.annIdsPerCat = makeMap('categories','category_id') 190 | -- save to disk 191 | torch.save( torchFile, data ) 192 | print(('convert: complete [%.2f s]'):format(torch.toc(tic))) 193 | end 194 | 195 | function CocoApi:getAnnIds( filters ) 196 | if not filters then filters = {} end 197 | if filters.imgId then 198 | return self.data.annIdsPerImg[self.inds.imgIdsMap[filters.imgId]] or {} 199 | elseif filters.catId then 200 | return self.data.annIdsPerCat[self.inds.catIdsMap[filters.catId]] or {} 201 | else 202 | return self.data.annotations.id 203 | end 204 | end 205 | 206 | function CocoApi:getCatIds() 207 | return self.data.categories.id 208 | end 209 | 210 | function CocoApi:getImgIds() 211 | return self.data.images.id 212 | end 213 | 214 | function CocoApi:loadAnns( ids ) 215 | return self:__load(self.data.annotations,self.inds.annIdsMap,ids) 216 | end 217 | 218 | function CocoApi:loadCats( ids ) 219 | return self:__load(self.data.categories,self.inds.catIdsMap,ids) 220 | end 221 | 222 | function CocoApi:loadImgs( ids ) 223 | return self:__load(self.data.images,self.inds.imgIdsMap,ids) 224 | end 225 | 226 | function CocoApi:showAnns( img, anns ) 227 | local n, h, w = #anns, img:size(2), img:size(3) 228 | local MaskApi, clrs = coco.MaskApi, torch.rand(n,3)*.6+.4 229 | local O = 
img:clone():contiguous():float() 230 | if n==0 then anns,n={anns},1 end 231 | if anns[1].keypoints then for i=1,n do if anns[i].iscrowd==0 then 232 | local sk, kp, j, k = self:loadCats(anns[i].category_id)[1].skeleton 233 | kp=anns[i].keypoints; k=kp:size(1); j=torch.range(1,k,3):long(); k=k/3; 234 | local x,y,v = kp:index(1,j), kp:index(1,j+1), kp:index(1,j+2) 235 | for _,s in pairs(sk) do if v[s[1]]>0 and v[s[2]]>0 then 236 | MaskApi.drawLine(O,x[s[1]],y[s[1]],x[s[2]],y[s[2]],.75,clrs[i]) 237 | end end 238 | for j=1,k do if v[j]==1 then MaskApi.drawCirc(O,x[j],y[j],4,{0,0,0}) end end 239 | for j=1,k do if v[j]>0 then MaskApi.drawCirc(O,x[j],y[j],3,clrs[i]) end end 240 | end end end 241 | if anns[1].segmentation or anns[1].bbox then 242 | local Rs, alpha = {}, anns[1].keypoints and .25 or .4 243 | for i=1,n do 244 | Rs[i]=anns[i].segmentation 245 | if Rs[i] and #Rs[i]>0 then Rs[i]=MaskApi.frPoly(Rs[i],h,w) end 246 | if not Rs[i] then Rs[i]=MaskApi.frBbox(anns[i].bbox,h,w)[1] end 247 | end 248 | MaskApi.drawMasks(O,MaskApi.decode(Rs),nil,alpha,clrs) 249 | end 250 | return O 251 | end 252 | 253 | function CocoApi:__load( data, map, ids ) 254 | if not torch.isTensor(ids) then ids=torch.LongTensor({ids}) end 255 | local out, idx = {}, nil 256 | for i=1,ids:numel() do 257 | out[i], idx = {}, map[ids[i]] 258 | for k,v in pairs(data) do out[i][k]=v[idx] end 259 | end 260 | return out 261 | end 262 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/LuaAPI/MaskApi.lua: -------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for manipulating masks stored in RLE format. 4 | 5 | For an overview of RLE please see http://mscoco.org/dataset/#download. 6 | Additionally, more detailed information can be found in the Matlab MaskApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/MaskApi.m 8 | 9 | The following API functions are defined: 10 | encode - Encode binary masks using RLE. 11 | decode - Decode binary masks encoded via RLE. 12 | merge - Compute union or intersection of encoded masks. 13 | iou - Compute intersection over union between masks. 14 | nms - Compute non-maximum suppression between ordered masks. 15 | area - Compute area of encoded masks. 16 | toBbox - Get bounding boxes surrounding encoded masks. 17 | frBbox - Convert bounding boxes to encoded masks. 18 | frPoly - Convert polygon to encoded mask. 19 | drawCirc - Draw circle into image (alters input). 20 | drawLine - Draw line into image (alters input). 21 | drawMasks - Draw masks into image (alters input). 22 | 23 | Usage: 24 | Rs = MaskApi.encode( masks ) 25 | masks = MaskApi.decode( Rs ) 26 | R = MaskApi.merge( Rs, [intersect=false] ) 27 | o = MaskApi.iou( dt, gt, [iscrowd=false] ) 28 | keep = MaskApi.nms( dt, thr ) 29 | a = MaskApi.area( Rs ) 30 | bbs = MaskApi.toBbox( Rs ) 31 | Rs = MaskApi.frBbox( bbs, h, w ) 32 | R = MaskApi.frPoly( poly, h, w ) 33 | MaskApi.drawCirc( img, x, y, rad, clr ) 34 | MaskApi.drawLine( img, x0, y0, x1, y1, rad, clr ) 35 | MaskApi.drawMasks( img, masks, [maxn=n], [alpha=.4], [clrs] ) 36 | For detailed usage information please see cocoDemo.lua. 
37 | 38 | In the API the following formats are used: 39 | R,Rs - [table] Run-length encoding of binary mask(s) 40 | masks - [nxhxw] Binary mask(s) 41 | bbs - [nx4] Bounding box(es) stored as [x y w h] 42 | poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 43 | dt,gt - May be either bounding boxes or encoded masks 44 | Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 45 | 46 | Common Objects in COntext (COCO) Toolbox. version 3.0 47 | Data, paper, and tutorials available at: http://mscoco.org/ 48 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 49 | Licensed under the Simplified BSD License [see coco/license.txt] 50 | 51 | ------------------------------------------------------------------------------]] 52 | 53 | local ffi = require 'ffi' 54 | local coco = require 'coco.env' 55 | 56 | coco.MaskApi = {} 57 | local MaskApi = coco.MaskApi 58 | 59 | coco.libmaskapi = ffi.load(package.searchpath('libmaskapi',package.cpath)) 60 | local libmaskapi = coco.libmaskapi 61 | 62 | -------------------------------------------------------------------------------- 63 | 64 | MaskApi.encode = function( masks ) 65 | local n, h, w = masks:size(1), masks:size(2), masks:size(3) 66 | masks = masks:type('torch.ByteTensor'):transpose(2,3) 67 | local data = masks:contiguous():data() 68 | local Qs = MaskApi._rlesInit(n) 69 | libmaskapi.rleEncode(Qs[0],data,h,w,n) 70 | return MaskApi._rlesToLua(Qs,n) 71 | end 72 | 73 | MaskApi.decode = function( Rs ) 74 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 75 | local masks = torch.ByteTensor(n,w,h):zero():contiguous() 76 | libmaskapi.rleDecode(Qs,masks:data(),n) 77 | MaskApi._rlesFree(Qs,n) 78 | return masks:transpose(2,3) 79 | end 80 | 81 | MaskApi.merge = function( Rs, intersect ) 82 | intersect = intersect or 0 83 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 84 | local Q = MaskApi._rlesInit(1) 85 | libmaskapi.rleMerge(Qs,Q,n,intersect) 86 | MaskApi._rlesFree(Qs,n) 87 | return MaskApi._rlesToLua(Q,1)[1] 88 | end 89 | 90 | MaskApi.iou = function( dt, gt, iscrowd ) 91 | if not iscrowd then iscrowd = NULL else 92 | iscrowd = iscrowd:type('torch.ByteTensor'):contiguous():data() 93 | end 94 | if torch.isTensor(gt) and torch.isTensor(dt) then 95 | local nDt, k = dt:size(1), dt:size(2); assert(k==4) 96 | local nGt, k = gt:size(1), gt:size(2); assert(k==4) 97 | local dDt = dt:type('torch.DoubleTensor'):contiguous():data() 98 | local dGt = gt:type('torch.DoubleTensor'):contiguous():data() 99 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 100 | libmaskapi.bbIou(dDt,dGt,nDt,nGt,iscrowd,o:data()) 101 | return o:transpose(1,2) 102 | else 103 | local qDt, nDt = MaskApi._rlesFrLua(dt) 104 | local qGt, nGt = MaskApi._rlesFrLua(gt) 105 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 106 | libmaskapi.rleIou(qDt,qGt,nDt,nGt,iscrowd,o:data()) 107 | MaskApi._rlesFree(qDt,nDt); MaskApi._rlesFree(qGt,nGt) 108 | return o:transpose(1,2) 109 | end 110 | end 111 | 112 | MaskApi.nms = function( dt, thr ) 113 | if torch.isTensor(dt) then 114 | local n, k = dt:size(1), dt:size(2); assert(k==4) 115 | local Q = dt:type('torch.DoubleTensor'):contiguous():data() 116 | local kp = torch.IntTensor(n):contiguous() 117 | libmaskapi.bbNms(Q,n,kp:data(),thr) 118 | return kp 119 | else 120 | local Q, n = MaskApi._rlesFrLua(dt) 121 | local kp = torch.IntTensor(n):contiguous() 122 | libmaskapi.rleNms(Q,n,kp:data(),thr) 123 | MaskApi._rlesFree(Q,n) 124 | return kp 125 | end 126 | end 127 | 128 | MaskApi.area = function( Rs ) 129 | local Qs, n, h, w = 
MaskApi._rlesFrLua(Rs) 130 | local a = torch.IntTensor(n):contiguous() 131 | libmaskapi.rleArea(Qs,n,a:data()) 132 | MaskApi._rlesFree(Qs,n) 133 | return a 134 | end 135 | 136 | MaskApi.toBbox = function( Rs ) 137 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 138 | local bb = torch.DoubleTensor(n,4):contiguous() 139 | libmaskapi.rleToBbox(Qs,bb:data(),n) 140 | MaskApi._rlesFree(Qs,n) 141 | return bb 142 | end 143 | 144 | MaskApi.frBbox = function( bbs, h, w ) 145 | if bbs:dim()==1 then bbs=bbs:view(1,bbs:size(1)) end 146 | local n, k = bbs:size(1), bbs:size(2); assert(k==4) 147 | local data = bbs:type('torch.DoubleTensor'):contiguous():data() 148 | local Qs = MaskApi._rlesInit(n) 149 | libmaskapi.rleFrBbox(Qs[0],data,h,w,n) 150 | return MaskApi._rlesToLua(Qs,n) 151 | end 152 | 153 | MaskApi.frPoly = function( poly, h, w ) 154 | local n = #poly 155 | local Qs, Q = MaskApi._rlesInit(n), MaskApi._rlesInit(1) 156 | for i,p in pairs(poly) do 157 | local xy = p:type('torch.DoubleTensor'):contiguous():data() 158 | libmaskapi.rleFrPoly(Qs[i-1],xy,p:size(1)/2,h,w) 159 | end 160 | libmaskapi.rleMerge(Qs,Q[0],n,0) 161 | MaskApi._rlesFree(Qs,n) 162 | return MaskApi._rlesToLua(Q,1)[1] 163 | end 164 | 165 | -------------------------------------------------------------------------------- 166 | 167 | MaskApi.drawCirc = function( img, x, y, rad, clr ) 168 | assert(img:isContiguous() and img:dim()==3) 169 | local k, h, w, data = img:size(1), img:size(2), img:size(3), img:data() 170 | for dx=-rad,rad do for dy=-rad,rad do 171 | local xi, yi = torch.round(x+dx), torch.round(y+dy) 172 | if dx*dx+dy*dy<=rad*rad and xi>=0 and yi>=0 and xi<w and yi<h then 173 | for c=1,k do data[(c-1)*h*w+yi*w+xi]=clr[c] end 174 | end 175 | end end 176 | end -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/LuaAPI/rocks/coco-scm-1.rockspec: -------------------------------------------------------------------------------- 15 | dependencies = { 16 | "lua >= 5.1", 17 | "torch >= 7.0", 18 | "lua-cjson" 19 | } 20 | 21 | build = { 22 | type = "builtin", 23 | modules = { 24 | ["coco.env"] = "LuaAPI/env.lua", 25 | ["coco.init"] = "LuaAPI/init.lua", 26 | ["coco.MaskApi"] = "LuaAPI/MaskApi.lua", 27 | ["coco.CocoApi"] = "LuaAPI/CocoApi.lua", 28 | libmaskapi = { 29 | sources = { "common/maskApi.c" }, 30 | incdirs = { "common/" } 31 | } 32 | } 33 | } 34 | 35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec 36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec 37 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/MatlabAPI/MaskApi.m: -------------------------------------------------------------------------------- 1 | classdef MaskApi 2 | % Interface for manipulating masks stored in RLE format. 3 | % 4 | % RLE is a simple yet efficient format for storing binary masks. RLE 5 | % first divides a vector (or vectorized image) into a series of piecewise 6 | % constant regions and then for each piece simply stores the length of 7 | % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 8 | % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 9 | % (note that the odd counts are always the numbers of zeros). Instead of 10 | % storing the counts directly, additional compression is achieved with a 11 | % variable bitrate representation based on a common scheme called LEB128. 12 | % 13 | % Compression is greatest given large piecewise constant regions. 14 | % Specifically, the size of the RLE is proportional to the number of 15 | % *boundaries* in M (or for an image the number of boundaries in the y 16 | % direction). Assuming fairly simple shapes, the RLE representation is 17 | % O(sqrt(n)) where n is the number of pixels in the object. 
Hence space usage 18 | % is substantially lower, especially for large simple objects (large n). 19 | % 20 | % Many common operations on masks can be computed directly using the RLE 21 | % (without need for decoding). This includes computations such as area, 22 | % union, intersection, etc. All of these operations are linear in the 23 | % size of the RLE, in other words they are O(sqrt(n)) where n is the area 24 | % of the object. Computing these operations on the original mask is O(n). 25 | % Thus, using the RLE can result in substantial computational savings. 26 | % 27 | % The following API functions are defined: 28 | % encode - Encode binary masks using RLE. 29 | % decode - Decode binary masks encoded via RLE. 30 | % merge - Compute union or intersection of encoded masks. 31 | % iou - Compute intersection over union between masks. 32 | % nms - Compute non-maximum suppression between ordered masks. 33 | % area - Compute area of encoded masks. 34 | % toBbox - Get bounding boxes surrounding encoded masks. 35 | % frBbox - Convert bounding boxes to encoded masks. 36 | % frPoly - Convert polygon to encoded mask. 37 | % 38 | % Usage: 39 | % Rs = MaskApi.encode( masks ) 40 | % masks = MaskApi.decode( Rs ) 41 | % R = MaskApi.merge( Rs, [intersect=false] ) 42 | % o = MaskApi.iou( dt, gt, [iscrowd=false] ) 43 | % keep = MaskApi.nms( dt, thr ) 44 | % a = MaskApi.area( Rs ) 45 | % bbs = MaskApi.toBbox( Rs ) 46 | % Rs = MaskApi.frBbox( bbs, h, w ) 47 | % R = MaskApi.frPoly( poly, h, w ) 48 | % 49 | % In the API the following formats are used: 50 | % R,Rs - [struct] Run-length encoding of binary mask(s) 51 | % masks - [hxwxn] Binary mask(s) (must have type uint8) 52 | % bbs - [nx4] Bounding box(es) stored as [x y w h] 53 | % poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 54 | % dt,gt - May be either bounding boxes or encoded masks 55 | % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 56 | % 57 | % Finally, a note about the intersection over union (iou) computation. 58 | % The standard iou of a ground truth (gt) and detected (dt) object is 59 | % iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 60 | % For "crowd" regions, we use a modified criteria. If a gt object is 61 | % marked as "iscrowd", we allow a dt to match any subregion of the gt. 62 | % Choosing gt' in the crowd gt that best matches the dt can be done using 63 | % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 64 | % iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 65 | % For crowd gt regions we use this modified criteria above for the iou. 66 | % 67 | % To compile use the following (some precompiled binaries are included): 68 | % mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',... 69 | % 'private/maskApiMex.c','../common/maskApi.c',... 70 | % '-I../common/','-outdir','private'); 71 | % Please do not contact us for help with compiling. 72 | % 73 | % Microsoft COCO Toolbox. version 2.0 74 | % Data, paper, and tutorials available at: http://mscoco.org/ 75 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
76 | % Licensed under the Simplified BSD License [see coco/license.txt] 77 | 78 | methods( Static ) 79 | function Rs = encode( masks ) 80 | Rs = maskApiMex( 'encode', masks ); 81 | end 82 | 83 | function masks = decode( Rs ) 84 | masks = maskApiMex( 'decode', Rs ); 85 | end 86 | 87 | function R = merge( Rs, varargin ) 88 | R = maskApiMex( 'merge', Rs, varargin{:} ); 89 | end 90 | 91 | function o = iou( dt, gt, varargin ) 92 | o = maskApiMex( 'iou', dt', gt', varargin{:} ); 93 | end 94 | 95 | function keep = nms( dt, thr ) 96 | keep = maskApiMex('nms',dt',thr); 97 | end 98 | 99 | function a = area( Rs ) 100 | a = maskApiMex( 'area', Rs ); 101 | end 102 | 103 | function bbs = toBbox( Rs ) 104 | bbs = maskApiMex( 'toBbox', Rs )'; 105 | end 106 | 107 | function Rs = frBbox( bbs, h, w ) 108 | Rs = maskApiMex( 'frBbox', bbs', h, w ); 109 | end 110 | 111 | function R = frPoly( poly, h, w ) 112 | R = maskApiMex( 'frPoly', poly, h , w ); 113 | end 114 | end 115 | 116 | end 117 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/MatlabAPI/cocoDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo for the CocoApi (see CocoApi.m) 2 | 3 | %% initialize COCO api (please specify dataType/annType below) 4 | annTypes = { 'instances', 'captions', 'person_keypoints' }; 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType); 7 | coco=CocoApi(annFile); 8 | 9 | %% display COCO categories and supercategories 10 | if( ~strcmp(annType,'captions') ) 11 | cats = coco.loadCats(coco.getCatIds()); 12 | nms={cats.name}; fprintf('COCO categories: '); 13 | fprintf('%s, ',nms{:}); fprintf('\n'); 14 | nms=unique({cats.supercategory}); fprintf('COCO supercategories: '); 15 | fprintf('%s, ',nms{:}); fprintf('\n'); 16 | end 17 | 18 | %% get all images containing given categories, select one at random 19 | catIds = coco.getCatIds('catNms',{'person','dog','skateboard'}); 20 | imgIds = coco.getImgIds('catIds',catIds); 21 | imgId = imgIds(randi(length(imgIds))); 22 | 23 | %% load and display image 24 | img = coco.loadImgs(imgId); 25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name)); 26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[]) 27 | 28 | %% load and display annotations 29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]); 30 | anns = coco.loadAnns(annIds); coco.showAnns(anns); 31 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/MatlabAPI/evalDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo demonstrating the algorithm result formats for COCO 2 | 3 | %% select results type for demo (either bbox or segm) 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here 5 | fprintf('Running demo for *%s* results.\n\n',type); 6 | 7 | %% initialize COCO ground truth api 8 | dataDir='../'; prefix='instances'; dataType='val2014'; 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end 10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType); 11 | cocoGt=CocoApi(annFile); 12 | 13 | %% initialize COCO detections api 14 | resFile='%s/results/%s_%s_fake%s100_results.json'; 15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type); 16 | cocoDt=cocoGt.loadRes(resFile); 17 | 18 | %% visualize gt and dt side by side 19 | imgIds=sort(cocoGt.getImgIds());
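% evaluate on a deterministic subset: keep only the first 100 image ids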
imgIds=imgIds(1:100); 20 | imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId); 21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name)); 22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off; 23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth') 24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns); 25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off; 26 | annIds = cocoDt.getAnnIds('imgIds',imgId); title('results') 27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns); 28 | 29 | %% load raw JSON and show exact format for results 30 | fprintf('results structures have the following format:\n'); 31 | res = gason(fileread(resFile)); disp(res) 32 | 33 | %% the following command can be used to save the results back to disk 34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end 35 | 36 | %% run COCO evaluation code (see CocoEval.m) 37 | cocoEval=CocoEval(cocoGt,cocoDt,type); 38 | cocoEval.params.imgIds=imgIds; 39 | cocoEval.evaluate(); 40 | cocoEval.accumulate(); 41 | cocoEval.summarize(); 42 | 43 | %% generate Derek Hoiem style analysis of false positives (slow) 44 | if(0), cocoEval.analyze(); end 45 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/MatlabAPI/gason.m: -------------------------------------------------------------------------------- 1 | function out = gason( in ) 2 | % Convert between JSON strings and corresponding JSON objects. 3 | % 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev: 5 | % https://github.com/vivkin/gason 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the 7 | % above link for license information and additional details about Gason. 8 | % 9 | % Given a JSON string, gason calls the C++ parser and converts the output 10 | % into an appropriate Matlab structure. As the parsing is performed in mex 11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take 12 | % only a few seconds to parse (compared to hours for pure Matlab parsers). 13 | % 14 | % Given a JSON object, gason calls the C++ encoder to convert the object 15 | % back into a JSON string representation. Nearly any Matlab struct, cell 16 | % array, or numeric array represents a valid JSON object. Note that gason() 17 | % can be used to go both from JSON string to JSON object and back. 18 | % 19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or 20 | % later). The following command compiles the parser (may require tweaking): 21 | % mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',... 22 | % 'private/gasonMex.cpp','../common/gason.cpp',... 23 | % '-I../common/','-outdir','private'); 24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries 25 | % are included, please do not contact us for help with compiling. If needed 26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'. 27 | % 28 | % Note that by default JSON arrays that contain only numbers are stored as 29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of 30 | % the same type are stored as Matlab struct arrays. This is much faster and 31 | % can use considerably less memory than always using Matlab cell arrays.
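% (For example, the JSON string '[1,2,3]' is returned as the numeric array
% [1 2 3], whereas a mixed array such as '[1,"a"]' falls back to a cell array.)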
32 | % 33 | % USAGE 34 | % object = gason( string ) 35 | % string = gason( object ) 36 | % 37 | % INPUTS/OUTPUTS 38 | % string - JSON string 39 | % object - JSON object 40 | % 41 | % EXAMPLE 42 | % o = struct('first',{'piotr','ty'},'last',{'dollar','lin'}) 43 | % s = gason( o ) % convert JSON object -> JSON string 44 | % p = gason( s ) % convert JSON string -> JSON object 45 | % 46 | % See also 47 | % 48 | % Microsoft COCO Toolbox. version 2.0 49 | % Data, paper, and tutorials available at: http://mscoco.org/ 50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 51 | % Licensed under the Simplified BSD License [see coco/license.txt] 52 | 53 | out = gasonMex( 'convert', in ); 54 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/MatlabAPI/private/gasonMex.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "gason.h" 8 | #include "mex.h" 9 | #include "string.h" 10 | #include "math.h" 11 | #include 12 | #include 13 | #include 14 | typedef std::ostringstream ostrm; 15 | typedef unsigned long siz; 16 | typedef unsigned short ushort; 17 | 18 | siz length( const JsonValue &a ) { 19 | // get number of elements in JSON_ARRAY or JSON_OBJECT 20 | siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k; 21 | } 22 | 23 | bool isRegularObjArray( const JsonValue &a ) { 24 | // check if all JSON_OBJECTs in JSON_ARRAY have the same fields 25 | JsonValue o=a.toNode()->value; siz k, n; const char **keys; 26 | n=length(o); keys=new const char*[n]; 27 | k=0; for(auto j:o) keys[k++]=j->key; 28 | for( auto i:a ) { 29 | if(length(i->value)!=n) return false; k=0; 30 | for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false; 31 | } 32 | delete [] keys; return true; 33 | } 34 | 35 | mxArray* json( const JsonValue &o ) { 36 | // convert JsonValue to Matlab mxArray 37 | siz k, m, n; mxArray *M; const char **keys; 38 | switch( o.getTag() ) { 39 | case JSON_NUMBER: 40 | return mxCreateDoubleScalar(o.toNumber()); 41 | case JSON_STRING: 42 | return mxCreateString(o.toString()); 43 | case JSON_ARRAY: { 44 | if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL); 45 | JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag(); 46 | n=length(o); bool isRegular=true; 47 | for(auto i:o) isRegular=isRegular && i->value.getTag()==tag; 48 | if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) { 49 | m=length(o0); keys=new const char*[m]; 50 | k=0; for(auto j:o0) keys[k++]=j->key; 51 | M = mxCreateStructMatrix(1,n,m,keys); 52 | k=0; for(auto i:o) { m=0; for(auto j:i->value) 53 | mxSetFieldByNumber(M,k,m++,json(j->value)); k++; } 54 | delete [] keys; return M; 55 | } else if( isRegular && tag==JSON_NUMBER ) { 56 | M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M); 57 | k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M; 58 | } else { 59 | M = mxCreateCellMatrix(1,n); 60 | k=0; for(auto i:o) mxSetCell(M,k++,json(i->value)); 61 | return M; 62 | } 63 | } 64 | case JSON_OBJECT: 65 | if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL); 66 | n=length(o); keys=new const char*[n]; 67 | k=0; for(auto i:o) 
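// collect the object's field names; they become the fields of the output Matlab struct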
keys[k++]=i->key; 68 | M = mxCreateStructMatrix(1,1,n,keys); k=0; 69 | for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value)); 70 | delete [] keys; return M; 71 | case JSON_TRUE: 72 | return mxCreateDoubleScalar(1); 73 | case JSON_FALSE: 74 | return mxCreateDoubleScalar(0); 75 | case JSON_NULL: 76 | return mxCreateDoubleMatrix(0,0,mxREAL); 77 | default: return NULL; 78 | } 79 | } 80 | 81 | template ostrm& json( ostrm &S, T *A, siz n ) { 82 | // convert numeric array to JSON string with casting 83 | if(n==0) { S<<"[]"; return S; } if(n==1) { S< ostrm& json( ostrm &S, T *A, siz n ) { 89 | // convert numeric array to JSON string without casting 90 | return json(S,A,n); 91 | } 92 | 93 | ostrm& json( ostrm &S, const char *A ) { 94 | // convert char array to JSON string (handle escape characters) 95 | #define RPL(a,b) case a: { S << b; A++; break; } 96 | S << "\""; while( *A>0 ) switch( *A ) { 97 | RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b"); 98 | RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t"); 99 | default: S << *A; A++; 100 | } 101 | S << "\""; return S; 102 | } 103 | 104 | ostrm& json( ostrm& S, const JsonValue *o ) { 105 | // convert JsonValue to JSON string 106 | switch( o->getTag() ) { 107 | case JSON_NUMBER: S << o->toNumber(); return S; 108 | case JSON_TRUE: S << "true"; return S; 109 | case JSON_FALSE: S << "false"; return S; 110 | case JSON_NULL: S << "null"; return S; 111 | case JSON_STRING: return json(S,o->toString()); 112 | case JSON_ARRAY: 113 | S << "["; for(auto i:*o) { 114 | json(S,&i->value) << (i->next ? "," : ""); } 115 | S << "]"; return S; 116 | case JSON_OBJECT: 117 | S << "{"; for(auto i:*o) { 118 | json(S,i->key) << ":"; 119 | json(S,&i->value) << (i->next ? "," : ""); } 120 | S << "}"; return S; 121 | default: return S; 122 | } 123 | } 124 | 125 | ostrm& json( ostrm& S, const mxArray *M ) { 126 | // convert Matlab mxArray to JSON string 127 | siz i, j, m, n=mxGetNumberOfElements(M); 128 | void *A=mxGetData(M); ostrm *nms; 129 | switch( mxGetClassID(M) ) { 130 | case mxDOUBLE_CLASS: return json(S,(double*) A,n); 131 | case mxSINGLE_CLASS: return json(S,(float*) A,n); 132 | case mxINT64_CLASS: return json(S,(int64_t*) A,n); 133 | case mxUINT64_CLASS: return json(S,(uint64_t*) A,n); 134 | case mxINT32_CLASS: return json(S,(int32_t*) A,n); 135 | case mxUINT32_CLASS: return json(S,(uint32_t*) A,n); 136 | case mxINT16_CLASS: return json(S,(int16_t*) A,n); 137 | case mxUINT16_CLASS: return json(S,(uint16_t*) A,n); 138 | case mxINT8_CLASS: return json(S,(int8_t*) A,n); 139 | case mxUINT8_CLASS: return json(S,(uint8_t*) A,n); 140 | case mxLOGICAL_CLASS: return json(S,(uint8_t*) A,n); 141 | case mxCHAR_CLASS: return json(S,mxArrayToString(M)); 142 | case mxCELL_CLASS: 143 | S << "["; for(i=0; i0) json(S,mxGetCell(M,n-1)); S << "]"; return S; 145 | case mxSTRUCT_CLASS: 146 | if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M); 147 | if(m==0) { S<<"["; for(i=0; i1) S<<"["; nms=new ostrm[m]; 149 | for(j=0; j1) S<<"]"; delete [] nms; return S; 156 | default: 157 | mexErrMsgTxt( "Unknown type." 
); return S; 158 | } 159 | } 160 | 161 | mxArray* mxCreateStringRobust( const char* str ) { 162 | // convert char* to Matlab string (robust version of mxCreateString) 163 | mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)}; 164 | M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M); 165 | for( siz i=0; i1 ) mexErrMsgTxt("One output expected."); 182 | 183 | if(!strcmp(action,"convert")) { 184 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 185 | if( mxGetClassID(pr[0])==mxCHAR_CLASS ) { 186 | // object = mexFunction( string ) 187 | char *str = mxArrayToStringRobust(pr[0]); 188 | int status = jsonParse(str, &endptr, &val, allocator); 189 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 190 | pl[0] = json(val); mxFree(str); 191 | } else { 192 | // string = mexFunction( object ) 193 | ostrm S; S << std::setprecision(12); json(S,pr[0]); 194 | pl[0]=mxCreateStringRobust(S.str().c_str()); 195 | } 196 | 197 | } else if(!strcmp(action,"split")) { 198 | // strings = mexFunction( string, k ) 199 | if( nr!=2 ) mexErrMsgTxt("Two input expected."); 200 | char *str = mxArrayToStringRobust(pr[0]); 201 | int status = jsonParse(str, &endptr, &val, allocator); 202 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 203 | if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected"); 204 | siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]); 205 | k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k)); 206 | pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<value); t--; if(!o->next) t=0; S << (t ? "," : "]"); 210 | if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str())); 211 | } 212 | 213 | } else if(!strcmp(action,"merge")) { 214 | // string = mexFunction( strings ) 215 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 216 | if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected."); 217 | siz n = mxGetNumberOfElements(pr[0]); 218 | ostrm S; S << std::setprecision(12); S << "["; 219 | for( siz i=0; ivalue) << (j->next ? "," : ""); 225 | mxFree(str); if(i1) 14 | % [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] ) 15 | % 16 | % INPUTS 17 | % prm - param struct or cell of form {'name1' v1 'name2' v2 ...} 18 | % dfs - cell of form {'name1' def1 'name2' def2 ...} 19 | % checkExtra - [0] if 1 throw error if prm contains params not in dfs 20 | % if -1 if prm contains params not in dfs adds them 21 | % 22 | % OUTPUTS (nargout==1) 23 | % prm - parameter struct with fields 'name1' through 'nameN' assigned 24 | % 25 | % OUTPUTS (nargout>1) 26 | % param1 - value assigned to parameter with 'name1' 27 | % ... 28 | % paramN - value assigned to parameter with 'nameN' 29 | % 30 | % EXAMPLE 31 | % dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 }; 32 | % prm = getPrmDflt( struct('x',1,'y',1), dfs ) 33 | % [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs ) 34 | % 35 | % See also INPUTPARSER 36 | % 37 | % Piotr's Computer Vision Matlab Toolbox Version 2.60 38 | % Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] 39 | % Licensed under the Simplified BSD License [see external/bsd.txt] 40 | 41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end 42 | if nargin<=2, checkExtra = 0; end 43 | 44 | % get the input parameters as two cell arrays: prmVal and prmField 45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end 46 | if iscell(prm) 47 | if(mod(length(prm),2)), error('odd number of parameters in prm'); end 48 | prmField = prm(1:2:end); prmVal = prm(2:2:end); 49 | else 50 | if(~isstruct(prm)), error('prm must be a struct or a cell'); end 51 | prmVal = struct2cell(prm); prmField = fieldnames(prm); 52 | end 53 | 54 | % get and update default values using quick for loop 55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end); 56 | if checkExtra>0 57 | for i=1:length(prmField) 58 | j = find(strcmp(prmField{i},dfsField)); 59 | if isempty(j), error('parameter %s is not valid', prmField{i}); end 60 | dfsVal(j) = prmVal(i); 61 | end 62 | elseif checkExtra<0 63 | for i=1:length(prmField) 64 | j = find(strcmp(prmField{i},dfsField)); 65 | if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end 66 | dfsVal(j) = prmVal(i); 67 | end 68 | else 69 | for i=1:length(prmField) 70 | dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i); 71 | end 72 | end 73 | 74 | % check for missing values 75 | if any(strcmp('REQ',dfsVal)) 76 | cmpArray = find(strcmp('REQ',dfsVal)); 77 | error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] ); 78 | end 79 | 80 | % set output 81 | if nargout==1 82 | varargout{1} = cell2struct( dfsVal, dfsField, 2 ); 83 | else 84 | varargout = dfsVal; 85 | end 86 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocoEvalDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from pycocotools.coco import COCO\n", 14 | "from pycocotools.cocoeval import COCOeval\n", 15 | "import numpy as np\n", 16 | "import skimage.io as io\n", 17 | "import pylab\n", 18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Running demo for *bbox* results.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "annType = ['segm','bbox','keypoints']\n", 38 | "annType = annType[1] #specify type here\n", 39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n", 40 | "print 'Running demo for *%s* results.'%(annType)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "loading annotations into memory...\n", 55 | 
"Done (t=8.01s)\n", 56 | "creating index...\n", 57 | "index created!\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "#initialize COCO ground truth api\n", 63 | "dataDir='../'\n", 64 | "dataType='val2014'\n", 65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n", 66 | "cocoGt=COCO(annFile)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Loading and preparing results... \n", 81 | "DONE (t=0.05s)\n", 82 | "creating index...\n", 83 | "index created!\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#initialize COCO detections api\n", 89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n", 90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n", 91 | "cocoDt=cocoGt.loadRes(resFile)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "imgIds=sorted(cocoGt.getImgIds())\n", 103 | "imgIds=imgIds[0:100]\n", 104 | "imgId = imgIds[np.random.randint(100)]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Running per image evaluation... \n", 119 | "DONE (t=0.46s).\n", 120 | "Accumulating evaluation results... \n", 121 | "DONE (t=0.38s).\n", 122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n", 123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n", 124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n", 125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n", 126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n", 127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n", 128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n", 129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n", 130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n", 131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n", 132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n", 133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# running evaluation\n", 139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n", 140 | "cocoEval.params.imgIds = imgIds\n", 141 | "cocoEval.evaluate()\n", 142 | "cocoEval.accumulate()\n", 143 | "cocoEval.summarize()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 2", 150 | "language": "python", 151 | "name": "python2" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "2.7.10" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/coco.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/coco.cpython-37.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/coco_custom.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/coco_custom.cpython-36.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/coco_custom.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/coco_custom.cpython-37.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/cocoeval.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/cocoeval.cpython-37.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/cocoeval_custom.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/cocoeval_custom.cpython-36.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/cocoeval_custom.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/cocoeval_custom.cpython-37.pyc -------------------------------------------------------------------------------- 
/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/mask.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/mask.cpython-36.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/mask.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/__pycache__/mask.cpython-37.pyc -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/_mask.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/cocoapi/PythonAPI/pycocotools/_mask.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 
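#
# As a quick illustration (a sketch, assuming numpy and a built pycocotools):
#   import numpy as np
#   from pycocotools import mask as maskUtils
#   m = np.zeros((4, 4), dtype=np.uint8, order='F'); m[1:3, 1:3] = 1
#   rle = maskUtils.encode(m)                  # RLE dict with 'size' and 'counts'
#   assert maskUtils.area(rle) == 4            # the 2x2 block of ones
#   assert (maskUtils.decode(rle) == m).all()  # decoding round-trips the mask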
29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy as np 3 | 4 | # To compile and install locally run "python setup.py build_ext --inplace" 5 | # To install library to Python site-packages run "python setup.py build_ext install" 6 | 7 | ext_modules = [ 8 | Extension( 9 | 'pycocotools._mask', 10 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 11 | include_dirs = [np.get_include(), '../common'], 12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 13 | ) 14 | ] 15 | 16 | setup( 17 | name='pycocotools', 18 | packages=['pycocotools'], 19 | package_dir = {'pycocotools': 'pycocotools'}, 20 | install_requires=[ 21 | 'setuptools>=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/README.txt: -------------------------------------------------------------------------------- 1 | COCO API - http://cocodataset.org/ 2 | 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assists in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including for the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete, the Lua API provides only basic functionality. 4 | 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website. 6 | -Please download, unzip, and place the images in: coco/images/ 7 | -Please download and place the annotations in: coco/annotations/ 8 | For substantially more details on the API please see http://cocodataset.org/#download. 9 | 10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage. 
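A minimal Python session then looks like the following (a sketch; the annotation path assumes the coco/annotations/ layout described above):

  from pycocotools.coco import COCO
  coco = COCO('coco/annotations/instances_val2014.json')  # load and index the annotations
  catIds = coco.getCatIds(catNms=['person'])              # id of the 'person' category
  imgIds = coco.getImgIds(catIds=catIds)                  # all images containing a person
  print('%d images contain a person' % len(imgIds))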
11 | 12 | To install: 13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided) 14 | -For Python, run "make" under coco/PythonAPI 15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/ 16 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void *JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
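/* negate if the literal began with a minus sign */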
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
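/* a closing '}' must match an object on the parse stack, not an array */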
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
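/* plain doubles are JSON numbers; other tags are NaN-boxed into the upper bits */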
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include <math.h> 9 | #include <stdlib.h> 10 | 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; } 12 | uint umax( uint a, uint b ) { return (a>b) ?
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 174 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 175 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 176 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 177 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 178 | } else for( d=0; d<=dy; d++ ) { 179 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 180 | } 181 | } 182 | /* get points along y-boundary and downsample */ 183 | free(x); free(y); k=m; m=0; double xd, yd; 184 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 185 | for( j=1; jw-1 ) continue; 188 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 190 | x[m]=(int) xd; y[m]=(int) yd; m++; 191 | } 192 | /* compute rle encoding given y-boundary points */ 193 | k=m; a=malloc(sizeof(uint)*(k+1)); 194 | for( j=0; j0) b[m++]=a[j++]; else { 200 | j++; if(jm, p=0; long x; int more; 207 | char *s=malloc(sizeof(char)*m*6); 208 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 210 | while( more ) { 211 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 212 | if(more) c |= 0x20; c+=48; s[p++]=c; 213 | } 214 | } 215 | s[p]=0; return s; 216 | } 217 | 218 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 219 | siz m=0, p=0, k; long x; int more; uint *cnts; 220 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 221 | while( s[p] ) { 222 | x=0; k=0; more=1; 223 | while( more ) { 224 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 225 | more = c & 0x20; p++; k++; 226 | if(!more && (c & 0x10)) x |= -1 << 5*k; 227 | } 228 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 229 | } 230 | rleInit(R,h,w,m,cnts); free(cnts); 231 | } 232 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /pytorch-cpn/cocoapi/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /pytorch-cpn/dataloader/loader_ExLPoseOC.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import random 5 | import math 6 | import cv2 7 | import skimage 8 | import skimage.transform 9 | 10 | import torch 11 | import torch.utils.data as data 12 | 13 | from utils.osutils import * 14 | from utils.imutils import * 15 | from utils.transforms import * 16 | import scipy.misc  # scipy.misc.imread is used below (available in scipy < 1.2) 17 | import torchvision.transforms as TF 18 | 19 | class ExLPoseOC(data.Dataset): 20 | def __init__(self, cfg, train=True): 21 | self.img_folder = cfg.img_path 22 | self.is_train = train 23 | self.inp_res = cfg.data_shape 24 | self.out_res = cfg.output_shape 25 | self.pixel_means = cfg.pixel_means 26 | self.num_class = cfg.num_class 27 | self.cfg = cfg 28 | self.bbox_extend_factor = cfg.bbox_extend_factor 29 | if train: 30 | self.scale_factor = cfg.scale_factor 31 | self.rot_factor = cfg.rot_factor 32 | self.symmetry = cfg.symmetry 33 | with open(cfg.gt_path) as anno_file: 34 | self.anno = json.load(anno_file) 35 | 36 | def augmentationCropImage(self, img, bbox, joints=None): 37 | height, width = self.inp_res[0], self.inp_res[1] 38 | bbox = np.array(bbox).reshape(4, ).astype(np.float32) 39 | add = max(img.shape[0], img.shape[1]) 40 | mean_value = self.pixel_means 41 | bimg = cv2.copyMakeBorder(img, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=mean_value.tolist()) 42 | objcenter = np.array([(bbox[0] + bbox[2]) / 2., (bbox[1] + bbox[3]) / 2.]) 43 | bbox += add 44 | objcenter += add 45 | if self.is_train: 46 | joints[:, :2] += add 47 | inds = np.where(joints[:, -1] == 0) 48 | joints[inds, :2] = -1000000 # move unlabeled joints far away so later processing cannot affect them 49 | crop_width = (bbox[2] - bbox[0]) * (1 + self.bbox_extend_factor[0] * 2) 50 | crop_height = (bbox[3] - bbox[1]) * (1 + self.bbox_extend_factor[1] * 2) 51 | if self.is_train: 52 | crop_width = crop_width * (1 + 0.25) 53 | crop_height = crop_height * (1 + 0.25) 54 | if crop_height / height > crop_width / width: 55 | crop_size = crop_height 56 | min_shape = height 57 | else: 58 | crop_size = crop_width 59 | min_shape = width 60 | 61 | crop_size = min(crop_size, objcenter[0] / width * min_shape * 2. - 1.) 62 | crop_size = min(crop_size, (bimg.shape[1] - objcenter[0]) / width * min_shape * 2. - 1) 63 | crop_size = min(crop_size, objcenter[1] / height * min_shape * 2. - 1.) 64 | crop_size = min(crop_size, (bimg.shape[0] - objcenter[1]) / height * min_shape * 2. - 1) 65 | 66 | min_x = int(objcenter[0] - crop_size / 2. / min_shape * width) 67 | max_x = int(objcenter[0] + crop_size / 2.
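            # half the crop extent, rescaled from the reference axis (min_shape) to x pixels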
/ min_shape * width) 68 | min_y = int(objcenter[1] - crop_size / 2. / min_shape * height) 69 | max_y = int(objcenter[1] + crop_size / 2. / min_shape * height) 70 | 71 | x_ratio = float(width) / (max_x - min_x) 72 | y_ratio = float(height) / (max_y - min_y) 73 | 74 | if self.is_train: 75 | joints[:, 0] = joints[:, 0] - min_x 76 | joints[:, 1] = joints[:, 1] - min_y 77 | 78 | joints[:, 0] *= x_ratio 79 | joints[:, 1] *= y_ratio 80 | label = joints[:, :2].copy() 81 | valid = joints[:, 2].copy() 82 | 83 | img = cv2.resize(bimg[min_y:max_y, min_x:max_x, :], (width, height)) 84 | details = np.asarray([min_x - add, min_y - add, max_x - add, max_y - add]).astype(np.float) 85 | 86 | if self.is_train: 87 | return img, joints, details 88 | else: 89 | return img, details 90 | 91 | 92 | 93 | def data_augmentation(self, img, label, operation): 94 | height, width = img.shape[0], img.shape[1] 95 | center = (width / 2., height / 2.) 96 | n = label.shape[0] 97 | affrat = random.uniform(self.scale_factor[0], self.scale_factor[1]) 98 | 99 | halfl_w = min(width - center[0], (width - center[0]) / 1.25 * affrat) 100 | halfl_h = min(height - center[1], (height - center[1]) / 1.25 * affrat) 101 | img = skimage.transform.resize(img[int(center[1] - halfl_h): int(center[1] + halfl_h + 1), 102 | int(center[0] - halfl_w): int(center[0] + halfl_w + 1)], (height, width)) 103 | for i in range(n): 104 | label[i][0] = (label[i][0] - center[0]) / halfl_w * (width - center[0]) + center[0] 105 | label[i][1] = (label[i][1] - center[1]) / halfl_h * (height - center[1]) + center[1] 106 | label[i][2] *= ( 107 | (label[i][0] >= 0) & (label[i][0] < width) & (label[i][1] >= 0) & (label[i][1] < height)) 108 | 109 | # flip augmentation 110 | # if operation == 1: 111 | # img = cv2.flip(img, 1) 112 | # cod = [] 113 | # allc = [] 114 | # for i in range(n): 115 | # x, y = label[i][0], label[i][1] 116 | # if x >= 0: 117 | # x = width - 1 - x 118 | # cod.append((x, y, label[i][2])) 119 | # # **** the joint index depends on the dataset **** 120 | # for (q, w) in self.symmetry: 121 | # cod[q], cod[w] = cod[w], cod[q] 122 | # for i in range(n): 123 | # allc.append(cod[i][0]) 124 | # allc.append(cod[i][1]) 125 | # allc.append(cod[i][2]) 126 | # label = np.array(allc).reshape(n, 3) 127 | 128 | # # rotated augmentation 129 | # if operation > 1: 130 | # angle = random.uniform(0, self.rot_factor) 131 | # if random.randint(0, 1): 132 | # angle *= -1 133 | # rotMat = cv2.getRotationMatrix2D(center, angle, 1.0) 134 | # img = cv2.warpAffine(img, rotMat, (width, height)) 135 | 136 | # allc = [] 137 | # for i in range(n): 138 | # x, y = label[i][0], label[i][1] 139 | # v = label[i][2] 140 | # coor = np.array([x, y]) 141 | # if x >= 0 and y >= 0: 142 | # R = rotMat[:, : 2] 143 | # W = np.array([rotMat[0][2], rotMat[1][2]]) 144 | # coor = np.dot(R, coor) + W 145 | # allc.append(int(coor[0])) 146 | # allc.append(int(coor[1])) 147 | # v *= ((coor[0] >= 0) & (coor[0] < width) & (coor[1] >= 0) & (coor[1] < height)) 148 | # allc.append(int(v)) 149 | # label = np.array(allc).reshape(n, 3).astype(np.int) 150 | return img, label 151 | 152 | 153 | def __getitem__(self, index): 154 | a = self.anno[index] 155 | image_name = a['imgInfo']['img_paths'] 156 | # print(image_name) 157 | # if '_3_' in image_name: 158 | # b = image_name.split('_') 159 | # new_name_dark = b[0]+'_'+b[1]+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 160 | 161 | # elif 'manridang' in image_name: 162 | # b = image_name.split('_') 163 | # new_name_dark = 
b[0]+'_'+b[1]+'manridang'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 164 | 165 | 166 | # elif 'hwangridan' in image_name: 167 | # b = image_name.split('_') 168 | # new_name_dark = b[0]+'_'+b[1]+'hwangridan'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 169 | 170 | 171 | # elif 'bulguksa' in image_name: 172 | # b = image_name.split('_') 173 | # new_name_dark = b[0]+'_'+b[1]+'bulguksa'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 174 | 175 | # else: 176 | # b = image_name.split('_') 177 | # new_name_dark = b[0]+'_'+b[1]+'_'+b[2]+'_'+b[3]+'_'+'img'+'_'+b[5]+'__'+b[9]+'_'+b[10] 178 | 179 | # new_name_dark = bd[0]+'_'+bd[1]+'_'+bd[2]+'_'+'img'+'_'+bd[4]+'__'+bd[8]+'_'+bd[9] 180 | # img_path_dark = os.path.join(self.img_folder, image_name) 181 | # b = image_name.split('_') 182 | # new_name = b[0]+'_'+b[1]+'_'+b[2]+'_'+'img'+'_'+b[4]+'__'+b[8]+'_'+b[9] 183 | img_path = os.path.join(self.img_folder, image_name) 184 | if self.is_train: 185 | points = np.array(a['unit']['keypoints']).reshape(self.num_class, 3).astype(np.float32) 186 | gt_bbox = a['unit']['GT_bbox'] 187 | 188 | image = scipy.misc.imread(img_path, mode='RGB') 189 | 190 | 191 | rgb_mean_dark = np.mean(image, axis=(0, 1)) 192 | scaling_dark = 255*0.4 / rgb_mean_dark 193 | image = image * scaling_dark 194 | 195 | # img_dark = im_to_torch(image_dark) 196 | 197 | # rgb_mean_dark = torch.mean(img_dark, dim=[1, 2], keepdim=True) 198 | # scaling_dark = 0.4 / rgb_mean_dark 199 | # img_dark = img_dark * scaling_dark 200 | 201 | if self.is_train: 202 | image, points, details = self.augmentationCropImage(image, gt_bbox, points) 203 | else: 204 | image, details = self.augmentationCropImage(image, gt_bbox) 205 | 206 | # image = im_to_torch(image) 207 | # rgb_mean_dark = torch.mean(image, dim=[1, 2], keepdim=True) 208 | # scaling_dark = 0.4 / rgb_mean_dark 209 | # image = image * scaling_dark 210 | # image = image.permute(1, 2, 0) 211 | 212 | if self.is_train: 213 | image, points = self.data_augmentation(image, points, a['operation']) 214 | # print(image) 215 | img = im_to_torch(image) # CxHxW 216 | 217 | # rgb_mean_dark = torch.mean(img_dark, dim=[1, 2], keepdim=True) 218 | # scaling_dark = 0.4 / rgb_mean_dark 219 | # img_dark = img_dark * scaling_dark 220 | # Color dithering 221 | # img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1) 222 | # img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1) 223 | # img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1) 224 | 225 | points[:, :2] //= 4 # output size is 1/4 input size 226 | pts = torch.Tensor(points) 227 | else: 228 | img = im_to_torch(image) 229 | 230 | # print(img) 231 | 232 | if self.is_train: 233 | target15 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 234 | target11 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 235 | target9 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 236 | target7 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 237 | for i in range(self.num_class): 238 | if pts[i, 2] > 0: # COCO visible: 0-no label, 1-label + invisible, 2-label + visible 239 | target15[i] = generate_heatmap(target15[i], pts[i], self.cfg.gk15) 240 | target11[i] = generate_heatmap(target11[i], pts[i], self.cfg.gk11) 241 | target9[i] = generate_heatmap(target9[i], pts[i], self.cfg.gk9) 242 | target7[i] = generate_heatmap(target7[i], pts[i], self.cfg.gk7) 243 | 244 | targets = [torch.Tensor(target15), torch.Tensor(target11), torch.Tensor(target9), torch.Tensor(target7)] 245 | valid = pts[:, 2] 
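        # A minimal sketch (added commentary, hedged) of what the Gaussian
        # targets built above contain. `generate_heatmap` from utils/imutils.py
        # drops a unit peak at each visible joint and blurs it, roughly:
        #   hm = np.zeros(self.out_res)
        #   hm[int(py), int(px)] = 1
        #   hm = cv2.GaussianBlur(hm, self.cfg.gk15, 0)
        #   hm /= hm.max() / 255   # peak rescaled to 255
        # gk15/gk11/gk9/gk7 are blur kernel sizes taken from the config; all
        # four targets share the same resolution cfg.output_shape and differ
        # only in how widely the peak is smeared.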
246 | 247 | meta = {'index' : index, 'imgID' : a['imgInfo']['imgID'], 248 | 'GT_bbox' : np.array([gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]]), 249 | 'img_path' : img_path, 'augmentation_details' : details} 250 | 251 | if self.is_train: 252 | return img, targets, valid, meta 253 | else: 254 | meta['det_scores'] = a['score'] 255 | return img, meta 256 | 257 | def __len__(self): 258 | return len(self.anno) 259 | 260 | 261 | -------------------------------------------------------------------------------- /pytorch-cpn/dataloader/loader_eval_LL.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import random 5 | import math 6 | import cv2 7 | import skimage 8 | import skimage.transform 9 | 10 | import torch 11 | import torch.utils.data as data 12 | 13 | from utils.osutils import * 14 | from utils.imutils import * 15 | from utils.transforms import * 16 | import torch.utils.data as data 17 | import torchvision.transforms as TF 18 | 19 | class EvalLLData(data.Dataset): 20 | def __init__(self, cfg, train=True): 21 | self.img_folder = cfg.img_path 22 | self.is_train = train 23 | self.inp_res = cfg.data_shape 24 | self.out_res = cfg.output_shape 25 | self.pixel_means = cfg.pixel_means 26 | self.num_class = cfg.num_class 27 | self.cfg = cfg 28 | self.bbox_extend_factor = cfg.bbox_extend_factor 29 | if train: 30 | self.scale_factor = cfg.scale_factor 31 | self.rot_factor = cfg.rot_factor 32 | self.symmetry = cfg.symmetry 33 | with open(cfg.gt_path) as anno_file: 34 | self.anno = json.load(anno_file) 35 | 36 | def augmentationCropImage(self, img, bbox, joints=None): 37 | height, width = self.inp_res[0], self.inp_res[1] 38 | bbox = np.array(bbox).reshape(4, ).astype(np.float32) 39 | add = max(img.shape[0], img.shape[1]) 40 | mean_value = self.pixel_means 41 | bimg = cv2.copyMakeBorder(img, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=mean_value.tolist()) 42 | objcenter = np.array([(bbox[0] + bbox[2]) / 2., (bbox[1] + bbox[3]) / 2.]) 43 | bbox += add 44 | objcenter += add 45 | if self.is_train: 46 | joints[:, :2] += add 47 | inds = np.where(joints[:, -1] == 0) 48 | joints[inds, :2] = -1000000 # avoid influencing by data processing 49 | crop_width = (bbox[2] - bbox[0]) * (1 + self.bbox_extend_factor[0] * 2) 50 | crop_height = (bbox[3] - bbox[1]) * (1 + self.bbox_extend_factor[1] * 2) 51 | if self.is_train: 52 | crop_width = crop_width * (1 + 0.25) 53 | crop_height = crop_height * (1 + 0.25) 54 | if crop_height / height > crop_width / width: 55 | crop_size = crop_height 56 | min_shape = height 57 | else: 58 | crop_size = crop_width 59 | min_shape = width 60 | 61 | crop_size = min(crop_size, objcenter[0] / width * min_shape * 2. - 1.) 62 | crop_size = min(crop_size, (bimg.shape[1] - objcenter[0]) / width * min_shape * 2. - 1) 63 | crop_size = min(crop_size, objcenter[1] / height * min_shape * 2. - 1.) 64 | crop_size = min(crop_size, (bimg.shape[0] - objcenter[1]) / height * min_shape * 2. - 1) 65 | 66 | min_x = int(objcenter[0] - crop_size / 2. / min_shape * width) 67 | max_x = int(objcenter[0] + crop_size / 2. / min_shape * width) 68 | min_y = int(objcenter[1] - crop_size / 2. / min_shape * height) 69 | max_y = int(objcenter[1] + crop_size / 2. 
/ min_shape * height) 70 | 71 | x_ratio = float(width) / (max_x - min_x) 72 | y_ratio = float(height) / (max_y - min_y) 73 | 74 | if self.is_train: 75 | joints[:, 0] = joints[:, 0] - min_x 76 | joints[:, 1] = joints[:, 1] - min_y 77 | 78 | joints[:, 0] *= x_ratio 79 | joints[:, 1] *= y_ratio 80 | label = joints[:, :2].copy() 81 | valid = joints[:, 2].copy() 82 | 83 | img = cv2.resize(bimg[min_y:max_y, min_x:max_x, :], (width, height)) 84 | details = np.asarray([min_x - add, min_y - add, max_x - add, max_y - add]).astype(np.float) 85 | 86 | if self.is_train: 87 | return img, joints, details 88 | else: 89 | return img, details 90 | 91 | 92 | 93 | def data_augmentation(self, img, label, operation): 94 | height, width = img.shape[0], img.shape[1] 95 | center = (width / 2., height / 2.) 96 | n = label.shape[0] 97 | affrat = random.uniform(self.scale_factor[0], self.scale_factor[1]) 98 | 99 | halfl_w = min(width - center[0], (width - center[0]) / 1.25 * affrat) 100 | halfl_h = min(height - center[1], (height - center[1]) / 1.25 * affrat) 101 | img = skimage.transform.resize(img[int(center[1] - halfl_h): int(center[1] + halfl_h + 1), 102 | int(center[0] - halfl_w): int(center[0] + halfl_w + 1)], (height, width)) 103 | for i in range(n): 104 | label[i][0] = (label[i][0] - center[0]) / halfl_w * (width - center[0]) + center[0] 105 | label[i][1] = (label[i][1] - center[1]) / halfl_h * (height - center[1]) + center[1] 106 | label[i][2] *= ( 107 | (label[i][0] >= 0) & (label[i][0] < width) & (label[i][1] >= 0) & (label[i][1] < height)) 108 | 109 | return img, label 110 | 111 | 112 | def __getitem__(self, index): 113 | a = self.anno[index] 114 | image_name = a['imgInfo']['img_paths'] 115 | 116 | if '_3_' in image_name: 117 | b = image_name.split('_') 118 | name_LL = b[0]+'_'+b[1]+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 119 | 120 | elif 'manridang' in image_name: 121 | b = image_name.split('_') 122 | name_LL = b[0]+'_'+b[1]+'manridang'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 123 | 124 | 125 | elif 'hwangridan' in image_name: 126 | b = image_name.split('_') 127 | name_LL = b[0]+'_'+b[1]+'hwangridan'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 128 | 129 | 130 | elif 'bulguksa' in image_name: 131 | b = image_name.split('_') 132 | name_LL = b[0]+'_'+b[1]+'bulguksa'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 133 | 134 | else: 135 | b = image_name.split('_') 136 | name_LL = b[0]+'_'+b[1]+'_'+b[2]+'_'+b[3]+'_'+'img'+'_'+b[5]+'__'+b[9]+'_'+b[10] 137 | 138 | img_path = os.path.join(self.img_folder, 'dark/', name_LL.split('/')[1]) 139 | 140 | if self.is_train: 141 | points = np.array(a['unit']['keypoints']).reshape(self.num_class, 3).astype(np.float32) 142 | gt_bbox = a['unit']['GT_bbox'] 143 | 144 | image = scipy.misc.imread(img_path, mode='RGB') 145 | 146 | 147 | rgb_mean_LL = np.mean(image, axis=(0, 1)) 148 | scaling_LL = 255*0.4 / rgb_mean_LL 149 | image = image * scaling_LL 150 | 151 | 152 | if self.is_train: 153 | image, points, details = self.augmentationCropImage(image, gt_bbox, points) 154 | else: 155 | image, details = self.augmentationCropImage(image, gt_bbox) 156 | 157 | 158 | if self.is_train: 159 | image, points = self.data_augmentation(image, points, a['operation']) 160 | img = im_to_torch(image) # CxHxW 161 | 162 | points[:, :2] //= 4 # output size is 1/4 input size 163 | pts = torch.Tensor(points) 164 | else: 165 | img = im_to_torch(image) 166 | 167 | # print(img) 168 | 169 | if self.is_train: 170 | target15 = 
np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 171 | target11 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 172 | target9 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 173 | target7 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 174 | for i in range(self.num_class): 175 | if pts[i, 2] > 0: # COCO visible: 0-no label, 1-label + invisible, 2-label + visible 176 | target15[i] = generate_heatmap(target15[i], pts[i], self.cfg.gk15) 177 | target11[i] = generate_heatmap(target11[i], pts[i], self.cfg.gk11) 178 | target9[i] = generate_heatmap(target9[i], pts[i], self.cfg.gk9) 179 | target7[i] = generate_heatmap(target7[i], pts[i], self.cfg.gk7) 180 | 181 | targets = [torch.Tensor(target15), torch.Tensor(target11), torch.Tensor(target9), torch.Tensor(target7)] 182 | valid = pts[:, 2] 183 | 184 | meta = {'index' : index, 'imgID' : a['imgInfo']['imgID'], 185 | 'GT_bbox' : np.array([gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]]), 186 | 'img_path' : img_path, 'augmentation_details' : details} 187 | 188 | if self.is_train: 189 | return img, targets, valid, meta 190 | else: 191 | meta['det_scores'] = a['score'] 192 | return img, meta 193 | 194 | def __len__(self): 195 | return len(self.anno) 196 | 197 | 198 | -------------------------------------------------------------------------------- /pytorch-cpn/dataloader/loader_eval_WL.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import random 5 | import math 6 | import cv2 7 | import skimage 8 | import skimage.transform 9 | 10 | import torch 11 | import torch.utils.data as data 12 | import torchvision.transforms as TF 13 | 14 | from utils.osutils import * 15 | from utils.imutils import * 16 | from utils.transforms import * 17 | 18 | class EvalWLData(data.Dataset): 19 | def __init__(self, cfg, train=True): 20 | self.img_folder = cfg.img_path 21 | self.is_train = train 22 | self.inp_res = cfg.data_shape 23 | self.out_res = cfg.output_shape 24 | self.pixel_means = cfg.pixel_means 25 | self.num_class = cfg.num_class 26 | self.cfg = cfg 27 | self.bbox_extend_factor = cfg.bbox_extend_factor 28 | if train: 29 | self.scale_factor = cfg.scale_factor 30 | self.rot_factor = cfg.rot_factor 31 | self.symmetry = cfg.symmetry 32 | with open(cfg.gt_path) as anno_file: 33 | self.anno = json.load(anno_file) 34 | 35 | def augmentationCropImage(self, img, bbox, joints=None): 36 | height, width = self.inp_res[0], self.inp_res[1] 37 | bbox = np.array(bbox).reshape(4, ).astype(np.float32) 38 | add = max(img.shape[0], img.shape[1]) 39 | mean_value = self.pixel_means 40 | bimg = cv2.copyMakeBorder(img, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=mean_value.tolist()) 41 | objcenter = np.array([(bbox[0] + bbox[2]) / 2., (bbox[1] + bbox[3]) / 2.]) 42 | bbox += add 43 | objcenter += add 44 | if self.is_train: 45 | joints[:, :2] += add 46 | inds = np.where(joints[:, -1] == 0) 47 | joints[inds, :2] = -1000000 # avoid influencing by data processing 48 | crop_width = (bbox[2] - bbox[0]) * (1 + self.bbox_extend_factor[0] * 2) 49 | crop_height = (bbox[3] - bbox[1]) * (1 + self.bbox_extend_factor[1] * 2) 50 | if self.is_train: 51 | crop_width = crop_width * (1 + 0.25) 52 | crop_height = crop_height * (1 + 0.25) 53 | if crop_height / height > crop_width / width: 54 | crop_size = crop_height 55 | min_shape = height 56 | else: 57 | crop_size = crop_width 58 | min_shape = width 59 | 60 | crop_size = min(crop_size, objcenter[0] / 
width * min_shape * 2. - 1.) 61 | crop_size = min(crop_size, (bimg.shape[1] - objcenter[0]) / width * min_shape * 2. - 1) 62 | crop_size = min(crop_size, objcenter[1] / height * min_shape * 2. - 1.) 63 | crop_size = min(crop_size, (bimg.shape[0] - objcenter[1]) / height * min_shape * 2. - 1) 64 | 65 | min_x = int(objcenter[0] - crop_size / 2. / min_shape * width) 66 | max_x = int(objcenter[0] + crop_size / 2. / min_shape * width) 67 | min_y = int(objcenter[1] - crop_size / 2. / min_shape * height) 68 | max_y = int(objcenter[1] + crop_size / 2. / min_shape * height) 69 | 70 | x_ratio = float(width) / (max_x - min_x) 71 | y_ratio = float(height) / (max_y - min_y) 72 | 73 | if self.is_train: 74 | joints[:, 0] = joints[:, 0] - min_x 75 | joints[:, 1] = joints[:, 1] - min_y 76 | 77 | joints[:, 0] *= x_ratio 78 | joints[:, 1] *= y_ratio 79 | label = joints[:, :2].copy() 80 | valid = joints[:, 2].copy() 81 | 82 | img = cv2.resize(bimg[min_y:max_y, min_x:max_x, :], (width, height)) 83 | details = np.asarray([min_x - add, min_y - add, max_x - add, max_y - add]).astype(np.float) 84 | 85 | if self.is_train: 86 | return img, joints, details 87 | else: 88 | return img, details 89 | 90 | 91 | 92 | def data_augmentation(self, img, label, operation): 93 | height, width = img.shape[0], img.shape[1] 94 | center = (width / 2., height / 2.) 95 | n = label.shape[0] 96 | affrat = random.uniform(self.scale_factor[0], self.scale_factor[1]) 97 | 98 | halfl_w = min(width - center[0], (width - center[0]) / 1.25 * affrat) 99 | halfl_h = min(height - center[1], (height - center[1]) / 1.25 * affrat) 100 | img = skimage.transform.resize(img[int(center[1] - halfl_h): int(center[1] + halfl_h + 1), 101 | int(center[0] - halfl_w): int(center[0] + halfl_w + 1)], (height, width)) 102 | for i in range(n): 103 | label[i][0] = (label[i][0] - center[0]) / halfl_w * (width - center[0]) + center[0] 104 | label[i][1] = (label[i][1] - center[1]) / halfl_h * (height - center[1]) + center[1] 105 | label[i][2] *= ( 106 | (label[i][0] >= 0) & (label[i][0] < width) & (label[i][1] >= 0) & (label[i][1] < height)) 107 | 108 | return img, label 109 | 110 | 111 | def __getitem__(self, index): 112 | a = self.anno[index] 113 | image_name = a['imgInfo']['img_paths'] 114 | 115 | if '_3_' in image_name: 116 | b = image_name.split('_') 117 | new_name = b[0]+'_'+b[1]+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 118 | 119 | elif 'manridang' in image_name: 120 | b = image_name.split('_') 121 | new_name = b[0]+'_'+b[1]+'manridang'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 122 | 123 | 124 | elif 'hwangridan' in image_name: 125 | b = image_name.split('_') 126 | new_name = b[0]+'_'+b[1]+'hwangridan'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 127 | 128 | 129 | elif 'bulguksa' in image_name: 130 | b = image_name.split('_') 131 | new_name = b[0]+'_'+b[1]+'bulguksa'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 132 | 133 | else: 134 | b = image_name.split('_') 135 | new_name = b[0]+'_'+b[1]+'_'+b[2]+'_'+b[3]+'_'+'img'+'_'+b[5]+'__'+b[9]+'_'+b[10] 136 | 137 | img_path = os.path.join(self.img_folder, new_name) 138 | if self.is_train: 139 | points = np.array(a['unit']['keypoints']).reshape(self.num_class, 3).astype(np.float32) 140 | gt_bbox = a['unit']['GT_bbox'] 141 | 142 | image = scipy.misc.imread(img_path, mode='RGB') 143 | 144 | if self.is_train: 145 | image, points, details = self.augmentationCropImage(image, gt_bbox, points) 146 | else: 147 | image, details = self.augmentationCropImage(image, gt_bbox) 
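        # Added commentary: unlike the low-light loaders, which rescale each
        # frame so its mean intensity becomes 0.4, well-lit frames are instead
        # standardized below with fixed dataset statistics via TF.Normalize.
        # `details` stores the crop window [min_x, min_y, max_x, max_y] in
        # original-image coordinates, so a prediction (xc, yc) in the resized
        # crop maps back (a sketch derived from augmentationCropImage above):
        #   x = xc * (details[2] - details[0]) / self.inp_res[1] + details[0]
        #   y = yc * (details[3] - details[1]) / self.inp_res[0] + details[1]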
148 | 149 | if self.is_train: 150 | image, points = self.data_augmentation(image, points, a['operation']) 151 | img = im_to_torch(image) # CxHxW 152 | 153 | 154 | points[:, :2] //= 4 155 | pts = torch.Tensor(points) 156 | else: 157 | img = im_to_torch(image) 158 | 159 | img = TF.Normalize((0.3457, 0.3460, 0.3463), (0.1477, 0.1482, 0.1483))(img) 160 | 161 | 162 | if self.is_train: 163 | target15 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 164 | target11 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 165 | target9 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 166 | target7 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 167 | for i in range(self.num_class): 168 | if pts[i, 2] > 0: # COCO visible: 0-no label, 1-label + invisible, 2-label + visible 169 | target15[i] = generate_heatmap(target15[i], pts[i], self.cfg.gk15) 170 | target11[i] = generate_heatmap(target11[i], pts[i], self.cfg.gk11) 171 | target9[i] = generate_heatmap(target9[i], pts[i], self.cfg.gk9) 172 | target7[i] = generate_heatmap(target7[i], pts[i], self.cfg.gk7) 173 | 174 | targets = [torch.Tensor(target15), torch.Tensor(target11), torch.Tensor(target9), torch.Tensor(target7)] 175 | valid = pts[:, 2] 176 | 177 | meta = {'index' : index, 'imgID' : a['imgInfo']['imgID'], 178 | 'GT_bbox' : np.array([gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]]), 179 | 'img_path' : img_path, 'augmentation_details' : details} 180 | 181 | if self.is_train: 182 | return img, targets, valid, meta 183 | else: 184 | meta['det_scores'] = a['score'] 185 | return img, meta 186 | 187 | def __len__(self): 188 | return len(self.anno) 189 | 190 | 191 | -------------------------------------------------------------------------------- /pytorch-cpn/dataloader/loader_training_pair.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import random 5 | import math 6 | import cv2 7 | import skimage 8 | import skimage.transform 9 | 10 | import torch 11 | import torch.utils.data as data 12 | 13 | from utils.osutils import * 14 | from utils.imutils import * 15 | from utils.transforms import * 16 | import torch.utils.data as data 17 | import torchvision.transforms as TF 18 | 19 | class TrainingData(data.Dataset): 20 | def __init__(self, cfg, cfg_LL, train=True): 21 | self.img_folder = cfg.img_path 22 | self.img_folder_LL = cfg_LL.img_path 23 | self.is_train = train 24 | self.inp_res = cfg.data_shape 25 | self.out_res = cfg.output_shape 26 | self.pixel_means = cfg.pixel_means 27 | self.num_class = cfg.num_class 28 | self.cfg = cfg 29 | self.cfg_LL = cfg_LL 30 | self.bbox_extend_factor = cfg.bbox_extend_factor 31 | if train: 32 | self.scale_factor = cfg.scale_factor 33 | self.rot_factor = cfg.rot_factor 34 | self.symmetry = cfg.symmetry 35 | with open(cfg.gt_path) as anno_file: 36 | self.anno = json.load(anno_file) 37 | 38 | def augmentationCropImage(self, img, img_LL, bbox, joints=None): 39 | height, width = self.inp_res[0], self.inp_res[1] 40 | bbox = np.array(bbox).reshape(4, ).astype(np.float32) 41 | add = max(img.shape[0], img.shape[1]) 42 | mean_value = self.pixel_means 43 | bimg = cv2.copyMakeBorder(img, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=mean_value.tolist()) 44 | bimg_LL = cv2.copyMakeBorder(img_LL, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=mean_value.tolist()) 45 | 46 | objcenter = np.array([(bbox[0] + bbox[2]) / 2., (bbox[1] + bbox[3]) / 2.]) 47 | bbox += add 48 | 
objcenter += add 49 | if self.is_train: 50 | joints[:, :2] += add 51 | inds = np.where(joints[:, -1] == 0) 52 | joints[inds, :2] = -1000000 # avoid influencing by data processing 53 | crop_width = (bbox[2] - bbox[0]) * (1 + self.bbox_extend_factor[0] * 2) 54 | crop_height = (bbox[3] - bbox[1]) * (1 + self.bbox_extend_factor[1] * 2) 55 | if self.is_train: 56 | crop_width = crop_width * (1 + 0.25) 57 | crop_height = crop_height * (1 + 0.25) 58 | if crop_height / height > crop_width / width: 59 | crop_size = crop_height 60 | min_shape = height 61 | else: 62 | crop_size = crop_width 63 | min_shape = width 64 | 65 | crop_size = min(crop_size, objcenter[0] / width * min_shape * 2. - 1.) 66 | crop_size = min(crop_size, (bimg.shape[1] - objcenter[0]) / width * min_shape * 2. - 1) 67 | crop_size = min(crop_size, objcenter[1] / height * min_shape * 2. - 1.) 68 | crop_size = min(crop_size, (bimg.shape[0] - objcenter[1]) / height * min_shape * 2. - 1) 69 | 70 | min_x = int(objcenter[0] - crop_size / 2. / min_shape * width) 71 | max_x = int(objcenter[0] + crop_size / 2. / min_shape * width) 72 | min_y = int(objcenter[1] - crop_size / 2. / min_shape * height) 73 | max_y = int(objcenter[1] + crop_size / 2. / min_shape * height) 74 | 75 | x_ratio = float(width) / (max_x - min_x) 76 | y_ratio = float(height) / (max_y - min_y) 77 | 78 | if self.is_train: 79 | joints[:, 0] = joints[:, 0] - min_x 80 | joints[:, 1] = joints[:, 1] - min_y 81 | 82 | joints[:, 0] *= x_ratio 83 | joints[:, 1] *= y_ratio 84 | label = joints[:, :2].copy() 85 | valid = joints[:, 2].copy() 86 | 87 | img = cv2.resize(bimg[min_y:max_y, min_x:max_x, :], (width, height)) 88 | img_LL = cv2.resize(bimg_LL[min_y:max_y, min_x:max_x, :], (width, height)) 89 | details = np.asarray([min_x - add, min_y - add, max_x - add, max_y - add]).astype(np.float) 90 | 91 | 92 | 93 | if self.is_train: 94 | return img, img_LL, joints, details 95 | else: 96 | return img, img_LL, details 97 | 98 | 99 | 100 | def data_augmentation(self, img, img_LL, label, operation): 101 | height, width = img.shape[0], img.shape[1] 102 | center = (width / 2., height / 2.) 
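        # Paired augmentation: the random zoom below crops the SAME centered
        # window from both the well-lit image and its low-light counterpart
        # before resizing back to (height, width), so the pair stays
        # pixel-aligned and a single set of joint labels remains valid for
        # both views; joints that leave the window get their visibility
        # flag zeroed.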
103 | n = label.shape[0] 104 | affrat = random.uniform(self.scale_factor[0], self.scale_factor[1]) 105 | 106 | halfl_w = min(width - center[0], (width - center[0]) / 1.25 * affrat) 107 | halfl_h = min(height - center[1], (height - center[1]) / 1.25 * affrat) 108 | img = skimage.transform.resize(img[int(center[1] - halfl_h): int(center[1] + halfl_h + 1), 109 | int(center[0] - halfl_w): int(center[0] + halfl_w + 1)], (height, width)) 110 | img_LL = skimage.transform.resize(img_LL[int(center[1] - halfl_h): int(center[1] + halfl_h + 1), 111 | int(center[0] - halfl_w): int(center[0] + halfl_w + 1)], (height, width)) 112 | for i in range(n): 113 | label[i][0] = (label[i][0] - center[0]) / halfl_w * (width - center[0]) + center[0] 114 | label[i][1] = (label[i][1] - center[1]) / halfl_h * (height - center[1]) + center[1] 115 | label[i][2] *= ( 116 | (label[i][0] >= 0) & (label[i][0] < width) & (label[i][1] >= 0) & (label[i][1] < height)) 117 | 118 | return img, img_LL, label 119 | 120 | 121 | def __getitem__(self, index): 122 | a = self.anno[index] 123 | image_name = a['imgInfo']['img_paths'] 124 | 125 | if '_3_' in image_name: 126 | b = image_name.split('_') 127 | new_name = b[0]+'_'+b[1]+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 128 | 129 | elif 'manridang' in image_name: 130 | b = image_name.split('_') 131 | new_name = b[0]+'_'+b[1]+'manridang'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 132 | 133 | 134 | elif 'hwangridan' in image_name: 135 | b = image_name.split('_') 136 | new_name = b[0]+'_'+b[1]+'hwangridan'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 137 | 138 | 139 | elif 'bulguksa' in image_name: 140 | b = image_name.split('_') 141 | new_name = b[0]+'_'+b[1]+'bulguksa'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 142 | 143 | else: 144 | b = image_name.split('_') 145 | new_name = b[0]+'_'+b[1]+'_'+b[2]+'_'+b[3]+'_'+'img'+'_'+b[5]+'__'+b[9]+'_'+b[10] 146 | 147 | img_path = os.path.join(self.img_folder, new_name) 148 | 149 | 150 | if '_3_' in image_name: 151 | b = image_name.split('_') 152 | name_LL = b[0]+'_'+b[1]+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 153 | 154 | elif 'manridang' in image_name: 155 | b = image_name.split('_') 156 | name_LL = b[0]+'_'+b[1]+'manridang'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 157 | 158 | 159 | elif 'hwangridan' in image_name: 160 | b = image_name.split('_') 161 | name_LL = b[0]+'_'+b[1]+'hwangridan'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 162 | 163 | 164 | elif 'bulguksa' in image_name: 165 | b = image_name.split('_') 166 | name_LL = b[0]+'_'+b[1]+'bulguksa'+'_'+b[3]+'_'+b[4]+'_'+'img'+'_'+b[6]+'__'+b[10]+'_'+b[11] 167 | 168 | else: 169 | b = image_name.split('_') 170 | name_LL = b[0]+'_'+b[1]+'_'+b[2]+'_'+b[3]+'_'+'img'+'_'+b[5]+'__'+b[9]+'_'+b[10] 171 | 172 | img_path_LL = os.path.join(self.img_folder, 'dark/', name_LL.split('/')[1]) 173 | 174 | if self.is_train: 175 | points = np.array(a['unit']['keypoints']).reshape(self.num_class, 3).astype(np.float32) 176 | points_LL = np.array(a['unit']['keypoints']).reshape(self.num_class, 3).astype(np.float32) 177 | gt_bbox = a['unit']['GT_bbox'] 178 | 179 | image = scipy.misc.imread(img_path, mode='RGB') 180 | image_LL = scipy.misc.imread(img_path_LL, mode='RGB') 181 | 182 | 183 | rgb_mean_LL = np.mean(image_LL, axis=(0, 1)) 184 | scaling_LL = 255*0.4 / rgb_mean_LL 185 | image_LL = image_LL * scaling_LL 186 | 187 | if self.is_train: 188 | image, image_LL, points, details = 
self.augmentationCropImage(image, image_LL, gt_bbox, points) 189 | else: 190 | image, image_LL, details = self.augmentationCropImage(image, image_LL, gt_bbox) 191 | 192 | 193 | 194 | if self.is_train: 195 | image, image_LL, points = self.data_augmentation(image, image_LL, points, a['operation']) 196 | img = im_to_torch(image) # CxHxW 197 | img_LL = im_to_torch(image_LL) 198 | 199 | points[:, :2] //= 4 # output size is 1/4 input size 200 | pts = torch.Tensor(points) 201 | else: 202 | img = im_to_torch(image) 203 | img_LL = im_to_torch(image_LL) 204 | 205 | img = TF.Normalize((0.3457, 0.3460, 0.3463), (0.1477, 0.1482, 0.1483))(img) 206 | 207 | 208 | if self.is_train: 209 | target15 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 210 | target11 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 211 | target9 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 212 | target7 = np.zeros((self.num_class, self.out_res[0], self.out_res[1])) 213 | for i in range(self.num_class): 214 | if pts[i, 2] > 0: # COCO visible: 0-no label, 1-label + invisible, 2-label + visible 215 | target15[i] = generate_heatmap(target15[i], pts[i], self.cfg.gk15) 216 | target11[i] = generate_heatmap(target11[i], pts[i], self.cfg.gk11) 217 | target9[i] = generate_heatmap(target9[i], pts[i], self.cfg.gk9) 218 | target7[i] = generate_heatmap(target7[i], pts[i], self.cfg.gk7) 219 | 220 | targets = [torch.Tensor(target15), torch.Tensor(target11), torch.Tensor(target9), torch.Tensor(target7)] 221 | valid = pts[:, 2] 222 | 223 | meta = {'index' : index, 'imgID' : a['imgInfo']['imgID'], 224 | 'GT_bbox' : np.array([gt_bbox[0], gt_bbox[1], gt_bbox[2], gt_bbox[3]]), 225 | 'img_path' : img_path, 'augmentation_details' : details} 226 | 227 | if self.is_train: 228 | return img_LL, img, targets, valid, meta 229 | else: 230 | meta['det_scores'] = a['score'] 231 | return img, meta 232 | 233 | def __len__(self): 234 | return len(self.anno) 235 | 236 | 237 | -------------------------------------------------------------------------------- /pytorch-cpn/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohyun-l/ExLPose/a742a204da44323769073e9c1660305401cd69ff/pytorch-cpn/networks/__init__.py -------------------------------------------------------------------------------- /pytorch-cpn/networks/globalNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | 5 | class globalNet(nn.Module): 6 | def __init__(self, channel_settings, output_shape, num_class): 7 | super(globalNet, self).__init__() 8 | self.channel_settings = channel_settings 9 | laterals, upsamples, predict = [], [], [] 10 | for i in range(len(channel_settings)): 11 | laterals.append(self._lateral(channel_settings[i])) 12 | predict.append(self._predict(output_shape, num_class)) 13 | if i != len(channel_settings) - 1: 14 | upsamples.append(self._upsample()) 15 | self.laterals = nn.ModuleList(laterals) 16 | self.upsamples = nn.ModuleList(upsamples) 17 | self.predict = nn.ModuleList(predict) 18 | 19 | for m in self.modules(): 20 | if isinstance(m, nn.Conv2d): 21 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 22 | m.weight.data.normal_(0, math.sqrt(2. 
/ n))
23 |                 if m.bias is not None:
24 |                     m.bias.data.zero_()
25 |             elif isinstance(m, nn.BatchNorm2d):
26 |                 m.weight.data.fill_(1)
27 |                 m.bias.data.zero_()
28 | 
29 |     def _lateral(self, input_size):
30 |         layers = []
31 |         layers.append(nn.Conv2d(input_size, 256,
32 |             kernel_size=1, stride=1, bias=False))
33 |         layers.append(nn.BatchNorm2d(256))
34 |         layers.append(nn.ReLU(inplace=True))
35 | 
36 |         return nn.Sequential(*layers)
37 | 
38 |     def _upsample(self):
39 |         layers = []
40 |         layers.append(torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
41 |         layers.append(torch.nn.Conv2d(256, 256,
42 |             kernel_size=1, stride=1, bias=False))
43 |         layers.append(nn.BatchNorm2d(256))
44 | 
45 |         return nn.Sequential(*layers)
46 | 
47 |     def _predict(self, output_shape, num_class):
48 |         layers = []
49 |         layers.append(nn.Conv2d(256, 256,
50 |             kernel_size=1, stride=1, bias=False))
51 |         layers.append(nn.BatchNorm2d(256))
52 |         layers.append(nn.ReLU(inplace=True))
53 | 
54 |         layers.append(nn.Conv2d(256, num_class,
55 |             kernel_size=3, stride=1, padding=1, bias=False))
56 |         layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True))
57 |         layers.append(nn.BatchNorm2d(num_class))
58 | 
59 |         return nn.Sequential(*layers)
60 | 
61 |     def forward(self, x):
62 |         global_fms, global_outs = [], []
63 |         for i in range(len(self.channel_settings)):
64 |             if i == 0:
65 |                 feature = self.laterals[i](x[i])
66 |             else:
67 |                 feature = self.laterals[i](x[i]) + up
68 |             global_fms.append(feature)
69 |             if i != len(self.channel_settings) - 1:
70 |                 up = self.upsamples[i](feature)
71 |             feature = self.predict[i](feature)
72 |             global_outs.append(feature)
73 | 
74 |         return global_fms, global_outs
75 | 
76 | class globalNet_bist(nn.Module):  # structurally identical to globalNet above
77 |     def __init__(self, channel_settings, output_shape, num_class):
78 |         super(globalNet_bist, self).__init__()
79 |         self.channel_settings = channel_settings
80 |         laterals, upsamples, predict = [], [], []
81 |         for i in range(len(channel_settings)):
82 |             laterals.append(self._lateral(channel_settings[i]))
83 |             predict.append(self._predict(output_shape, num_class))
84 |             if i != len(channel_settings) - 1:
85 |                 upsamples.append(self._upsample())
86 |         self.laterals = nn.ModuleList(laterals)
87 |         self.upsamples = nn.ModuleList(upsamples)
88 |         self.predict = nn.ModuleList(predict)
89 | 
90 |         for m in self.modules():
91 |             if isinstance(m, nn.Conv2d):
92 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
93 |                 m.weight.data.normal_(0, math.sqrt(2. 
/ n))
94 |                 if m.bias is not None:
95 |                     m.bias.data.zero_()
96 |             elif isinstance(m, nn.BatchNorm2d):
97 |                 m.weight.data.fill_(1)
98 |                 m.bias.data.zero_()
99 | 
100 |     def _lateral(self, input_size):
101 |         layers = []
102 |         layers.append(nn.Conv2d(input_size, 256,
103 |             kernel_size=1, stride=1, bias=False))
104 |         layers.append(nn.BatchNorm2d(256))
105 |         layers.append(nn.ReLU(inplace=True))
106 | 
107 |         return nn.Sequential(*layers)
108 | 
109 |     def _upsample(self):
110 |         layers = []
111 |         layers.append(torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
112 |         layers.append(torch.nn.Conv2d(256, 256,
113 |             kernel_size=1, stride=1, bias=False))
114 |         layers.append(nn.BatchNorm2d(256))
115 | 
116 |         return nn.Sequential(*layers)
117 | 
118 |     def _predict(self, output_shape, num_class):
119 |         layers = []
120 |         layers.append(nn.Conv2d(256, 256,
121 |             kernel_size=1, stride=1, bias=False))
122 |         layers.append(nn.BatchNorm2d(256))
123 |         layers.append(nn.ReLU(inplace=True))
124 | 
125 |         layers.append(nn.Conv2d(256, num_class,
126 |             kernel_size=3, stride=1, padding=1, bias=False))
127 |         layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True))
128 |         layers.append(nn.BatchNorm2d(num_class))
129 | 
130 |         return nn.Sequential(*layers)
131 | 
132 |     def forward(self, x):
133 |         global_fms, global_outs = [], []
134 |         for i in range(len(self.channel_settings)):
135 |             if i == 0:
136 |                 feature = self.laterals[i](x[i])
137 |             else:
138 |                 feature = self.laterals[i](x[i]) + up
139 |             global_fms.append(feature)
140 |             if i != len(self.channel_settings) - 1:
141 |                 up = self.upsamples[i](feature)
142 |             feature = self.predict[i](feature)
143 |             global_outs.append(feature)
144 | 
145 |         return global_fms, global_outs
146 | 
--------------------------------------------------------------------------------
/pytorch-cpn/networks/lsbn.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | 
3 | 
4 | class lsbn(nn.Module):  # maintains one BatchNorm2d branch per lighting condition
5 |     _version = 2
6 | 
7 |     def __init__(self, num_features, num_classes, eps=1e-5, momentum=0.1, affine=True,
8 |                  track_running_stats=True):
9 |         super(lsbn, self).__init__()
10 |         self.bns = nn.ModuleList(
11 |             [nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats) for _ in range(num_classes)])
12 | 
13 |     def reset_running_stats(self):
14 |         for bn in self.bns:
15 |             bn.reset_running_stats()
16 | 
17 |     def reset_parameters(self):
18 |         for bn in self.bns:
19 |             bn.reset_parameters()
20 | 
21 |     def _check_input_dim(self, input):
22 |         raise NotImplementedError
23 | 
24 |     def forward(self, x, lighting_condition):
25 |         self._check_input_dim(x)
26 |         bn = self.bns[lighting_condition[0]]  # select the BN branch for this batch's lighting condition
27 |         return bn(x), lighting_condition
28 | 
29 | 
30 | class LSBN2d(lsbn):
31 |     def _check_input_dim(self, input):
32 |         if input.dim() != 4:
33 |             raise ValueError('expected 4D input (got {}D input)'
34 |                              .format(input.dim()))
35 | 
--------------------------------------------------------------------------------
/pytorch-cpn/networks/network.py:
--------------------------------------------------------------------------------
1 | from .resnet import *
2 | from .resnetlsbn import resnet50lsbn
3 | import torch.nn as nn
4 | import torch
5 | from .globalNet import globalNet
6 | from .globalNet import globalNet_bist
7 | from .refineNet import refineNet
8 | 
9 | __all__ = ['CPN50', 'CPN101', 'LSBN_CPN50']
10 | 
11 | class CPN(nn.Module):
12 |     def __init__(self, resnet, output_shape, num_class, pretrained=True):
13 |         super(CPN, self).__init__()
14 |         channel_settings = 
[2048, 1024, 512, 256] 15 | self.resnet = resnet 16 | self.global_net = globalNet(channel_settings, output_shape, num_class) 17 | self.refine_net = refineNet(channel_settings[-1], output_shape, num_class) 18 | 19 | def forward(self, x): 20 | res_out = self.resnet(x) 21 | global_fms, global_outs = self.global_net(res_out) 22 | refine_out = self.refine_net(global_fms) 23 | 24 | return global_outs, refine_out 25 | 26 | 27 | class CPN_lsbn(nn.Module): 28 | def __init__(self, resnet_lsbn, output_shape, num_class, pretrained=True): 29 | super(CPN_lsbn, self).__init__() 30 | channel_settings = [2048, 1024, 512, 256] 31 | self.resnet_lsbn = resnet_lsbn 32 | self.global_net = globalNet(channel_settings, output_shape, num_class) 33 | self.refine_net = refineNet(channel_settings[-1], output_shape, num_class) 34 | 35 | def forward(self, x, y): 36 | x0, x1, x2, x3, x4 = self.resnet_lsbn(x, y) 37 | res_out = [x4, x3, x2, x1] 38 | global_fms, global_outs = self.global_net(res_out) 39 | refine_out = self.refine_net(global_fms) 40 | 41 | return x0, x1, x2, x3, x4, global_outs, refine_out 42 | 43 | def CPN50(out_size,num_class,pretrained=True): 44 | res50 = resnet50(pretrained=pretrained) 45 | model = CPN(res50, output_shape=out_size,num_class=num_class, pretrained=pretrained) 46 | return model 47 | 48 | def CPN101(out_size,num_class,pretrained=True): 49 | res101 = resnet101(pretrained=pretrained) 50 | model = CPN(res101, output_shape=out_size,num_class=num_class, pretrained=pretrained) 51 | return model 52 | 53 | def LSBN_CPN50(out_size, num_class, in_features=0, num_conditions=2, pretrained=True): 54 | res50 = resnet50lsbn(pretrained=pretrained, num_class=num_class, in_features=in_features, num_conditions=num_conditions) 55 | model = CPN_lsbn(res50, output_shape=out_size,num_class=num_class, pretrained=pretrained) 56 | return model -------------------------------------------------------------------------------- /pytorch-cpn/networks/refineNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | class Bottleneck(nn.Module): 5 | expansion = 4 6 | 7 | def __init__(self, inplanes, planes, stride=1): 8 | super(Bottleneck, self).__init__() 9 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 10 | self.bn1 = nn.BatchNorm2d(planes) 11 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 12 | padding=1, bias=False) 13 | self.bn2 = nn.BatchNorm2d(planes) 14 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) 15 | self.bn3 = nn.BatchNorm2d(planes * 2) 16 | self.relu = nn.ReLU(inplace=True) 17 | 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(inplanes, planes * 2, 20 | kernel_size=1, stride=stride, bias=False), 21 | nn.BatchNorm2d(planes * 2), 22 | ) 23 | 24 | self.stride = stride 25 | 26 | def forward(self, x): 27 | residual = x 28 | 29 | out = self.conv1(x) 30 | out = self.bn1(out) 31 | out = self.relu(out) 32 | 33 | out = self.conv2(out) 34 | out = self.bn2(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv3(out) 38 | out = self.bn3(out) 39 | 40 | if self.downsample is not None: 41 | residual = self.downsample(x) 42 | 43 | out += residual 44 | out = self.relu(out) 45 | 46 | return out 47 | 48 | class refineNet(nn.Module): 49 | def __init__(self, lateral_channel, out_shape, num_class): 50 | super(refineNet, self).__init__() 51 | cascade = [] 52 | num_cascade = 4 53 | for i in range(num_cascade): 54 | cascade.append(self._make_layer(lateral_channel, num_cascade-i-1, 
out_shape)) 55 | self.cascade = nn.ModuleList(cascade) 56 | self.final_predict = self._predict(4*lateral_channel, num_class) 57 | 58 | def _make_layer(self, input_channel, num, output_shape): 59 | layers = [] 60 | for i in range(num): 61 | layers.append(Bottleneck(input_channel, 128)) 62 | layers.append(nn.Upsample(size=output_shape, mode='bilinear', align_corners=True)) 63 | return nn.Sequential(*layers) 64 | 65 | def _predict(self, input_channel, num_class): 66 | layers = [] 67 | layers.append(Bottleneck(input_channel, 128)) 68 | layers.append(nn.Conv2d(256, num_class, 69 | kernel_size=3, stride=1, padding=1, bias=False)) 70 | layers.append(nn.BatchNorm2d(num_class)) 71 | return nn.Sequential(*layers) 72 | 73 | def forward(self, x): 74 | refine_fms = [] 75 | for i in range(4): 76 | refine_fms.append(self.cascade[i](x[i])) 77 | out = torch.cat(refine_fms, dim=1) 78 | out = self.final_predict(out) 79 | return out 80 | -------------------------------------------------------------------------------- /pytorch-cpn/networks/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | "3x3 convolution with padding" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | 25 | class BasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__(self, inplanes, planes, stride=1, downsample=None): 29 | super(BasicBlock, self).__init__() 30 | self.conv1 = conv3x3(inplanes, planes, stride) 31 | self.bn1 = nn.BatchNorm2d(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = conv3x3(planes, planes) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | 38 | def forward(self, x): 39 | residual = x 40 | 41 | out = self.conv1(x) 42 | out = self.bn1(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv2(out) 46 | out = self.bn2(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None): 61 | super(Bottleneck, self).__init__() 62 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 63 | self.bn1 = nn.BatchNorm2d(planes) 64 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 65 | padding=1, bias=False) 66 | self.bn2 = nn.BatchNorm2d(planes) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 68 | self.bn3 = nn.BatchNorm2d(planes * 4) 69 | self.relu = nn.ReLU(inplace=True) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | residual = x 75 | 76 | out = self.conv1(x) 77 | out = self.bn1(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv2(out) 81 | out = 
self.bn2(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.downsample is not None: 88 | residual = self.downsample(x) 89 | 90 | out += residual 91 | out = self.relu(out) 92 | 93 | return out 94 | 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, num_classes=1000): 99 | self.inplanes = 64 100 | super(ResNet, self).__init__() 101 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | 111 | for m in self.modules(): 112 | if isinstance(m, nn.Conv2d): 113 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 114 | m.weight.data.normal_(0, math.sqrt(2. / n)) 115 | elif isinstance(m, nn.BatchNorm2d): 116 | m.weight.data.fill_(1) 117 | m.bias.data.zero_() 118 | 119 | def _make_layer(self, block, planes, blocks, stride=1): 120 | downsample = None 121 | if stride != 1 or self.inplanes != planes * block.expansion: 122 | downsample = nn.Sequential( 123 | nn.Conv2d(self.inplanes, planes * block.expansion, 124 | kernel_size=1, stride=stride, bias=False), 125 | nn.BatchNorm2d(planes * block.expansion), 126 | ) 127 | 128 | layers = [] 129 | layers.append(block(self.inplanes, planes, stride, downsample)) 130 | self.inplanes = planes * block.expansion 131 | for i in range(1, blocks): 132 | layers.append(block(self.inplanes, planes)) 133 | 134 | return nn.Sequential(*layers) 135 | 136 | def forward(self, x): 137 | x = self.conv1(x) 138 | x = self.bn1(x) 139 | x = self.relu(x) 140 | x = self.maxpool(x) 141 | 142 | x1 = self.layer1(x) 143 | x2 = self.layer2(x1) 144 | x3 = self.layer3(x2) 145 | x4 = self.layer4(x3) 146 | 147 | return [x4, x3, x2, x1] 148 | 149 | 150 | def resnet18(pretrained=False, **kwargs): 151 | """Constructs a ResNet-18 model. 152 | Args: 153 | pretrained (bool): If True, returns a model pre-trained on ImageNet 154 | """ 155 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 156 | if pretrained: 157 | from collections import OrderedDict 158 | state_dict = model.state_dict() 159 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) 160 | for k, v in pretrained_state_dict.items(): 161 | if k not in state_dict: 162 | continue 163 | state_dict[k] = v 164 | model.load_state_dict(state_dict) 165 | return model 166 | 167 | 168 | def resnet34(pretrained=False, **kwargs): 169 | """Constructs a ResNet-34 model. 170 | Args: 171 | pretrained (bool): If True, returns a model pre-trained on ImageNet 172 | """ 173 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 174 | if pretrained: 175 | from collections import OrderedDict 176 | state_dict = model.state_dict() 177 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) 178 | for k, v in pretrained_state_dict.items(): 179 | if k not in state_dict: 180 | continue 181 | state_dict[k] = v 182 | model.load_state_dict(state_dict) 183 | return model 184 | 185 | 186 | def resnet50(pretrained=False, **kwargs): 187 | """Constructs a ResNet-50 model. 
188 | Args: 189 | pretrained (bool): If True, returns a model pre-trained on ImageNet 190 | """ 191 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 192 | if pretrained: 193 | print('Initialize with pre-trained ResNet') 194 | from collections import OrderedDict 195 | state_dict = model.state_dict() 196 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) 197 | for k, v in pretrained_state_dict.items(): 198 | if k not in state_dict: 199 | continue 200 | state_dict[k] = v 201 | print('successfully load '+str(len(state_dict.keys()))+' keys') 202 | model.load_state_dict(state_dict) 203 | return model 204 | 205 | 206 | def resnet101(pretrained=False, **kwargs): 207 | """Constructs a ResNet-101 model. 208 | Args: 209 | pretrained (bool): If True, returns a model pre-trained on ImageNet 210 | """ 211 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 212 | if pretrained: 213 | print('Initialize with pre-trained ResNet') 214 | from collections import OrderedDict 215 | state_dict = model.state_dict() 216 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) 217 | for k, v in pretrained_state_dict.items(): 218 | if k not in state_dict: 219 | continue 220 | state_dict[k] = v 221 | print('successfully load '+str(len(state_dict.keys()))+' keys') 222 | model.load_state_dict(state_dict) 223 | return model 224 | 225 | 226 | def resnet152(pretrained=False, **kwargs): 227 | """Constructs a ResNet-152 model. 228 | Args: 229 | pretrained (bool): If True, returns a model pre-trained on ImageNet 230 | """ 231 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 232 | if pretrained: 233 | from collections import OrderedDict 234 | state_dict = model.state_dict() 235 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) 236 | for k, v in pretrained_state_dict.items(): 237 | if k not in state_dict: 238 | continue 239 | state_dict[k] = v 240 | model.load_state_dict(state_dict) 241 | return model -------------------------------------------------------------------------------- /pytorch-cpn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | project_root = pathlib.Path(__file__).resolve().parents[2] 4 | 5 | __all__ = ['project_root'] 6 | -------------------------------------------------------------------------------- /pytorch-cpn/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from random import randint 5 | 6 | from .misc import * 7 | from .transforms import transform, transform_preds 8 | 9 | __all__ = ['accuracy', 'AverageMeter'] 10 | 11 | def get_preds(scores): 12 | ''' get predictions from score maps in torch Tensor 13 | return type: torch.LongTensor 14 | ''' 15 | assert scores.dim() == 4, 'Score maps should be 4-dim' 16 | maxval, idx = torch.max(scores.view(scores.size(0), scores.size(1), -1), 2) 17 | 18 | maxval = maxval.view(scores.size(0), scores.size(1), 1) 19 | idx = idx.view(scores.size(0), scores.size(1), 1) + 1 20 | 21 | preds = idx.repeat(1, 1, 2).float() 22 | 23 | preds[:,:,0] = (preds[:,:,0] - 1) % scores.size(3) + 1 24 | preds[:,:,1] = torch.floor((preds[:,:,1] - 1) / scores.size(3)) + 1 25 | 26 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 27 | preds *= pred_mask 28 | return preds 29 | 30 | def calc_dists(preds, target, normalize): 31 | preds = preds.float() 32 | target = target.float() 33 | dists = torch.zeros(preds.size(1), 
preds.size(0)) 34 | for n in range(preds.size(0)): 35 | for c in range(preds.size(1)): 36 | if target[n,c,0] > 1 and target[n, c, 1] > 1: 37 | dists[c, n] = torch.dist(preds[n,c,:], target[n,c,:])/normalize[n] 38 | else: 39 | dists[c, n] = -1 40 | return dists 41 | 42 | def dist_acc(dists, thr=0.5): 43 | ''' Return percentage below threshold while ignoring values with a -1 ''' 44 | if dists.ne(-1).sum() > 0: 45 | return dists.le(thr).eq(dists.ne(-1)).sum()*1.0 / dists.ne(-1).sum() 46 | else: 47 | return -1 48 | 49 | def accuracy(output, target, idxs, thr=0.5): 50 | ''' Calculate accuracy according to PCK, but uses ground truth heatmap rather than x,y locations 51 | First value to be returned is average accuracy across 'idxs', followed by individual accuracies 52 | ''' 53 | preds = get_preds(output) 54 | gts = get_preds(target) 55 | norm = torch.ones(preds.size(0))*output.size(3)/10 56 | dists = calc_dists(preds, gts, norm) 57 | 58 | acc = torch.zeros(len(idxs)+1) 59 | avg_acc = 0 60 | cnt = 0 61 | 62 | for i in range(len(idxs)): 63 | acc[i+1] = dist_acc(dists[idxs[i]-1]) 64 | if acc[i+1] >= 0: 65 | avg_acc = avg_acc + acc[i+1] 66 | cnt += 1 67 | 68 | if cnt != 0: 69 | acc[0] = avg_acc / cnt 70 | return acc 71 | 72 | def final_preds(output, center, scale, res): 73 | coords = get_preds(output) # float type 74 | 75 | # pose-processing 76 | for n in range(coords.size(0)): 77 | for p in range(coords.size(1)): 78 | hm = output[n][p] 79 | px = int(math.floor(coords[n][p][0])) 80 | py = int(math.floor(coords[n][p][1])) 81 | if px > 1 and px < res[0] and py > 1 and py < res[1]: 82 | diff = torch.Tensor([hm[py - 1][px] - hm[py - 1][px - 2], hm[py][px - 1]-hm[py - 2][px - 1]]) 83 | coords[n][p] += diff.sign() * .25 84 | coords += 0.5 85 | preds = coords.clone() 86 | 87 | # Transform back 88 | for i in range(coords.size(0)): 89 | preds[i] = transform_preds(coords[i], center[i], scale[i], res) 90 | 91 | if preds.dim() < 3: 92 | preds = preds.view(1, preds.size()) 93 | 94 | return preds 95 | 96 | 97 | class AverageMeter(object): 98 | """Computes and stores the average and current value""" 99 | def __init__(self): 100 | self.reset() 101 | 102 | def reset(self): 103 | self.val = 0 104 | self.avg = 0 105 | self.sum = 0 106 | self.count = 0 107 | 108 | def update(self, val, n=1): 109 | self.val = val 110 | self.sum += val * n 111 | self.count += n 112 | self.avg = self.sum / self.count 113 | -------------------------------------------------------------------------------- /pytorch-cpn/utils/func.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from advent.utils.loss import cross_entropy_2d 6 | 7 | 8 | def bce_loss(y_pred, y_label): 9 | y_truth_tensor = torch.FloatTensor(y_pred.size()) 10 | y_truth_tensor.fill_(y_label) 11 | y_truth_tensor = y_truth_tensor.to(y_pred.get_device()) 12 | return nn.BCEWithLogitsLoss()(y_pred, y_truth_tensor) 13 | 14 | 15 | def loss_calc(pred, label, device): 16 | """ 17 | This function returns cross entropy loss for semantic segmentation 18 | """ 19 | # out shape batch_size x channels x h x w -> batch_size x channels x h x w 20 | # label shape h x w x 1 x batch_size -> batch_size x 1 x h x w 21 | label = label.long().to(device) 22 | return cross_entropy_2d(pred, label) 23 | 24 | 25 | def lr_poly(base_lr, iter, max_iter, power): 26 | """ Poly_LR scheduler 27 | """ 28 | return base_lr * ((1 - float(iter) / max_iter) ** power) 29 | 30 | 31 | def 
-------------------------------------------------------------------------------- /pytorch-cpn/utils/func.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | 
5 | from .loss import cross_entropy_2d  # defined locally in utils/loss.py
6 | 
7 | 
8 | def bce_loss(y_pred, y_label):
9 |     y_truth_tensor = torch.FloatTensor(y_pred.size())
10 |     y_truth_tensor.fill_(y_label)
11 |     y_truth_tensor = y_truth_tensor.to(y_pred.device)
12 |     return nn.BCEWithLogitsLoss()(y_pred, y_truth_tensor)
13 | 
14 | 
15 | def loss_calc(pred, label, device):
16 |     """
17 |     Returns the cross-entropy loss for semantic segmentation.
18 |     """
19 |     # pred shape: batch_size x channels x h x w
20 |     # label shape: batch_size x h x w (after the cast below)
21 |     label = label.long().to(device)
22 |     return cross_entropy_2d(pred, label)
23 | 
24 | 
25 | def lr_poly(base_lr, cur_iter, max_iter, power):
26 |     """ Polynomial ("poly") LR decay schedule
27 |     """
28 |     return base_lr * ((1 - float(cur_iter) / max_iter) ** power)
29 | 
30 | 
31 | def _adjust_learning_rate(optimizer, i_iter, cfg, learning_rate):
32 |     lr = lr_poly(learning_rate, i_iter, cfg.TRAIN.MAX_ITERS, cfg.TRAIN.POWER)
33 |     optimizer.param_groups[0]['lr'] = lr
34 |     if len(optimizer.param_groups) > 1:
35 |         optimizer.param_groups[1]['lr'] = lr * 10
36 | 
37 | 
38 | def adjust_learning_rate(optimizer, i_iter, cfg):
39 |     """ Adjust the learning rate of the main segmentation network.
40 |     """
41 |     _adjust_learning_rate(optimizer, i_iter, cfg, cfg.TRAIN.LEARNING_RATE)
42 | 
43 | 
44 | def adjust_learning_rate_discriminator(optimizer, i_iter, cfg):
45 |     _adjust_learning_rate(optimizer, i_iter, cfg, cfg.TRAIN.LEARNING_RATE_D)
46 | 
47 | 
48 | def prob_2_entropy(prob):
49 |     """ Convert probabilistic prediction maps to weighted self-information maps.
50 |     """
51 |     n, c, h, w = prob.size()
52 |     return -torch.mul(prob, torch.log2(prob + 1e-30)) / np.log2(c)
53 | 
54 | 
55 | def fast_hist(a, b, n):
56 |     k = (a >= 0) & (a < n)
57 |     return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n)
58 | 
59 | 
60 | def per_class_iu(hist):
61 |     return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
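
# Usage sketch (illustrative, not from the original repo): the two pieces of
# func.py used for entropy-based training. prob_2_entropy turns softmax maps
# into weighted self-information maps; lr_poly is the polynomial decay applied
# by _adjust_learning_rate. Class count and iteration numbers are arbitrary.

import torch
from utils.func import lr_poly, prob_2_entropy

probs = torch.softmax(torch.randn(2, 19, 32, 64), dim=1)  # (n, c, h, w)
ent = prob_2_entropy(probs)           # same shape; per-pixel entropy sums to <= 1
print(ent.shape, float(ent.sum(dim=1).max()))

for step in (0, 45000, 90000):        # lr decays from base_lr to 0 over max_iter
    print(step, lr_poly(2.5e-4, step, 90000, 0.9))
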
-------------------------------------------------------------------------------- /pytorch-cpn/utils/imutils.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import scipy.misc  # note: scipy.misc imread/imresize/imrotate require an older scipy (< 1.2)
5 | import cv2
6 | 
7 | from .misc import *
8 | 
9 | def im_to_numpy(img):
10 |     img = to_numpy(img)
11 |     img = np.transpose(img, (1, 2, 0))  # H*W*C
12 |     return img
13 | 
14 | def im_to_torch(img):
15 |     img = np.transpose(img, (2, 0, 1))  # C*H*W
16 |     img = to_torch(img).float()
17 |     if img.max() > 1:
18 |         img /= 255
19 |     return img
20 | 
21 | def load_image(img_path):
22 |     # H x W x C => C x H x W
23 |     return im_to_torch(scipy.misc.imread(img_path, mode='RGB'))
24 | 
25 | def resize(img, owidth, oheight):
26 |     img = im_to_numpy(img)
27 |     print('%f %f' % (img.min(), img.max()))
28 |     img = scipy.misc.imresize(
29 |         img,
30 |         (oheight, owidth)
31 |     )
32 |     img = im_to_torch(img)
33 |     print('%f %f' % (img.min(), img.max()))
34 |     return img
35 | 
36 | 
37 | def generate_heatmap(heatmap, pt, sigma):
38 |     heatmap[int(pt[1])][int(pt[0])] = 1
39 |     heatmap = cv2.GaussianBlur(heatmap, sigma, 0)  # 'sigma' here is the Gaussian kernel size
40 |     am = np.amax(heatmap)
41 |     heatmap /= am / 255  # rescale so the peak is 255
42 |     return heatmap
43 | 
44 | 
45 | # =============================================================================
46 | # Helpful display functions
47 | # =============================================================================
48 | 
49 | def gauss(x, a, b, c, d=0):
50 |     return a * np.exp(-(x - b)**2 / (2 * c**2)) + d
51 | 
52 | def color_heatmap(x):
53 |     x = to_numpy(x)
54 |     color = np.zeros((x.shape[0], x.shape[1], 3))
55 |     color[:,:,0] = gauss(x, .5, .6, .2) + gauss(x, 1, .8, .3)
56 |     color[:,:,1] = gauss(x, 1, .5, .3)
57 |     color[:,:,2] = gauss(x, 1, .2, .3)
58 |     color[color > 1] = 1
59 |     color = (color * 255).astype(np.uint8)
60 |     return color
61 | 
62 | def imshow(img):
63 |     npimg = im_to_numpy(img*255).astype(np.uint8)
64 |     plt.imshow(npimg)
65 |     plt.axis('off')
66 | 
67 | def show_joints(img, pts):
68 |     imshow(img)
69 | 
70 |     for i in range(pts.size(0)):
71 |         if pts[i, 2] > 0:
72 |             plt.plot(pts[i, 0], pts[i, 1], 'yo')
73 |     plt.axis('off')
74 | 
75 | def show_sample(inputs, target):
76 |     num_sample = inputs.size(0)
77 |     num_joints = target.size(1)
78 |     height = target.size(2)
79 |     width = target.size(3)
80 | 
81 |     for n in range(num_sample):
82 |         inp = resize(inputs[n], width, height)
83 |         out = inp
84 |         for p in range(num_joints):
85 |             tgt = inp*0.5 + color_heatmap(target[n,p,:,:])*0.5
86 |             out = torch.cat((out, tgt), 2)
87 | 
88 |         imshow(out)
89 |         plt.show()
90 | 
91 | def sample_with_heatmap(inp, out, num_rows=2, parts_to_show=None):
92 |     inp = to_numpy(inp * 255)
93 |     out = to_numpy(out)
94 | 
95 |     img = np.zeros((inp.shape[1], inp.shape[2], inp.shape[0]))
96 |     for i in range(3):
97 |         img[:, :, i] = inp[i, :, :]
98 | 
99 |     if parts_to_show is None:
100 |         parts_to_show = np.arange(out.shape[0])
101 | 
102 |     # Generate a single image to display input/output pair
103 |     num_cols = int(np.ceil(float(len(parts_to_show)) / num_rows))
104 |     size = img.shape[0] // num_rows
105 | 
106 |     full_img = np.zeros((img.shape[0], size * (num_cols + num_rows), 3), np.uint8)
107 |     full_img[:img.shape[0], :img.shape[1]] = img
108 | 
109 |     inp_small = scipy.misc.imresize(img, [size, size])
110 | 
111 |     # Set up heatmap display for each part
112 |     for i, part in enumerate(parts_to_show):
113 |         part_idx = part
114 |         out_resized = scipy.misc.imresize(out[part_idx], [size, size])
115 |         out_resized = out_resized.astype(float)/255
116 |         out_img = inp_small.copy() * .3
117 |         color_hm = color_heatmap(out_resized)
118 |         out_img += color_hm * .7
119 | 
120 |         col_offset = (i % num_cols + num_rows) * size
121 |         row_offset = (i // num_cols) * size
122 |         full_img[row_offset:row_offset + size, col_offset:col_offset + size] = out_img
123 | 
124 |     return full_img
125 | 
126 | def batch_with_heatmap(inputs, outputs, mean=torch.Tensor([0.5, 0.5, 0.5]), num_rows=2, parts_to_show=None):
127 |     batch_img = []
128 |     for n in range(min(inputs.size(0), 4)):
129 |         inp = inputs[n] + mean.view(3, 1, 1).expand_as(inputs[n])
130 |         batch_img.append(
131 |             sample_with_heatmap(inp.clamp(0, 1), outputs[n], num_rows=num_rows, parts_to_show=parts_to_show)
132 |         )
133 |     return np.concatenate(batch_img)
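
# Usage sketch (illustrative, not from the original repo): building a target
# heatmap with generate_heatmap. Note the third argument is forwarded to
# cv2.GaussianBlur as the kernel size, so it must be a pair of odd ints; the
# returned map is rescaled so its peak equals 255.

import numpy as np
from utils.imutils import generate_heatmap

hm = np.zeros((64, 48), dtype=np.float32)    # (height, width)
hm = generate_heatmap(hm, (20, 30), (7, 7))  # keypoint at x=20, y=30
print(hm.max(), np.unravel_index(hm.argmax(), hm.shape))  # 255.0 (30, 20)
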
-------------------------------------------------------------------------------- /pytorch-cpn/utils/logger.py: --------------------------------------------------------------------------------
1 | # A simple torch style logger
2 | # (C) Wei YANG 2017
3 | import os
4 | import sys
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | 
8 | __all__ = ['Logger', 'LoggerMonitor', 'savefig']
9 | 
10 | def savefig(fname, dpi=None):
11 |     dpi = 150 if dpi is None else dpi
12 |     plt.savefig(fname, dpi=dpi)
13 | 
14 | def plot_overlap(logger, names=None):
15 |     names = logger.names if names is None else names
16 |     numbers = logger.numbers
17 |     for _, name in enumerate(names):
18 |         x = np.arange(len(numbers[name]))
19 |         plt.plot(x, np.asarray(numbers[name]))
20 |     return [logger.title + '(' + name + ')' for name in names]
21 | 
22 | class Logger(object):
23 |     '''Save training process to log file with simple plot function.'''
24 |     def __init__(self, fpath, title=None, resume=False):
25 |         self.file = None
26 |         self.resume = resume
27 |         self.title = '' if title is None else title
28 |         if fpath is not None:
29 |             if resume:
30 |                 self.file = open(fpath, 'r')
31 |                 name = self.file.readline()
32 |                 self.names = name.rstrip().split('\t')
33 |                 self.numbers = {}
34 |                 for _, name in enumerate(self.names):
35 |                     self.numbers[name] = []
36 | 
37 |                 for numbers in self.file:
38 |                     numbers = numbers.rstrip().split('\t')
39 |                     for i in range(0, len(numbers)):
40 |                         self.numbers[self.names[i]].append(numbers[i])
41 |                 self.file.close()
42 |                 self.file = open(fpath, 'a')
43 |             else:
44 |                 self.file = open(fpath, 'w')
45 | 
46 |     def set_names(self, names):
47 |         if self.resume:
48 |             pass
49 |         # initialize numbers as empty list
50 |         self.numbers = {}
51 |         self.names = names
52 |         for _, name in enumerate(self.names):
53 |             self.file.write(name)
54 |             self.file.write('\t')
55 |             self.numbers[name] = []
56 |         self.file.write('\n')
57 |         self.file.flush()
58 | 
59 | 
60 |     def append(self, numbers):
61 |         assert len(self.names) == len(numbers), 'Numbers do not match names'
62 |         for index, num in enumerate(numbers):
63 |             self.file.write("{0:.6f}".format(num))
64 |             self.file.write('\t')
65 |             self.numbers[self.names[index]].append(num)
66 |         self.file.write('\n')
67 |         self.file.flush()
68 | 
69 |     def plot(self, names=None):
70 |         names = self.names if names is None else names
71 |         numbers = self.numbers
72 |         for _, name in enumerate(names):
73 |             x = np.arange(len(numbers[name]))
74 |             plt.plot(x, np.asarray(numbers[name]))
75 |         plt.legend([self.title + '(' + name + ')' for name in names])
76 |         plt.grid(True)
77 | 
78 |     def close(self):
79 |         if self.file is not None:
80 |             self.file.close()
81 | 
82 | class LoggerMonitor(object):
83 |     '''Load and visualize multiple logs.'''
84 |     def __init__(self, paths):
85 |         '''paths is a dictionary of {name: filepath} pairs'''
86 |         self.loggers = []
87 |         for title, path in paths.items():
88 |             logger = Logger(path, title=title, resume=True)
89 |             self.loggers.append(logger)
90 | 
91 |     def plot(self, names=None):
92 |         plt.figure()
93 |         plt.subplot(121)
94 |         legend_text = []
95 |         for logger in self.loggers:
96 |             legend_text += plot_overlap(logger, names)
97 |         plt.legend(legend_text, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
98 |         plt.grid(True)
99 | 
100 | if __name__ == '__main__':
101 |     # # Example
102 |     # logger = Logger('test.txt')
103 |     # logger.set_names(['Train loss', 'Valid loss','Test loss'])
104 | 
105 |     # length = 100
106 |     # t = np.arange(length)
107 |     # train_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1
108 |     # valid_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1
109 |     # test_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1
110 | 
111 |     # for i in range(0, length):
112 |     #     logger.append([train_loss[i], valid_loss[i], test_loss[i]])
113 |     # logger.plot()
114 | 
115 |     # Example: logger monitor
116 |     paths = {
117 |         'resadvnet20': '/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet20/log.txt',
118 |         'resadvnet32': '/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet32/log.txt',
119 |         'resadvnet44': '/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet44/log.txt',
120 |     }
121 | 
122 |     field = ['Valid Acc.']
123 | 
124 |     monitor = LoggerMonitor(paths)
125 |     monitor.plot(names=field)
126 |     savefig('test.eps')
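
# Usage sketch mirroring the commented-out demo above: one Logger per run, with
# tab-separated columns that can be re-read later via resume=True or LoggerMonitor.
# The file name and column names are arbitrary.

from utils.logger import Logger, savefig

logger = Logger('log.txt', title='exlpose')
logger.set_names(['Epoch', 'Train Loss', 'Val Acc.'])
for epoch in range(3):
    logger.append([epoch, 1.0 / (epoch + 1), 0.5 + 0.1 * epoch])
logger.plot()
savefig('log.eps')
logger.close()
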
-------------------------------------------------------------------------------- /pytorch-cpn/utils/loss.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | 
6 | 
7 | def cross_entropy_2d(predict, target):
8 |     """
9 |     Args:
10 |         predict:(n, c, h, w)
11 |         target:(n, h, w)
12 |     """
13 |     assert not target.requires_grad
14 |     assert predict.dim() == 4
15 |     assert target.dim() == 3
16 |     assert predict.size(0) == target.size(0), f"{predict.size(0)} vs {target.size(0)}"
17 |     assert predict.size(2) == target.size(1), f"{predict.size(2)} vs {target.size(1)}"
18 |     assert predict.size(3) == target.size(2), f"{predict.size(3)} vs {target.size(2)}"
19 |     n, c, h, w = predict.size()
20 |     target_mask = (target >= 0) * (target != 255)  # 255 marks ignored pixels
21 |     target = target[target_mask]
22 |     if target.numel() == 0:  # no valid pixels in this batch
23 |         return Variable(torch.zeros(1))
24 |     predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
25 |     predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
26 |     loss = F.cross_entropy(predict, target, reduction='mean')
27 |     return loss
28 | 
29 | 
30 | def entropy_loss(v):
31 |     """
32 |     Entropy loss for probabilistic prediction vectors
33 |     input: batch_size x channels x h x w
34 |     output: scalar (mean normalized entropy over all pixels)
35 |     """
36 |     assert v.dim() == 4
37 |     n, c, h, w = v.size()
38 |     return -torch.sum(torch.mul(v, torch.log2(v + 1e-30))) / (n * h * w * np.log2(c))
-------------------------------------------------------------------------------- /pytorch-cpn/utils/misc.py: --------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import torch
4 | import math
5 | import numpy as np
6 | import scipy.io
7 | import matplotlib.pyplot as plt
8 | 
9 | def to_numpy(tensor):
10 |     if torch.is_tensor(tensor):
11 |         return tensor.cpu().numpy()
12 |     elif type(tensor).__module__ != 'numpy':
13 |         raise ValueError("Cannot convert {} to numpy array"
14 |                          .format(type(tensor)))
15 |     return tensor
16 | 
17 | 
18 | def to_torch(ndarray):
19 |     if type(ndarray).__module__ == 'numpy':
20 |         return torch.from_numpy(ndarray)
21 |     elif not torch.is_tensor(ndarray):
22 |         raise ValueError("Cannot convert {} to torch tensor"
23 |                          .format(type(ndarray)))
24 |     return ndarray
25 | 
26 | 
27 | def save_checkpoint(state, preds, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar', snapshot=None):
28 |     preds = to_numpy(preds)
29 |     filepath = os.path.join(checkpoint, filename)
30 |     torch.save(state, filepath)
31 |     scipy.io.savemat(os.path.join(checkpoint, 'preds.mat'), mdict={'preds': preds})
32 | 
33 |     if snapshot and state['epoch'] % snapshot == 0:
34 |         shutil.copyfile(filepath, os.path.join(checkpoint, 'checkpoint_{}.pth.tar'.format(state['epoch'])))
35 | 
36 |     if is_best:
37 |         shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))
38 |         scipy.io.savemat(os.path.join(checkpoint, 'preds_best.mat'), mdict={'preds': preds})
39 | 
40 | def copy_log(filepath='checkpoint'):
41 |     filepath = os.path.join(filepath, 'log.txt')
42 |     shutil.copyfile(filepath, os.path.join('log_backup.txt'))
43 | 
44 | def save_model(state, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
45 |     filename = 'epoch' + str(state['epoch']) + filename
46 |     filepath = os.path.join(checkpoint, filename)
47 |     torch.save(state, filepath)
48 | 
49 |     # if snapshot and state['epoch'] % snapshot == 0:
50 |     #     shutil.copyfile(filepath, os.path.join(checkpoint, 'checkpoint_{}.pth.tar'.format(state['epoch'])))
51 | 
52 | def save_pred(preds, checkpoint='checkpoint', filename='preds_valid.mat'):
53 |     preds = to_numpy(preds)
54 |     filepath = os.path.join(checkpoint, filename)
55 |     scipy.io.savemat(filepath, mdict={'preds': preds})
56 | 
57 | 
58 | def adjust_learning_rate(optimizer, epoch, schedule, gamma):
59 |     """Multiply the learning rate by gamma whenever epoch hits a milestone in schedule"""
60 |     if epoch in schedule:
61 |         for param_group in optimizer.param_groups:
62 |             param_group['lr'] *= gamma
63 |     return optimizer.state_dict()['param_groups'][0]['lr']
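
# Usage sketch (illustrative, not from the original repo): the epoch-milestone
# LR decay and checkpoint helpers from misc.py. The model, milestones, and gamma
# are arbitrary; save_model prefixes the file name with the epoch number.

import torch
from utils.misc import adjust_learning_rate, save_model

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
for epoch in range(1, 4):
    lr = adjust_learning_rate(optimizer, epoch, schedule=[2, 3], gamma=0.5)
    save_model({'epoch': epoch, 'state_dict': model.state_dict()},
               checkpoint='.', filename='checkpoint.pth.tar')
    print(epoch, lr)
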
-------------------------------------------------------------------------------- /pytorch-cpn/utils/osutils.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import errno
4 | 
5 | def mkdir_p(dir_path):
6 |     try:
7 |         os.makedirs(dir_path)
8 |     except OSError as e:
9 |         if e.errno != errno.EEXIST:
10 |             raise
11 | 
12 | def isfile(fname):
13 |     return os.path.isfile(fname)
14 | 
15 | def isdir(dirname):
16 |     return os.path.isdir(dirname)
17 | 
18 | def join(path, *paths):
19 |     return os.path.join(path, *paths)
20 | 
21 | def add_pypath(path):
22 |     if path not in sys.path:
23 |         sys.path.insert(0, path)
-------------------------------------------------------------------------------- /pytorch-cpn/utils/serialization.py: --------------------------------------------------------------------------------
1 | import pickle
2 | import json
3 | import yaml
4 | from pathlib import Path
5 | import os
6 | 
7 | 
8 | def make_parent(file_path):
9 |     file_path = Path(file_path)
10 |     os.makedirs(file_path.parent, exist_ok=True)
11 | 
12 | 
13 | def pickle_dump(python_object, file_path):
14 |     make_parent(file_path)
15 |     with open(file_path, 'wb') as f:
16 |         pickle.dump(python_object, f)
17 | 
18 | 
19 | def pickle_load(file_path):
20 |     with open(file_path, 'rb') as f:
21 |         return pickle.load(f)
22 | 
23 | 
24 | def json_load(file_path):
25 |     with open(file_path, 'r') as fp:
26 |         return json.load(fp)
27 | 
28 | 
29 | def yaml_dump(python_object, file_path):
30 |     make_parent(file_path)
31 |     with open(file_path, 'w') as f:
32 |         yaml.dump(python_object, f, default_flow_style=False)
33 | 
34 | 
35 | def yaml_load(file_path):
36 |     with open(file_path, 'r') as f:
37 |         return yaml.safe_load(f)
-------------------------------------------------------------------------------- /pytorch-cpn/utils/transforms.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import scipy.misc
4 | import matplotlib.pyplot as plt
5 | import torch
6 | 
7 | from .misc import *
8 | from .imutils import *
9 | 
10 | 
11 | def color_normalize(x, mean):
12 |     if x.size(0) == 1:
13 |         x = x.repeat(3, 1, 1)
14 |     normalized_mean = mean / 255
15 |     for t, m in zip(x, normalized_mean):
16 |         t.sub_(m)
17 |     return x
18 | 
19 | 
20 | def flip_back(flip_output, dataset='mpii'):
21 |     """
22 |     flip output map
23 |     """
24 |     if dataset == 'mpii':
25 |         matchedParts = (
26 |             [0,5],   [1,4],   [2,3],
27 |             [10,15], [11,14], [12,13]
28 |         )
29 |     else:
30 |         raise ValueError('Not supported dataset: ' + dataset)
31 | 
32 |     # flip output horizontally
33 |     flip_output = fliplr(flip_output.numpy())
34 | 
35 |     # Change left-right parts
36 |     for pair in matchedParts:
37 |         tmp = np.copy(flip_output[:, pair[0], :, :])
38 |         flip_output[:, pair[0], :, :] = flip_output[:, pair[1], :, :]
39 |         flip_output[:, pair[1], :, :] = tmp
40 | 
41 |     return torch.from_numpy(flip_output).float()
42 | 
43 | 
44 | def shufflelr(x, width, dataset='mpii'):
45 |     """
46 |     flip coords
47 |     """
48 |     if dataset == 'mpii':
49 |         matchedParts = (
50 |             [0,5],   [1,4],   [2,3],
51 |             [10,15], [11,14], [12,13]
52 |         )
53 |     else:
54 |         raise ValueError('Not supported dataset: ' + dataset)
55 | 
56 |     # Flip horizontal
57 |     x[:, 0] = width - x[:, 0]
58 | 
59 |     # Change left-right parts
60 |     for pair in matchedParts:
61 |         tmp = x[pair[0], :].clone()
62 |         x[pair[0], :] = x[pair[1], :]
63 |         x[pair[1], :] = tmp
64 | 
65 |     return x
66 | 
67 | 
68 | def fliplr(x):
69 |     if x.ndim == 3:
70 |         x = np.transpose(np.fliplr(np.transpose(x, (0, 2, 1))), (0, 2, 1))
71 |     elif x.ndim == 4:
72 |         for i in range(x.shape[0]):
73 |             x[i] = np.transpose(np.fliplr(np.transpose(x[i], (0, 2, 1))), (0, 2, 1))
74 |     return x.astype(float)
75 | 
76 | 
77 | def get_transform(center, scale, res, rot=0):
78 |     """
79 |     Build the transform matrix from original image coordinates to the cropped output of size res (scale is in units of 200 px).
80 |     """
81 |     # Generate transformation matrix
82 |     h = 200 * scale
83 |     t = np.zeros((3, 3))
84 |     t[0, 0] = float(res[1]) / h
85 |     t[1, 1] = float(res[0]) / h
86 |     t[0, 2] = res[1] * (-float(center[0]) / h + .5)
87 |     t[1, 2] = res[0] * (-float(center[1]) / h + .5)
88 |     t[2, 2] = 1
89 |     if rot != 0:
90 |         rot = -rot  # To match direction of rotation from cropping
91 |         rot_mat = np.zeros((3, 3))
92 |         rot_rad = rot * np.pi / 180
93 |         sn, cs = np.sin(rot_rad), np.cos(rot_rad)
94 |         rot_mat[0, :2] = [cs, -sn]
95 |         rot_mat[1, :2] = [sn, cs]
96 |         rot_mat[2, 2] = 1
97 |         # Need to rotate around center
98 |         t_mat = np.eye(3)
99 |         t_mat[0, 2] = -res[1] / 2
100 |         t_mat[1, 2] = -res[0] / 2
101 |         t_inv = t_mat.copy()
102 |         t_inv[:2, 2] *= -1
103 |         t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
104 |     return t
105 | 
106 | 
107 | def transform(pt, center, scale, res, invert=0, rot=0):
108 |     # Transform pixel location to different reference
109 |     t = get_transform(center, scale, res, rot=rot)
110 |     if invert:
111 |         t = np.linalg.inv(t)
112 |     new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
113 |     new_pt = np.dot(t, new_pt)
114 |     return new_pt[:2].astype(int) + 1
115 | 
116 | 
117 | def transform_preds(coords, center, scale, res):
118 |     for p in range(coords.size(0)):
119 |         coords[p, 0:2] = to_torch(transform(coords[p, 0:2], center, scale, res, 1, 0))
120 |     return coords
121 | 
122 | 
123 | def crop(img, center, scale, res, rot=0):
124 |     img = im_to_numpy(img)
125 | 
126 |     # Preprocessing for efficient cropping
127 |     ht, wd = img.shape[0], img.shape[1]
128 |     sf = scale * 200.0 / res[0]
129 |     if sf < 2:
130 |         sf = 1
131 |     else:
132 |         new_size = int(math.floor(max(ht, wd) / sf))
133 |         new_ht = int(math.floor(ht / sf))
134 |         new_wd = int(math.floor(wd / sf))
135 |         if new_size < 2:
136 |             return torch.zeros(res[0], res[1], img.shape[2]) \
137 |                 if len(img.shape) > 2 else torch.zeros(res[0], res[1])
138 |         else:
139 |             img = scipy.misc.imresize(img, [new_ht, new_wd])
140 |             center = center * 1.0 / sf
141 |             scale = scale / sf
142 | 
143 |     # Upper left point
144 |     ul = np.array(transform([0, 0], center, scale, res, invert=1))
145 |     # Bottom right point
146 |     br = np.array(transform(res, center, scale, res, invert=1))
147 | 
148 |     # Padding so that when rotated proper amount of context is included
149 |     pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
150 |     if rot != 0:
151 |         ul -= pad
152 |         br += pad
153 | 
154 |     new_shape = [br[1] - ul[1], br[0] - ul[0]]
155 |     if len(img.shape) > 2:
156 |         new_shape += [img.shape[2]]
157 |     new_img = np.zeros(new_shape)
158 | 
159 |     # Range to fill new array
160 |     new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
161 |     new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
162 |     # Range to sample from original image
163 |     old_x = max(0, ul[0]), min(len(img[0]), br[0])
164 |     old_y = max(0, ul[1]), min(len(img), br[1])
165 |     new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]
166 | 
167 |     if rot != 0:
168 |         # Remove padding
169 |         new_img = scipy.misc.imrotate(new_img, rot)
170 |         new_img = new_img[pad:-pad, pad:-pad]
171 | 
172 |     new_img = im_to_torch(scipy.misc.imresize(new_img, res))
173 |     return new_img
-------------------------------------------------------------------------------- /pytorch-cpn/utils/viz_segmask.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | 
4 | # 19-class Cityscapes palette, one RGB triplet per class
5 | palette = [128, 64, 128,  244, 35, 232,  70, 70, 70,  102, 102, 156,  190, 153, 153,
6 |            153, 153, 153,  250, 170, 30,  220, 220, 0,  107, 142, 35,  152, 251, 152,
7 |            70, 130, 180,  220, 20, 60,  255, 0, 0,  0, 0, 142,  0, 0, 70,
8 |            0, 60, 100,  0, 80, 100,  0, 0, 230,  119, 11, 32]
9 | zero_pad = 256 * 3 - len(palette)
10 | for i in range(zero_pad):
11 |     palette.append(0)
12 | 
13 | 
14 | def colorize_mask(mask):
15 |     # mask: numpy array of the mask
16 |     new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P')
17 |     new_mask.putpalette(palette)
18 |     return new_mask
19 | 
--------------------------------------------------------------------------------
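
# Usage sketch (illustrative, not from the original repo): colorize_mask maps a
# label image to a palettized PIL image using the Cityscapes color scheme above.
# The label map and output path are arbitrary.

import numpy as np
from utils.viz_segmask import colorize_mask

mask = np.random.randint(0, 19, size=(256, 512))  # per-pixel class ids
colorize_mask(mask).save('mask_vis.png')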