├── .gitignore
├── LICENSE
├── README.md
├── checkpoint
│   └── Experiments.txt
├── data
│   └── coco.sh
├── opt.py
├── src
│   ├── __init__.py
│   ├── data_loader.py
│   ├── eval.py
│   ├── gaussian.py
│   ├── model.py
│   ├── requirements.txt
│   └── utils.py
├── test.py
└── train.py

/.gitignore:
--------------------------------------------------------------------------------
#
*.pyc
*.json
*.pth.tar

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MULTIPOSENET: FAST MULTI-PERSON POSE ESTIMATION USING POSE RESIDUAL NETWORK
SOFTWARE LICENSE AGREEMENT
ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY

BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.

This is a license agreement ("Agreement") between your academic institution or non-profit organization or self
(called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement).
All rights not specifically granted to you in this Agreement are reserved for Licensor.

RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement
and hereby grants to Licensee a personal, non-exclusive,
non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense,
pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i)
the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant
to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by
Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and
sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or
modifications created or made by You to any of the items specified in (i).

CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to
receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement.
Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution,
or publication.

COPYRIGHT: The Software is owned by Licensor and is protected by United
States copyright laws and applicable international treaties and/or conventions.

PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and
agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the
Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.

DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and
any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You
under this Agreement.
You may only use such derivatives and modifications for your own noncommercial internal
research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in
violation of this Agreement.

BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for
internal noncommercial use at a single site within its organization provided that all information appearing
in or on the original labels, including the copyright and trademark notices, are copied onto the labels of the copies.

USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein.
Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark
"EpipolarPose", "Middle East Technical University" or any renditions thereof without
the prior written permission of Licensor.

You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third
parties access to prior or present versions (or any parts thereof) of the Software.

ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor.
Any attempted assignment without such consent shall be null and void.

TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by
downloading the Software or by using the Software until terminated as provided below.

The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement.
Licensee may terminate this Agreement by ceasing to use the Software. Upon any termination of this Agreement,
Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect
the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of
confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.

FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to
Licensor for Licensee's use of the Software in accordance with this Agreement.

DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES
OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS
ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.

SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.

EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor
shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to
Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.

EXPORT REGULATION: Licensee agrees to comply with any and all applicable
U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered
by the Office of Foreign Assets Control.
SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court
or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions
shall not in any way be affected or impaired thereby.

NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be
construed as a waiver of any future or other exercise of such right or remedy by Licensor.

ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and
Licensor as to the matter set forth herein and supersedes any previous agreements, understandings,
and arrangements between the parties relating hereto.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Pose Residual Network

This repository contains a PyTorch implementation of the Pose Residual Network (PRN) presented in our ECCV 2018 paper:

Muhammed Kocabas, Salih Karagoz, Emre Akbas. MultiPoseNet: Fast Multi-Person Pose Estimation using Pose Residual Network. In ECCV, 2018. [arXiv](https://arxiv.org/abs/1807.04067)

PRN is described in Section 3.2 of the paper.

## Getting Started
We have tested our method on the [COCO dataset](http://cocodataset.org).

### Prerequisites

```
python
pytorch
numpy
tqdm
pycocotools
progress
scikit-image
```

### Installing

1. Clone this repository:
   `git clone https://github.com/salihkaragoz/pose-residual-network-pytorch.git`

2. Install [PyTorch](https://pytorch.org/).

3. `pip install -r src/requirements.txt`

4. Download the COCO train2017 and val2017 annotations by running `bash data/coco.sh` (download size: ~240 MB).

## Training

`python train.py`

For all available options, see `opt.py`.

## Testing

1. Download the pre-trained [model](https://drive.google.com/file/d/1OhdMllLGnpRAk6Wexw8LzXF_EHiolVj1/view?usp=sharing).

2. `python test.py --test_cp=PathToPreTrainedModel/PRN.pth.tar`

## Results
Results on COCO val2017, using ground-truth person boxes and keypoints as input.
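These numbers are produced by the testing command above with the pre-trained model. Because ground-truth boxes and keypoint locations are fed to PRN, they measure the network's assignment accuracy in isolation rather than the accuracy of a full detection pipeline.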

```
Average Precision (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.892
Average Precision (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.978
Average Precision (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.921
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.883
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.912
Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.917
Average Recall    (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.982
Average Recall    (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.937
Average Recall    (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.902
Average Recall    (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.944
```

## License

This code is released for academic and non-profit, noncommercial research use only; see the [LICENSE](LICENSE) file for details.

## Citation
If you find this code useful for your research, please consider citing our paper:
```
@Inproceedings{kocabas18prn,
  Title     = {Multi{P}ose{N}et: Fast Multi-Person Pose Estimation using Pose Residual Network},
  Author    = {Kocabas, Muhammed and Karagoz, Salih and Akbas, Emre},
  Booktitle = {European Conference on Computer Vision (ECCV)},
  Year      = {2018}
}
```

--------------------------------------------------------------------------------
/checkpoint/Experiments.txt:
--------------------------------------------------------------------------------


--------------------------------------------------------------------------------
/data/coco.sh:
--------------------------------------------------------------------------------
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
unzip annotations_trainval2017.zip
rm annotations_trainval2017.zip
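# Note: only the annotation archive is fetched. PRN trains and evaluates
# directly on the COCO keypoint annotations; the image files themselves are
# never read, so no image download is required.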

--------------------------------------------------------------------------------
/opt.py:
--------------------------------------------------------------------------------
import argparse
from pprint import pprint


class Options:
    def __init__(self):
        self.parser = argparse.ArgumentParser()
        self.opt = None

    def _initial(self):

        # -------------------------- General Training Options
        self.parser.add_argument('--lr', type=float, default=1.0e-3, help='Learning rate')
        self.parser.add_argument('--lr_gamma', type=float, default=0.9, help='Learning rate decay factor')
        self.parser.add_argument('--number_of_epoch', type=int, default=16)
        self.parser.add_argument('--num_workers', type=int, default=4)
        self.parser.add_argument('--batch_size', type=int, default=8)
        self.parser.add_argument('--node_count', type=int, default=1024, help='Hidden layer node count')
        # -------------------------- General Training Options

        self.parser.add_argument('--exp', type=str, default='test/', help='Experiment name')

        # --------------------------
        self.parser.add_argument('--coeff', type=int, default=2, help='Coefficient of bbox size')
        # `threshold` is a fraction of the bbox size, so its type must be float (was: type=int)
        self.parser.add_argument('--threshold', type=float, default=0.21, help='BBox padding threshold')
        self.parser.add_argument('--test_cp', type=str, default='checkpoint/test/default.pth.tar', help='Path to model for testing')
        self.parser.add_argument('--num_of_keypoints', type=int, default=3, help='Minimum number of keypoints for each bbox in training')
        self.parser.add_argument('--test_keypoint_count', type=int, default=0, help='Validate with a different keypoint count')
        self.parser.add_argument('--window_size', type=int, default=15, help='Window size for cropping')
        # --------------------------

    def _print(self):
        print("\n==================Options=================")
        pprint(vars(self.opt), indent=4)
        print("==========================================\n")

    def parse(self):
        self._initial()
        self.opt = self.parser.parse_args()
        self._print()
        return self.opt

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), "progress"))

--------------------------------------------------------------------------------
/src/data_loader.py:
--------------------------------------------------------------------------------
import math
import numpy as np
from skimage.filters import gaussian
from torch.utils.data import Dataset


class CocoDataset(Dataset):
    def __init__(self, coco_train, opt):
        self.coco_train = coco_train
        self.num_of_keypoints = opt.num_of_keypoints
        self.anns = self.get_anns(self.coco_train)
        self.bbox_height = opt.coeff * 28
        self.bbox_width = opt.coeff * 18
        self.threshold = opt.threshold

    def __len__(self):
        return len(self.anns)

    def __getitem__(self, item):
        ann_data = self.anns[item]
        input, label = self.get_data(ann_data, self.coco_train)
        return input, label

    def get_data(self, ann_data, coco):
        weights = np.zeros((self.bbox_height, self.bbox_width, 17))
        output = np.zeros((self.bbox_height, self.bbox_width, 17))

        bbox = ann_data['bbox']
        x = int(bbox[0])
        y = int(bbox[1])
        w = float(bbox[2])
        h = float(bbox[3])

        x_scale = float(self.bbox_width) / math.ceil(w)
        y_scale = float(self.bbox_height) / math.ceil(h)

        kpx = ann_data['keypoints'][0::3]
        kpy = ann_data['keypoints'][1::3]
        kpv = ann_data['keypoints'][2::3]

        for j in range(17):
            if kpv[j] > 0:
                x0 = int((kpx[j] - x) * x_scale)
                y0 = int((kpy[j] - y) * y_scale)
                # Clamp joints that fall outside the box onto its border.
                # (The original if/elif cascade silently wrapped negative
                # indices around; min/max clamping is the intended behavior.)
                x0 = min(max(x0, 0), self.bbox_width - 1)
                y0 = min(max(y0, 0), self.bbox_height - 1)
                output[y0, x0, j] = 1
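
        # `output` is the single-person ground-truth label: one peak per
        # visible joint of this annotation. `weights`, built next, is the
        # network input: every keypoint of every person that falls inside the
        # padded box, so PRN has to learn to pick out a single coherent pose.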
        img_id = ann_data['image_id']
        img_data = coco.loadImgs(img_id)[0]
        ann_data = coco.loadAnns(coco.getAnnIds(img_data['id']))

        for ann in ann_data:
            kpx = ann['keypoints'][0::3]
            kpy = ann['keypoints'][1::3]
            kpv = ann['keypoints'][2::3]

            for j in range(17):
                if kpv[j] > 0:
                    if bbox[0] - bbox[2] * self.threshold < kpx[j] < bbox[0] + bbox[2] * (1 + self.threshold):
                        if bbox[1] - bbox[3] * self.threshold < kpy[j] < bbox[1] + bbox[3] * (1 + self.threshold):
                            x0 = int((kpx[j] - x) * x_scale)
                            y0 = int((kpy[j] - y) * y_scale)
                            x0 = min(max(x0, 0), self.bbox_width - 1)
                            y0 = min(max(y0, 0), self.bbox_height - 1)
                            weights[y0, x0, j] = 1

        for t in range(17):
            weights[:, :, t] = gaussian(weights[:, :, t])
        output = gaussian(output, sigma=2, mode='constant', multichannel=True)
        # weights = gaussian_multi_input_mp(weights)
        # output = gaussian_multi_output(output)
        return weights, output

    def get_anns(self, coco):
        """
        :param coco: COCO instance
        :return: list of non-crowd person annotations with more than
                 `num_of_keypoints` labeled keypoints, sorted by keypoint
                 count in descending order
        """
        ann_ids = coco.getAnnIds()
        anns = []
        for i in ann_ids:
            ann = coco.loadAnns(i)[0]
            if ann['iscrowd'] == 0 and ann['num_keypoints'] > self.num_of_keypoints:
                anns.append(ann)
        return sorted(anns, key=lambda k: k['num_keypoints'], reverse=True)

--------------------------------------------------------------------------------
/src/eval.py:
--------------------------------------------------------------------------------
import os
import math
import json
import numpy as np
from tqdm import tqdm
from random import shuffle

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .gaussian import gaussian, crop, gaussian_multi_input_mp

import torch


def Evaluation(model, optin):
    print('------------Evaluation Started------------')
    coeff = optin.coeff
    in_thres = optin.threshold
    test_keypoint_count = optin.test_keypoint_count
    n_kernel = optin.window_size
    modelname = 'temporary'

    model.eval()

    cocodir = 'data/annotations/person_keypoints_val2017.json'
    ann = json.load(open(cocodir))
    bbox_results = ann['annotations']

    coco = COCO(cocodir)
    img_ids = coco.getImgIds(catIds=[1])

    peak_results = []

    # (the outer loop variable was renamed from `i`, which the inner
    # joint loop below shadowed)
    for img_id in img_ids:
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        kps = [a['keypoints'] for a in anns]

        idx = 0

        ks = []
        for i in range(17):
            t = []
            for k in kps:
                x = k[0::3][i]
                y = k[1::3][i]
                v = k[2::3][i]

                if v > 0:
                    t.append([x, y, 1, idx])
                    idx += 1
            ks.append(t)
        image_id = anns[0]['image_id']
        peaks = ks

        element = {
            'image_id': image_id,
            'peaks': peaks,
            'file_name': coco.loadImgs(image_id)[0]['file_name']
        }

        peak_results.append(element)

    shuffle(peak_results)

    my_results = []
    image_ids = []

    w = int(18 * coeff)
    h = int(28 * coeff)

    temporary_peak_res = []
    for p in peak_results:
        if sum(1 for i in p['peaks'] if i != []) >= test_keypoint_count:
            temporary_peak_res.append(p)
    peak_results = temporary_peak_res

    for p in tqdm(peak_results):
        idx = p['image_id']
        image_ids.append(idx)

        peaks = p['peaks']
        bboxes = [k['bbox'] for k in bbox_results if k['image_id'] == idx]

        if len(bboxes) == 0 or len(peaks) == 0:
            continue
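
        # weights_bbox packs, per candidate box, an (h, w, 4, 17) tensor whose
        # second-to-last axis holds: [0] the binary peak map fed to PRN,
        # [1] the peak score, [2] the peak's unique id, [3] a small placeholder
        # probability. Channels 1-3 are bookkeeping for the assignment step.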
        weights_bbox = np.zeros((len(bboxes), h, w, 4, 17))

        for joint_id, peak in enumerate(peaks):

            for instance_id, instance in enumerate(peak):

                p_x = instance[0]
                p_y = instance[1]

                for bbox_id, b in enumerate(bboxes):

                    is_inside = p_x > b[0] - b[2] * in_thres and \
                                p_y > b[1] - b[3] * in_thres and \
                                p_x < b[0] + b[2] * (1.0 + in_thres) and \
                                p_y < b[1] + b[3] * (1.0 + in_thres)

                    if is_inside:
                        x_scale = float(w) / math.ceil(b[2])
                        y_scale = float(h) / math.ceil(b[3])

                        x0 = int((p_x - b[0]) * x_scale)
                        y0 = int((p_y - b[1]) * y_scale)

                        # Clamp peaks that land outside the box onto its border
                        x0 = min(max(x0, 0), w - 1)
                        y0 = min(max(y0, 0), h - 1)

                        p = 1e-9

                        weights_bbox[bbox_id, y0, x0, :, joint_id] = [1, instance[2], instance[3], p]

        old_weights_bbox = np.copy(weights_bbox)

        for j in range(weights_bbox.shape[0]):
            for t in range(17):
                weights_bbox[j, :, :, 0, t] = gaussian(weights_bbox[j, :, :, 0, t])
                # weights_bbox[j, :, :, 0, :] = gaussian_multi_input_mp(weights_bbox[j, :, :, 0, :])

        output_bbox = []
        for j in range(weights_bbox.shape[0]):
            inp = weights_bbox[j, :, :, 0, :]
            input = torch.from_numpy(np.expand_dims(inp, axis=0)).cuda().float()
            output = model(input)
            # Reshape to the PRN output resolution. (This was hard-coded to
            # (56, 36, 17), which is only correct when coeff == 2.)
            temp = np.reshape(output.data.cpu().numpy(), (h, w, 17))
            output_bbox.append(temp)

        output_bbox = np.array(output_bbox)

        keypoints_score = []

        for t in range(17):
            indexes = np.argwhere(old_weights_bbox[:, :, :, 0, t] == 1)
            keypoint = []
            for i in indexes:
                cr = crop(output_bbox[i[0], :, :, t], (i[1], i[2]), N=n_kernel)
                score = np.sum(cr)

                kp_id = old_weights_bbox[i[0], i[1], i[2], 2, t]
                kp_score = old_weights_bbox[i[0], i[1], i[2], 1, t]
                bbox_id = i[0]

                score = kp_score * score

                s = [kp_id, bbox_id, kp_score, score]

                keypoint.append(s)
            keypoints_score.append(keypoint)

        bbox_keypoints = np.zeros((weights_bbox.shape[0], 17, 3))
        bbox_ids = np.arange(len(bboxes)).tolist()
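
        # Assignment: for every joint, build a (bbox x peak) score table and
        # resolve conflicts greedily. A peak goes to a box when the two are
        # mutually the best match, or when the competing box ranks that peak
        # lowest among its own candidates, so no peak lands in two poses.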
        # each entry: [kp_id, bbox_id, kp_score, score]
        for i in range(17):
            joint_keypoints = keypoints_score[i]
            if len(joint_keypoints) > 0:

                kp_ids = list(set([x[0] for x in joint_keypoints]))

                table = np.zeros((len(bbox_ids), len(kp_ids), 4))

                for b_id, bbox in enumerate(bbox_ids):
                    for k_id, kp in enumerate(kp_ids):
                        own = [x for x in joint_keypoints if x[0] == kp and x[1] == bbox]

                        if len(own) > 0:
                            table[bbox, k_id] = own[0]
                        else:
                            table[bbox, k_id] = [0] * 4

                for b_id, bbox in enumerate(bbox_ids):

                    row = np.argsort(-table[bbox, :, 3])

                    if table[bbox, row[0], 3] > 0:
                        for r in row:
                            if table[bbox, r, 3] > 0:
                                column = np.argsort(-table[:, r, 3])

                                if bbox == column[0]:
                                    bbox_keypoints[bbox, i, :] = [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][0]
                                    break
                                else:
                                    row2 = np.argsort(table[column[0], :, 3])
                                    if row2[0] == r:
                                        bbox_keypoints[bbox, i, :] = \
                                            [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][0]
                                        break
            else:
                # No peak for this joint anywhere: fall back to the location of
                # the maximum PRN response inside each box.
                for j in range(weights_bbox.shape[0]):
                    b = bboxes[j]
                    x_scale = float(w) / math.ceil(b[2])
                    y_scale = float(h) / math.ceil(b[3])

                    for t in range(17):
                        indexes = np.argwhere(old_weights_bbox[j, :, :, 0, t] == 1)
                        if len(indexes) == 0:
                            max_index = np.argwhere(output_bbox[j, :, :, t] == np.max(output_bbox[j, :, :, t]))
                            bbox_keypoints[j, t, :] = [max_index[0][1] / x_scale + b[0],
                                                       max_index[0][0] / y_scale + b[1], 0]

        my_keypoints = []

        for i in range(bbox_keypoints.shape[0]):
            k = np.zeros(51)
            k[0::3] = bbox_keypoints[i, :, 0]
            k[1::3] = bbox_keypoints[i, :, 1]
            k[2::3] = [2] * 17

            pose_score = 0
            count = 0
            for f in range(17):
                if bbox_keypoints[i, f, 0] != 0 and bbox_keypoints[i, f, 1] != 0:
                    count += 1
                    pose_score += bbox_keypoints[i, f, 2]
            pose_score /= 17.0

            my_keypoints.append(k)

            image_data = {
                'image_id': idx,
                'bbox': bboxes[i],
                'score': pose_score,
                'category_id': 1,
                'keypoints': k.tolist()
            }
            my_results.append(image_data)

    ann_filename = 'data/val2017_PRN_keypoint_results_{}.json'.format(modelname)
    # write output
    json.dump(my_results, open(ann_filename, 'w'), indent=4)

    # load results in COCO evaluation tool
    coco_pred = coco.loadRes(ann_filename)

    # run COCO evaluation
    coco_eval = COCOeval(coco, coco_pred, 'keypoints')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    os.remove(ann_filename)

--------------------------------------------------------------------------------
/src/gaussian.py:
--------------------------------------------------------------------------------
import numpy as np
from skimage.filters import gaussian

# COCO OKS per-keypoint falloff constants (tiled; unused in this module but
# kept from the original source)
sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89] * 100)


def multivariate_gaussian(N, sigma=2):
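    # Evaluate an isotropic 2-D Gaussian density with covariance sigma * I on
    # an N x N grid spanning [-t, t] in both axes; used to paste a peak kernel
    # into a heatmap.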
    t = 4
    X = np.linspace(-t, t, N)
    Y = np.linspace(-t, t, N)
    X, Y = np.meshgrid(X, Y)
    pos = np.empty(X.shape + (2,))
    pos[:, :, 0] = X
    pos[:, :, 1] = Y
    mu = np.array([0., 0.])
    sigma = np.array([[sigma, 0], [0, sigma]])
    n = mu.shape[0]
    Sigma_det = np.linalg.det(sigma)
    Sigma_inv = np.linalg.inv(sigma)
    # normalization constant (renamed from N, which shadowed the grid size)
    norm = np.sqrt((2 * np.pi) ** n * Sigma_det)
    fac = np.einsum('...k,kl,...l->...', pos - mu, Sigma_inv, pos - mu)
    return np.exp(-fac / 2) / norm


def crop_paste(img, c, N=13, sigma=2):
    Z = multivariate_gaussian(N, sigma)

    H = img.shape[1]
    W = img.shape[0]

    # integer half-width (was float division, which breaks slicing in Python 3)
    h = (Z.shape[0] - 1) // 2

    N = Z.shape[0]
    x1 = c[0] - h
    y1 = c[1] - h

    x2 = (c[0] + h) + 1
    y2 = (c[1] + h) + 1

    zx1 = 0
    zy1 = 0
    zx2 = N + 1
    zy2 = N + 1

    if x1 < 0:
        x1 = 0
        zx1 = 0 - (c[0] - h)

    if y1 < 0:
        y1 = 0
        zy1 = 0 - (c[1] - h)

    if x2 > W - 1:
        x2 = W - 1
        zx2 = x2 - x1 + 1
        x2 = W

    if y2 > H - 1:
        y2 = H - 1
        zy2 = y2 - y1 + 1
        y2 = H

    img[x1:x2, y1:y2] = np.maximum(Z[zx1:zx2, zy1:zy2], img[x1:x2, y1:y2])


'''
def gaussian(img, N = 13, sigma=2):
    cs = np.where(img==1)
    img = np.zeros_like(img)
    for c in zip(cs[0], cs[1]):
        crop_paste(img, c, N, sigma)
    return img
'''


def gaussian_multi_input_mp(inp):
    '''
    :param inp: Multi-person ground-truth heatmap input (17 ch). Each channel contains multiple joints.
    :return: out: Gaussian-augmented output. Values are between 0. and 1.
    '''

    h, w, ch = inp.shape
    out = np.zeros_like(inp)
    for i in range(ch):
        layer = inp[:, :, i]
        ind = np.argwhere(layer == 1)
        b = []
        if len(ind) > 0:
            for j in ind:
                t = np.zeros((h, w))
                t[j[0], j[1]] = 1
                t = gaussian(t, sigma=2, mode='constant')
                t = t * (1 / t.max())
                b.append(t)

            out[:, :, i] = np.maximum.reduce(b)
        else:
            out[:, :, i] = np.zeros((h, w))
    return out
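
# A minimal sketch (hypothetical values) of how the helper above and the one
# below differ on a channel containing two peaks:
#   inp = np.zeros((56, 36, 17))
#   inp[10, 10, 0] = 1
#   inp[20, 5, 0] = 1
#   gaussian_multi_input_mp(inp)  # keeps both peaks, each renormalized to 1.0
#   gaussian_multi_output(inp)    # keeps only the first peak found per channel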

def gaussian_multi_output(inp):
    '''
    :param inp: Single-person ground-truth heatmap input (17 ch). Each channel contains one joint.
    :return: out: Gaussian-augmented output. Values are between 0. and 1.
    '''
    h, w, ch = inp.shape
    out = np.zeros_like(inp)
    for i in range(ch):
        j = np.argwhere(inp[:, :, i] == 1)
        if len(j) == 0:
            out[:, :, i] = np.zeros((h, w))
            continue
        j = j[0]
        t = np.zeros((h, w))
        t[j[0], j[1]] = 1
        t = gaussian(t, sigma=5, mode='constant')
        out[:, :, i] = t * (1 / t.max())
    return out


def crop(img, c, N=13):
    H = img.shape[1]
    W = img.shape[0]

    h = (N - 1) // 2

    x1 = int(c[0] - h)
    y1 = int(c[1] - h)

    x2 = int(c[0] + h) + 1
    y2 = int(c[1] + h) + 1

    if x1 < 0:
        x1 = 0

    if y1 < 0:
        y1 = 0

    if x2 > W - 1:
        x2 = W

    if y2 > H - 1:
        y2 = H

    return img[x1:x2, y1:y2]

--------------------------------------------------------------------------------
/src/model.py:
--------------------------------------------------------------------------------
import torch
from torch import nn
import torch.nn.functional as F


class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class Add(nn.Module):
    def forward(self, input1, input2):
        return torch.add(input1, input2)


class PRN(nn.Module):
    def __init__(self, node_count, coeff):
        super(PRN, self).__init__()
        self.flatten = Flatten()
        self.height = coeff * 28
        self.width = coeff * 18
        self.dens1 = nn.Linear(self.height * self.width * 17, node_count)
        self.bneck = nn.Linear(node_count, node_count)
        self.dens2 = nn.Linear(node_count, self.height * self.width * 17)
        self.drop = nn.Dropout()
        self.add = Add()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # keep the flattened input for the residual connection below
        res = self.flatten(x)
        out = self.drop(F.relu(self.dens1(res)))
        out = self.drop(F.relu(self.bneck(out)))
        out = F.relu(self.dens2(out))
        # residual connection: the network learns a correction to the input
        # heatmaps, followed by a softmax over all spatial locations
        out = self.add(out, res)
        out = self.softmax(out)
        out = out.view(out.size()[0], self.height, self.width, 17)

        return out

--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
numpy
tqdm
pycocotools
progress
scikit-image

--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
import os
import json
import torch


def save_options(opt, path, model, criterion, optimizer):
    file_path = os.path.join(path, 'opt.json')
    model_struc = {'Model': model.__str__(), 'Loss Function': criterion, 'Optimizer': optimizer}

    with open(file_path, 'w') as f:
        f.write(json.dumps(vars(opt), sort_keys=True, indent=4))
        f.write(json.dumps(model_struc, sort_keys=True, indent=4))


def save_model(state, checkpoint, filename='checkpoint.pth.tar'):
    filename = 'epoch' + str(state['epoch']) + filename
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)


def adjust_lr(optimizer, epoch, gamma):
    """Decays the learning rate by `gamma` at every epoch in the schedule."""
    schedule = list(range(3, 32, 2))
    if epoch in schedule:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= gamma
    return optimizer.state_dict()['param_groups'][0]['lr']
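
# Typical use from a training loop (this mirrors the call in train.py):
#   lr = adjust_lr(optimizer, epoch, opt.lr_gamma)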
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import torch
from opt import Options
from src.eval import Evaluation
from src.model import PRN


if __name__ == "__main__":
    option = Options().parse()

    model = PRN(option.node_count, option.coeff).cuda()
    checkpoint = torch.load(option.test_cp)
    model.load_state_dict(checkpoint['state_dict'])

    Evaluation(model, option)

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import os
from tqdm import tqdm
from progress.bar import Bar
from pycocotools.coco import COCO

import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader

from opt import Options
from src.model import PRN
from src.eval import Evaluation
from src.utils import save_options
from src.utils import save_model, adjust_lr
from src.data_loader import CocoDataset


def main(optin):
    if not os.path.exists('checkpoint/' + optin.exp):
        os.makedirs('checkpoint/' + optin.exp)

    model = PRN(optin.node_count, optin.coeff).cuda()
    # model = torch.nn.DataParallel(model).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=optin.lr)
    criterion = torch.nn.BCELoss().cuda()

    print(model)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    save_options(optin, os.path.join('checkpoint/' + optin.exp), model.__str__(), criterion.__str__(), optimizer.__str__())

    print('---------Loading Coco Training Set--------')
    coco_train = COCO(os.path.join('data/annotations/person_keypoints_train2017.json'))
    trainloader = DataLoader(dataset=CocoDataset(coco_train, optin), batch_size=optin.batch_size,
                             num_workers=optin.num_workers, shuffle=True)

    bar = Bar('-->', fill='>', max=len(trainloader))

    cudnn.benchmark = True
    for epoch in range(optin.number_of_epoch):
        print('-------------Training Epoch {}-------------'.format(epoch))
        print('Total Step:', len(trainloader), '| Total Epoch:', optin.number_of_epoch)
        lr = adjust_lr(optimizer, epoch, optin.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))
        for idx, (input, label) in tqdm(enumerate(trainloader)):

            input = input.cuda().float()
            label = label.cuda().float()

            outputs = model(input)

            optimizer.zero_grad()
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            if idx % 200 == 0:
                bar.suffix = 'Epoch: {epoch} Total: {ttl} | ETA: {eta:} | loss:{loss}' \
                    .format(ttl=bar.elapsed_td, eta=bar.eta_td, loss=loss.data, epoch=epoch)
                bar.next()

        Evaluation(model, optin)

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint='checkpoint/' + optin.exp)

        # Evaluation() switches the model to eval mode; switch back for the next epoch
        model.train()


if __name__ == "__main__":
    option = Options().parse()
    main(option)
--------------------------------------------------------------------------------