├── .gitignore
├── LICENSE
├── README.md
├── checkpoint
│   └── Experiments.txt
├── data
│   └── coco.sh
├── opt.py
├── src
│   ├── __init__.py
│   ├── data_loader.py
│   ├── eval.py
│   ├── gaussian.py
│   ├── model.py
│   ├── requirements.txt
│   └── utils.py
├── test.py
└── train.py

/.gitignore:
--------------------------------------------------------------------------------
#
*.pyc
*.json
*.pth.tar

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MULTIPOSENET: FAST MULTI-PERSON POSE ESTIMATION USING POSE RESIDUAL NETWORK
SOFTWARE LICENSE AGREEMENT
ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY

BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.

This is a license agreement ("Agreement") between your academic institution or non-profit organization or self
(called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement).
All rights not specifically granted to you in this Agreement are reserved for Licensor.

RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement
and hereby grants to Licensee a personal, non-exclusive,
non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense,
pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i)
the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant
to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by
Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and
sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or
modifications created or made by You to any of the items specified in (i).

CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to
receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement.
Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution,
or publication.

COPYRIGHT: The Software is owned by Licensor and is protected by United
States copyright laws and applicable international treaties and/or conventions.

PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and
agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the
Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.

DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and
any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You
under this Agreement.
You may only use such derivatives and modifications for your own noncommercial internal
research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in
violation of this Agreement.

BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for
internal noncommercial use at a single site within its organization provided that all information appearing
in or on the original labels, including the copyright and trademark notices, are copied onto the labels of the copies.

USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein.
Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark
"EpipolarPose", "Middle East Technical University" or any renditions thereof without
the prior written permission of Licensor.

You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third
parties access to prior or present versions (or any parts thereof) of the Software.

ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor.
Any attempted assignment without such consent shall be null and void.

TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by
downloading the Software or by using the Software until terminated as provided below.

The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement.
Licensee may terminate this Agreement by ceasing to use the Software. Upon any termination of this Agreement,
Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect
the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of
confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.

FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to
Licensor for Licensee's use of the Software in accordance with this Agreement.

DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES
OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS
ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.

SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.

EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor
shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to
Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.

EXPORT REGULATION: Licensee agrees to comply with any and all applicable
U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered
by the Office of Foreign Assets Control.
SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court
or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions
shall not in any way be affected or impaired thereby.

NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be
construed as a waiver of any future or other exercise of such right or remedy by Licensor.

ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and
Licensor as to the matter set forth herein and supersedes any previous agreements, understandings,
and arrangements between the parties relating hereto.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Pose Residual Network

This repository contains a PyTorch implementation of the Pose Residual Network (PRN) presented in our ECCV 2018 paper:

Muhammed Kocabas, Salih Karagoz, Emre Akbas. MultiPoseNet: Fast Multi-Person Pose Estimation using Pose Residual Network. In ECCV, 2018. [arXiv](https://arxiv.org/abs/1807.04067)

PRN is described in Section 3.2 of the paper.

## Getting Started
We have tested our method on the [COCO dataset](http://cocodataset.org).

### Prerequisites

```
python
pytorch
numpy
tqdm
pycocotools
progress
scikit-image
```

### Installing

1. Clone this repository:
   `git clone https://github.com/salihkaragoz/pose-residual-network-pytorch.git`

2. Install [PyTorch](https://pytorch.org/).

3. `pip install -r src/requirements.txt`

4. Download the COCO train2017 and val2017 annotations by running `bash data/coco.sh` (download size: ~240 MB).

## Training

`python train.py`

For all available options, see `opt.py`.

## Testing

1. Download the pre-trained [model](https://drive.google.com/file/d/1OhdMllLGnpRAk6Wexw8LzXF_EHiolVj1/view?usp=sharing).

2. `python test.py --test_cp=PathToPreTrainedModel/PRN.pth.tar`

## Results
Results on COCO val2017, using ground-truth person boxes and keypoints as input.
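These numbers are produced by the testing command above with the pre-trained model. Because ground-truth boxes and keypoint locations are fed to PRN, they measure the network's assignment accuracy in isolation rather than the accuracy of a full detection pipeline.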

```
Average Precision (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.892
Average Precision (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.978
Average Precision (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.921
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.883
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.912
Average Recall    (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.917
Average Recall    (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.982
Average Recall    (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.937
Average Recall    (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.902
Average Recall    (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.944
```

## License

This code is released for academic and non-profit, noncommercial research use only; see the [LICENSE](LICENSE) file for details.

## Citation
If you find this code useful for your research, please consider citing our paper:
```
@Inproceedings{kocabas18prn,
  Title     = {Multi{P}ose{N}et: Fast Multi-Person Pose Estimation using Pose Residual Network},
  Author    = {Kocabas, Muhammed and Karagoz, Salih and Akbas, Emre},
  Booktitle = {European Conference on Computer Vision (ECCV)},
  Year      = {2018}
}
```

--------------------------------------------------------------------------------
/checkpoint/Experiments.txt:
--------------------------------------------------------------------------------


--------------------------------------------------------------------------------
/data/coco.sh:
--------------------------------------------------------------------------------
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
unzip annotations_trainval2017.zip
rm annotations_trainval2017.zip
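# Note: only the annotation archive is fetched. PRN trains and evaluates
# directly on the COCO keypoint annotations; the image files themselves are
# never read, so no image download is required.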

--------------------------------------------------------------------------------
/opt.py:
--------------------------------------------------------------------------------
import argparse
from pprint import pprint


class Options:
    def __init__(self):
        self.parser = argparse.ArgumentParser()
        self.opt = None

    def _initial(self):

        # -------------------------- General Training Options
        self.parser.add_argument('--lr', type=float, default=1.0e-3, help='Learning rate')
        self.parser.add_argument('--lr_gamma', type=float, default=0.9, help='Learning rate decay factor')
        self.parser.add_argument('--number_of_epoch', type=int, default=16)
        self.parser.add_argument('--num_workers', type=int, default=4)
        self.parser.add_argument('--batch_size', type=int, default=8)
        self.parser.add_argument('--node_count', type=int, default=1024, help='Hidden layer node count')
        # -------------------------- General Training Options

        self.parser.add_argument('--exp', type=str, default='test/', help='Experiment name')

        # --------------------------
        self.parser.add_argument('--coeff', type=int, default=2, help='Coefficient of bbox size')
        # `threshold` is a fraction of the bbox size, so its type must be float (was: type=int)
        self.parser.add_argument('--threshold', type=float, default=0.21, help='BBox padding threshold')
        self.parser.add_argument('--test_cp', type=str, default='checkpoint/test/default.pth.tar', help='Path to model for testing')
        self.parser.add_argument('--num_of_keypoints', type=int, default=3, help='Minimum number of keypoints for each bbox in training')
        self.parser.add_argument('--test_keypoint_count', type=int, default=0, help='Validate with a different keypoint count')
        self.parser.add_argument('--window_size', type=int, default=15, help='Window size for cropping')
        # --------------------------

    def _print(self):
        print("\n==================Options=================")
        pprint(vars(self.opt), indent=4)
        print("==========================================\n")

    def parse(self):
        self._initial()
        self.opt = self.parser.parse_args()
        self._print()
        return self.opt

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), "progress"))

--------------------------------------------------------------------------------
/src/data_loader.py:
--------------------------------------------------------------------------------
import math
import numpy as np
from skimage.filters import gaussian
from torch.utils.data import Dataset


class CocoDataset(Dataset):
    def __init__(self, coco_train, opt):
        self.coco_train = coco_train
        self.num_of_keypoints = opt.num_of_keypoints
        self.anns = self.get_anns(self.coco_train)
        self.bbox_height = opt.coeff * 28
        self.bbox_width = opt.coeff * 18
        self.threshold = opt.threshold

    def __len__(self):
        return len(self.anns)

    def __getitem__(self, item):
        ann_data = self.anns[item]
        input, label = self.get_data(ann_data, self.coco_train)
        return input, label

    def get_data(self, ann_data, coco):
        weights = np.zeros((self.bbox_height, self.bbox_width, 17))
        output = np.zeros((self.bbox_height, self.bbox_width, 17))

        bbox = ann_data['bbox']
        x = int(bbox[0])
        y = int(bbox[1])
        w = float(bbox[2])
        h = float(bbox[3])

        x_scale = float(self.bbox_width) / math.ceil(w)
        y_scale = float(self.bbox_height) / math.ceil(h)

        kpx = ann_data['keypoints'][0::3]
        kpy = ann_data['keypoints'][1::3]
        kpv = ann_data['keypoints'][2::3]

        for j in range(17):
            if kpv[j] > 0:
                x0 = int((kpx[j] - x) * x_scale)
                y0 = int((kpy[j] - y) * y_scale)
                # Clamp joints that fall outside the box onto its border.
                # (The original if/elif cascade silently wrapped negative
                # indices around; min/max clamping is the intended behavior.)
                x0 = min(max(x0, 0), self.bbox_width - 1)
                y0 = min(max(y0, 0), self.bbox_height - 1)
                output[y0, x0, j] = 1
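
        # `output` is the single-person ground-truth label: one peak per
        # visible joint of this annotation. `weights`, built next, is the
        # network input: every keypoint of every person that falls inside the
        # padded box, so PRN has to learn to pick out a single coherent pose.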
        img_id = ann_data['image_id']
        img_data = coco.loadImgs(img_id)[0]
        ann_data = coco.loadAnns(coco.getAnnIds(img_data['id']))

        for ann in ann_data:
            kpx = ann['keypoints'][0::3]
            kpy = ann['keypoints'][1::3]
            kpv = ann['keypoints'][2::3]

            for j in range(17):
                if kpv[j] > 0:
                    if bbox[0] - bbox[2] * self.threshold < kpx[j] < bbox[0] + bbox[2] * (1 + self.threshold):
                        if bbox[1] - bbox[3] * self.threshold < kpy[j] < bbox[1] + bbox[3] * (1 + self.threshold):
                            x0 = int((kpx[j] - x) * x_scale)
                            y0 = int((kpy[j] - y) * y_scale)
                            x0 = min(max(x0, 0), self.bbox_width - 1)
                            y0 = min(max(y0, 0), self.bbox_height - 1)
                            weights[y0, x0, j] = 1

        for t in range(17):
            weights[:, :, t] = gaussian(weights[:, :, t])
        output = gaussian(output, sigma=2, mode='constant', multichannel=True)
        # weights = gaussian_multi_input_mp(weights)
        # output = gaussian_multi_output(output)
        return weights, output

    def get_anns(self, coco):
        """
        :param coco: COCO instance
        :return: list of non-crowd person annotations with more than
                 `num_of_keypoints` labeled keypoints, sorted by keypoint
                 count in descending order
        """
        ann_ids = coco.getAnnIds()
        anns = []
        for i in ann_ids:
            ann = coco.loadAnns(i)[0]
            if ann['iscrowd'] == 0 and ann['num_keypoints'] > self.num_of_keypoints:
                anns.append(ann)
        return sorted(anns, key=lambda k: k['num_keypoints'], reverse=True)

--------------------------------------------------------------------------------
/src/eval.py:
--------------------------------------------------------------------------------
import os
import math
import json
import numpy as np
from tqdm import tqdm
from random import shuffle

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .gaussian import gaussian, crop, gaussian_multi_input_mp

import torch


def Evaluation(model, optin):
    print('------------Evaluation Started------------')
    coeff = optin.coeff
    in_thres = optin.threshold
    test_keypoint_count = optin.test_keypoint_count
    n_kernel = optin.window_size
    modelname = 'temporary'

    model.eval()

    cocodir = 'data/annotations/person_keypoints_val2017.json'
    ann = json.load(open(cocodir))
    bbox_results = ann['annotations']

    coco = COCO(cocodir)
    img_ids = coco.getImgIds(catIds=[1])

    peak_results = []

    # (the outer loop variable was renamed from `i`, which the inner
    # joint loop below shadowed)
    for img_id in img_ids:
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        kps = [a['keypoints'] for a in anns]

        idx = 0

        ks = []
        for i in range(17):
            t = []
            for k in kps:
                x = k[0::3][i]
                y = k[1::3][i]
                v = k[2::3][i]

                if v > 0:
                    t.append([x, y, 1, idx])
                    idx += 1
            ks.append(t)
        image_id = anns[0]['image_id']
        peaks = ks

        element = {
            'image_id': image_id,
            'peaks': peaks,
            'file_name': coco.loadImgs(image_id)[0]['file_name']
        }

        peak_results.append(element)

    shuffle(peak_results)

    my_results = []
    image_ids = []

    w = int(18 * coeff)
    h = int(28 * coeff)

    temporary_peak_res = []
    for p in peak_results:
        if sum(1 for i in p['peaks'] if i != []) >= test_keypoint_count:
            temporary_peak_res.append(p)
    peak_results = temporary_peak_res

    for p in tqdm(peak_results):
        idx = p['image_id']
        image_ids.append(idx)

        peaks = p['peaks']
        bboxes = [k['bbox'] for k in bbox_results if k['image_id'] == idx]

        if len(bboxes) == 0 or len(peaks) == 0:
            continue
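
        # weights_bbox packs, per candidate box, an (h, w, 4, 17) tensor whose
        # second-to-last axis holds: [0] the binary peak map fed to PRN,
        # [1] the peak score, [2] the peak's unique id, [3] a small placeholder
        # probability. Channels 1-3 are bookkeeping for the assignment step.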
        weights_bbox = np.zeros((len(bboxes), h, w, 4, 17))

        for joint_id, peak in enumerate(peaks):

            for instance_id, instance in enumerate(peak):

                p_x = instance[0]
                p_y = instance[1]

                for bbox_id, b in enumerate(bboxes):

                    is_inside = p_x > b[0] - b[2] * in_thres and \
                                p_y > b[1] - b[3] * in_thres and \
                                p_x < b[0] + b[2] * (1.0 + in_thres) and \
                                p_y < b[1] + b[3] * (1.0 + in_thres)

                    if is_inside:
                        x_scale = float(w) / math.ceil(b[2])
                        y_scale = float(h) / math.ceil(b[3])

                        x0 = int((p_x - b[0]) * x_scale)
                        y0 = int((p_y - b[1]) * y_scale)

                        # Clamp peaks that land outside the box onto its border
                        x0 = min(max(x0, 0), w - 1)
                        y0 = min(max(y0, 0), h - 1)

                        p = 1e-9

                        weights_bbox[bbox_id, y0, x0, :, joint_id] = [1, instance[2], instance[3], p]

        old_weights_bbox = np.copy(weights_bbox)

        for j in range(weights_bbox.shape[0]):
            for t in range(17):
                weights_bbox[j, :, :, 0, t] = gaussian(weights_bbox[j, :, :, 0, t])
                # weights_bbox[j, :, :, 0, :] = gaussian_multi_input_mp(weights_bbox[j, :, :, 0, :])

        output_bbox = []
        for j in range(weights_bbox.shape[0]):
            inp = weights_bbox[j, :, :, 0, :]
            input = torch.from_numpy(np.expand_dims(inp, axis=0)).cuda().float()
            output = model(input)
            # Reshape to the PRN output resolution. (This was hard-coded to
            # (56, 36, 17), which is only correct when coeff == 2.)
            temp = np.reshape(output.data.cpu().numpy(), (h, w, 17))
            output_bbox.append(temp)

        output_bbox = np.array(output_bbox)

        keypoints_score = []

        for t in range(17):
            indexes = np.argwhere(old_weights_bbox[:, :, :, 0, t] == 1)
            keypoint = []
            for i in indexes:
                cr = crop(output_bbox[i[0], :, :, t], (i[1], i[2]), N=n_kernel)
                score = np.sum(cr)

                kp_id = old_weights_bbox[i[0], i[1], i[2], 2, t]
                kp_score = old_weights_bbox[i[0], i[1], i[2], 1, t]
                bbox_id = i[0]

                score = kp_score * score

                s = [kp_id, bbox_id, kp_score, score]

                keypoint.append(s)
            keypoints_score.append(keypoint)

        bbox_keypoints = np.zeros((weights_bbox.shape[0], 17, 3))
        bbox_ids = np.arange(len(bboxes)).tolist()
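
        # Assignment: for every joint, build a (bbox x peak) score table and
        # resolve conflicts greedily. A peak goes to a box when the two are
        # mutually the best match, or when the competing box ranks that peak
        # lowest among its own candidates, so no peak lands in two poses.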
        # each entry: [kp_id, bbox_id, kp_score, score]
        for i in range(17):
            joint_keypoints = keypoints_score[i]
            if len(joint_keypoints) > 0:

                kp_ids = list(set([x[0] for x in joint_keypoints]))

                table = np.zeros((len(bbox_ids), len(kp_ids), 4))

                for b_id, bbox in enumerate(bbox_ids):
                    for k_id, kp in enumerate(kp_ids):
                        own = [x for x in joint_keypoints if x[0] == kp and x[1] == bbox]

                        if len(own) > 0:
                            table[bbox, k_id] = own[0]
                        else:
                            table[bbox, k_id] = [0] * 4

                for b_id, bbox in enumerate(bbox_ids):

                    row = np.argsort(-table[bbox, :, 3])

                    if table[bbox, row[0], 3] > 0:
                        for r in row:
                            if table[bbox, r, 3] > 0:
                                column = np.argsort(-table[:, r, 3])

                                if bbox == column[0]:
                                    bbox_keypoints[bbox, i, :] = [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][0]
                                    break
                                else:
                                    row2 = np.argsort(table[column[0], :, 3])
                                    if row2[0] == r:
                                        bbox_keypoints[bbox, i, :] = \
                                            [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][0]
                                        break
            else:
                # No peak for this joint anywhere: fall back to the location of
                # the maximum PRN response inside each box.
                for j in range(weights_bbox.shape[0]):
                    b = bboxes[j]
                    x_scale = float(w) / math.ceil(b[2])
                    y_scale = float(h) / math.ceil(b[3])

                    for t in range(17):
                        indexes = np.argwhere(old_weights_bbox[j, :, :, 0, t] == 1)
                        if len(indexes) == 0:
                            max_index = np.argwhere(output_bbox[j, :, :, t] == np.max(output_bbox[j, :, :, t]))
                            bbox_keypoints[j, t, :] = [max_index[0][1] / x_scale + b[0],
                                                       max_index[0][0] / y_scale + b[1], 0]

        my_keypoints = []

        for i in range(bbox_keypoints.shape[0]):
            k = np.zeros(51)
            k[0::3] = bbox_keypoints[i, :, 0]
            k[1::3] = bbox_keypoints[i, :, 1]
            k[2::3] = [2] * 17

            pose_score = 0
            count = 0
            for f in range(17):
                if bbox_keypoints[i, f, 0] != 0 and bbox_keypoints[i, f, 1] != 0:
                    count += 1
                    pose_score += bbox_keypoints[i, f, 2]
            pose_score /= 17.0

            my_keypoints.append(k)

            image_data = {
                'image_id': idx,
                'bbox': bboxes[i],
                'score': pose_score,
                'category_id': 1,
                'keypoints': k.tolist()
            }
            my_results.append(image_data)

    ann_filename = 'data/val2017_PRN_keypoint_results_{}.json'.format(modelname)
    # write output
    json.dump(my_results, open(ann_filename, 'w'), indent=4)

    # load results in COCO evaluation tool
    coco_pred = coco.loadRes(ann_filename)

    # run COCO evaluation
    coco_eval = COCOeval(coco, coco_pred, 'keypoints')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    os.remove(ann_filename)

--------------------------------------------------------------------------------
/src/gaussian.py:
--------------------------------------------------------------------------------
import numpy as np
from skimage.filters import gaussian

# COCO OKS per-keypoint falloff constants (tiled; unused in this module but
# kept from the original source)
sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89] * 100)


def multivariate_gaussian(N, sigma=2):
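    # Evaluate an isotropic 2-D Gaussian density with covariance sigma * I on
    # an N x N grid spanning [-t, t] in both axes; used to paste a peak kernel
    # into a heatmap.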
    t = 4
    X = np.linspace(-t, t, N)
    Y = np.linspace(-t, t, N)
    X, Y = np.meshgrid(X, Y)
    pos = np.empty(X.shape + (2,))
    pos[:, :, 0] = X
    pos[:, :, 1] = Y
    mu = np.array([0., 0.])
    sigma = np.array([[sigma, 0], [0, sigma]])
    n = mu.shape[0]
    Sigma_det = np.linalg.det(sigma)
    Sigma_inv = np.linalg.inv(sigma)
    # normalization constant (renamed from N, which shadowed the grid size)
    norm = np.sqrt((2 * np.pi) ** n * Sigma_det)
    fac = np.einsum('...k,kl,...l->...', pos - mu, Sigma_inv, pos - mu)
    return np.exp(-fac / 2) / norm


def crop_paste(img, c, N=13, sigma=2):
    Z = multivariate_gaussian(N, sigma)

    H = img.shape[1]
    W = img.shape[0]

    # integer half-width (was float division, which breaks slicing in Python 3)
    h = (Z.shape[0] - 1) // 2

    N = Z.shape[0]
    x1 = c[0] - h
    y1 = c[1] - h

    x2 = (c[0] + h) + 1
    y2 = (c[1] + h) + 1

    zx1 = 0
    zy1 = 0
    zx2 = N + 1
    zy2 = N + 1

    if x1 < 0:
        x1 = 0
        zx1 = 0 - (c[0] - h)

    if y1 < 0:
        y1 = 0
        zy1 = 0 - (c[1] - h)

    if x2 > W - 1:
        x2 = W - 1
        zx2 = x2 - x1 + 1
        x2 = W

    if y2 > H - 1:
        y2 = H - 1
        zy2 = y2 - y1 + 1
        y2 = H

    img[x1:x2, y1:y2] = np.maximum(Z[zx1:zx2, zy1:zy2], img[x1:x2, y1:y2])


'''
def gaussian(img, N = 13, sigma=2):
    cs = np.where(img==1)
    img = np.zeros_like(img)
    for c in zip(cs[0], cs[1]):
        crop_paste(img, c, N, sigma)
    return img
'''


def gaussian_multi_input_mp(inp):
    '''
    :param inp: Multi-person ground-truth heatmap input (17 ch). Each channel contains multiple joints.
    :return: out: Gaussian-augmented output. Values are between 0. and 1.
    '''

    h, w, ch = inp.shape
    out = np.zeros_like(inp)
    for i in range(ch):
        layer = inp[:, :, i]
        ind = np.argwhere(layer == 1)
        b = []
        if len(ind) > 0:
            for j in ind:
                t = np.zeros((h, w))
                t[j[0], j[1]] = 1
                t = gaussian(t, sigma=2, mode='constant')
                t = t * (1 / t.max())
                b.append(t)

            out[:, :, i] = np.maximum.reduce(b)
        else:
            out[:, :, i] = np.zeros((h, w))
    return out
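
# A minimal sketch (hypothetical values) of how the helper above and the one
# below differ on a channel containing two peaks:
#   inp = np.zeros((56, 36, 17))
#   inp[10, 10, 0] = 1
#   inp[20, 5, 0] = 1
#   gaussian_multi_input_mp(inp)  # keeps both peaks, each renormalized to 1.0
#   gaussian_multi_output(inp)    # keeps only the first peak found per channel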

def gaussian_multi_output(inp):
    '''
    :param inp: Single-person ground-truth heatmap input (17 ch). Each channel contains one joint.
    :return: out: Gaussian-augmented output. Values are between 0. and 1.
    '''
    h, w, ch = inp.shape
    out = np.zeros_like(inp)
    for i in range(ch):
        j = np.argwhere(inp[:, :, i] == 1)
        if len(j) == 0:
            out[:, :, i] = np.zeros((h, w))
            continue
        j = j[0]
        t = np.zeros((h, w))
        t[j[0], j[1]] = 1
        t = gaussian(t, sigma=5, mode='constant')
        out[:, :, i] = t * (1 / t.max())
    return out


def crop(img, c, N=13):
    H = img.shape[1]
    W = img.shape[0]

    h = (N - 1) // 2

    x1 = int(c[0] - h)
    y1 = int(c[1] - h)

    x2 = int(c[0] + h) + 1
    y2 = int(c[1] + h) + 1

    if x1 < 0:
        x1 = 0

    if y1 < 0:
        y1 = 0

    if x2 > W - 1:
        x2 = W

    if y2 > H - 1:
        y2 = H

    return img[x1:x2, y1:y2]

--------------------------------------------------------------------------------
/src/model.py:
--------------------------------------------------------------------------------
import torch
from torch import nn
import torch.nn.functional as F


class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class Add(nn.Module):
    def forward(self, input1, input2):
        return torch.add(input1, input2)


class PRN(nn.Module):
    def __init__(self, node_count, coeff):
        super(PRN, self).__init__()
        self.flatten = Flatten()
        self.height = coeff * 28
        self.width = coeff * 18
        self.dens1 = nn.Linear(self.height * self.width * 17, node_count)
        self.bneck = nn.Linear(node_count, node_count)
        self.dens2 = nn.Linear(node_count, self.height * self.width * 17)
        self.drop = nn.Dropout()
        self.add = Add()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # keep the flattened input for the residual connection below
        res = self.flatten(x)
        out = self.drop(F.relu(self.dens1(res)))
        out = self.drop(F.relu(self.bneck(out)))
        out = F.relu(self.dens2(out))
        # residual connection: the network learns a correction to the input
        # heatmaps, followed by a softmax over all spatial locations
        out = self.add(out, res)
        out = self.softmax(out)
        out = out.view(out.size()[0], self.height, self.width, 17)

        return out

--------------------------------------------------------------------------------
/src/requirements.txt:
--------------------------------------------------------------------------------
numpy
tqdm
pycocotools
progress
scikit-image

--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
import os
import json
import torch


def save_options(opt, path, model, criterion, optimizer):
    file_path = os.path.join(path, 'opt.json')
    model_struc = {'Model': model.__str__(), 'Loss Function': criterion, 'Optimizer': optimizer}

    with open(file_path, 'w') as f:
        f.write(json.dumps(vars(opt), sort_keys=True, indent=4))
        f.write(json.dumps(model_struc, sort_keys=True, indent=4))


def save_model(state, checkpoint, filename='checkpoint.pth.tar'):
    filename = 'epoch' + str(state['epoch']) + filename
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)


def adjust_lr(optimizer, epoch, gamma):
    """Decays the learning rate by `gamma` at every epoch in the schedule."""
    schedule = list(range(3, 32, 2))
    if epoch in schedule:
        for param_group in optimizer.param_groups:
            param_group['lr'] *= gamma
    return optimizer.state_dict()['param_groups'][0]['lr']
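
# Typical use from a training loop (this mirrors the call in train.py):
#   lr = adjust_lr(optimizer, epoch, opt.lr_gamma)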
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import torch
from opt import Options
from src.eval import Evaluation
from src.model import PRN


if __name__ == "__main__":
    option = Options().parse()

    model = PRN(option.node_count, option.coeff).cuda()
    checkpoint = torch.load(option.test_cp)
    model.load_state_dict(checkpoint['state_dict'])

    Evaluation(model, option)

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import os
from tqdm import tqdm
from progress.bar import Bar
from pycocotools.coco import COCO

import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader

from opt import Options
from src.model import PRN
from src.eval import Evaluation
from src.utils import save_options
from src.utils import save_model, adjust_lr
from src.data_loader import CocoDataset


def main(optin):
    if not os.path.exists('checkpoint/' + optin.exp):
        os.makedirs('checkpoint/' + optin.exp)

    model = PRN(optin.node_count, optin.coeff).cuda()
    # model = torch.nn.DataParallel(model).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=optin.lr)
    criterion = torch.nn.BCELoss().cuda()

    print(model)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    save_options(optin, os.path.join('checkpoint/' + optin.exp), model.__str__(), criterion.__str__(), optimizer.__str__())

    print('---------Loading Coco Training Set--------')
    coco_train = COCO(os.path.join('data/annotations/person_keypoints_train2017.json'))
    trainloader = DataLoader(dataset=CocoDataset(coco_train, optin), batch_size=optin.batch_size,
                             num_workers=optin.num_workers, shuffle=True)

    bar = Bar('-->', fill='>', max=len(trainloader))

    cudnn.benchmark = True
    for epoch in range(optin.number_of_epoch):
        print('-------------Training Epoch {}-------------'.format(epoch))
        print('Total Step:', len(trainloader), '| Total Epoch:', optin.number_of_epoch)
        lr = adjust_lr(optimizer, epoch, optin.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))
        for idx, (input, label) in tqdm(enumerate(trainloader)):

            input = input.cuda().float()
            label = label.cuda().float()

            outputs = model(input)

            optimizer.zero_grad()
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            if idx % 200 == 0:
                bar.suffix = 'Epoch: {epoch} Total: {ttl} | ETA: {eta:} | loss:{loss}' \
                    .format(ttl=bar.elapsed_td, eta=bar.eta_td, loss=loss.data, epoch=epoch)
                bar.next()

        Evaluation(model, optin)

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint='checkpoint/' + optin.exp)

        # Evaluation() switches the model to eval mode; switch back for the next epoch
        model.train()


if __name__ == "__main__":
    option = Options().parse()
    main(option)
--------------------------------------------------------------------------------