├── lib
    ├── __init__.py
    ├── core
    │   ├── __init__.py
    │   └── config.py
    ├── nms
    │   ├── __init__.py
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── nms
    │   │   │   ├── _nms.so
    │   │   │   └── __init__.py
    │   ├── src
    │   │   ├── cuda
    │   │   │   ├── nms_kernel.cu.o
    │   │   │   ├── nms_kernel.h
    │   │   │   └── nms_kernel.cu
    │   │   ├── nms_cuda.h
    │   │   ├── nms.h
    │   │   ├── nms_cuda.c
    │   │   └── nms.c
    │   ├── build.py
    │   └── pth_nms.py
    ├── utils
    │   ├── __init__.py
    │   ├── path.py
    │   ├── log.py
    │   ├── meter.py
    │   └── timer.py
    └── build.sh
├── datasets
    ├── __init__.py
    ├── coco_data
    │   ├── __init__.py
    │   ├── preprocessing.py
    │   ├── heatmap.py
    │   ├── prn_gaussian.py
    │   ├── prn_data_pipeline.py
    │   ├── ImageAugmentation.py
    │   └── COCO_data_pipeline.py
    ├── dataloader.py
    ├── coco.py
    └── data_parallel.py
├── evaluate
    ├── __init__.py
    ├── multipose_coco_eval.py
    ├── multipose_test.py
    ├── multipose_keypoint_val.py
    ├── multipose_detection_val.py
    ├── multipose_prn_val.py
    └── tester.py
├── network
    ├── __init__.py
    ├── utils.py
    ├── net_utils.py
    ├── anchors.py
    ├── fpn.py
    ├── losses.py
    ├── joint_utils.py
    └── posenet.py
├── training
    ├── __init__.py
    ├── batch_processor.py
    ├── multipose_prn_train.py
    ├── multipose_detection_train.py
    ├── multipose_keypoint_train.py
    └── trainer.py
├── .gitignore
├── demo
    ├── test_images
    │   ├── pic1.jpg
    │   └── pic2.jpg
    ├── output
    │   ├── pic1_canvas.png
    │   └── pic2_canvas.png
    └── models
    │   └── README.md
├── configs
    └── coco
    │   └── first_experiment.yaml
├── multipose_environment.yaml
└── README.md


/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluate/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/network/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/training/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.pyc
3 | *~
4 | 


--------------------------------------------------------------------------------
/datasets/coco_data/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lib/nms/_ext/nms/_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/lib/nms/_ext/nms/_nms.so


--------------------------------------------------------------------------------
/demo/test_images/pic1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/test_images/pic1.jpg


--------------------------------------------------------------------------------
/demo/test_images/pic2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/test_images/pic2.jpg


--------------------------------------------------------------------------------
/demo/output/pic1_canvas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/output/pic1_canvas.png


--------------------------------------------------------------------------------
/demo/output/pic2_canvas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/output/pic2_canvas.png


--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/lib/nms/src/cuda/nms_kernel.cu.o


--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);


--------------------------------------------------------------------------------
/lib/nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);


--------------------------------------------------------------------------------
/demo/models/README.md:
--------------------------------------------------------------------------------
1 | Our baseline model:([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z),  [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/7328ce2cb7bd4f558a78/), backbone: resnet101)


--------------------------------------------------------------------------------
/lib/utils/path.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | 
 5 | def mkdir(path, rm_exist=False):
 6 |     if os.path.isdir(path):
 7 |         if not rm_exist:
 8 |             return
 9 |         shutil.rmtree(path)
10 | 
11 |     os.makedirs(path)
12 | 


--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _NMS_KERNEL
 2 | #define _NMS_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
10 | 
11 | void _nms(int boxes_num, float * boxes_dev,
12 |           unsigned long long * mask_dev, float nms_overlap_thresh);
13 | 
14 | #ifdef __cplusplus
15 | }
16 | #endif
17 | 
18 | #endif
19 | 
20 | 


--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/build.sh:
--------------------------------------------------------------------------------
 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
 2 |            -gencode arch=compute_35,code=sm_35 \
 3 |            -gencode arch=compute_50,code=sm_50 \
 4 |            -gencode arch=compute_52,code=sm_52 \
 5 |            -gencode arch=compute_60,code=sm_60 \
 6 |            -gencode arch=compute_61,code=sm_61"
 7 | 
 8 | 
 9 | # Build NMS
10 | cd nms/src/cuda
11 | echo "Compiling nms kernels by nvcc..."
12 | /usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
13 | cd ../../
14 | python build.py
15 | cd ../
16 | 


--------------------------------------------------------------------------------
/lib/utils/log.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | 
 4 | def get_logger(name='root'):
 5 |     formatter = logging.Formatter(
 6 |         # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
 7 |         fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
 8 | 
 9 |     handler = logging.StreamHandler()
10 |     handler.setFormatter(formatter)
11 | 
12 |     logger = logging.getLogger(name)
13 |     logger.setLevel(logging.DEBUG)
14 |     logger.addHandler(handler)
15 |     return logger
16 | 
17 | 
18 | logger = get_logger('root')
19 | 


--------------------------------------------------------------------------------
/datasets/coco_data/preprocessing.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Provides different utilities to preprocess images.
 3 | Args:
 4 | image: A np.array representing an image of (h,w,3).
 5 | 
 6 | Returns:
 7 | A preprocessed image. which dtype is np.float32
 8 | and transposed to (3,h,w).
 9 | 
10 | """
11 | 
12 | import cv2
13 | import numpy as np
14 | 
15 | def resnet_preprocess(image):
16 |     image = image.astype(np.float32) / 255.
17 |     means = [0.485, 0.456, 0.406]
18 |     stds = [0.229, 0.224, 0.225]
19 | 
20 |     preprocessed_img = image.copy()[:, :, ::-1]
21 |     for i in range(3):
22 |         preprocessed_img[:, :, i] = preprocessed_img[:, :, i] - means[i]
23 |         preprocessed_img[:, :, i] = preprocessed_img[:, :, i] / stds[i]
24 | 
25 |     preprocessed_img = preprocessed_img.transpose((2, 0, 1)).astype(np.float32)
26 |     return preprocessed_img
27 | 


--------------------------------------------------------------------------------
/lib/nms/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = ['src/nms.c']
 7 | headers = ['src/nms.h']
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/nms_cuda.c']
14 |     headers += ['src/nms_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/nms_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.nms',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects,
31 |     extra_compile_args=['-std=c99']
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/lib/utils/meter.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import numpy as np
 3 | 
 4 | 
 5 | class Meter(object):
 6 |     def reset(self):
 7 |         pass
 8 | 
 9 |     def add(self):
10 |         pass
11 | 
12 |     def value(self):
13 |         pass
14 | 
15 | 
16 | class AverageValueMeter(Meter):
17 |     def __init__(self):
18 |         super(AverageValueMeter, self).__init__()
19 |         self.reset()
20 | 
21 |     def add(self, value, n=1):
22 |         self.sum += value
23 |         self.var += value * value
24 |         self.n += n
25 | 
26 |     def value(self):
27 |         n = self.n
28 |         if n == 0:
29 |             mean, std = np.nan, np.nan
30 |         elif n == 1:
31 |             return self.sum, np.inf
32 |         else:
33 |             mean = self.sum / n
34 |             std = math.sqrt((self.var - n * mean * mean) / (n - 1.0))
35 |         return mean, std
36 | 
37 |     def reset(self):
38 |         self.sum = 0.0
39 |         self.n = 0
40 |         self.var = 0.0
41 | 
42 |     def __float__(self):
43 |         return self.value()[0]


--------------------------------------------------------------------------------
/evaluate/multipose_coco_eval.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_coco_eval.py')[0]
 3 | os.chdir(root_path)
 4 | sys.path.append(root_path)
 5 | 
 6 | from network.posenet import poseNet
 7 | from evaluate.tester import Tester
 8 | 
 9 | backbone = 'resnet101'
10 | 
11 | # Set Training parameters
12 | params = Tester.TestParams()
13 | params.subnet_name = 'both'
14 | params.inp_size = 480  # input picture size = (inp_size, inp_size)
15 | params.coeff = 2
16 | params.in_thres = 0.21
17 | params.coco_root = '/data/COCO/'
18 | params.testresult_write_json = False  # Whether to write json result
19 | params.coco_result_filename = './demo/multipose_coco2017_results.json'
20 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
21 | 
22 | # model
23 | if backbone == 'resnet101':
24 |     model = poseNet(101)
25 | elif backbone == 'resnet50':
26 |     model = poseNet(50)
27 | 
28 | for name, module in model.named_children():
29 |     for para in module.parameters():
30 |         para.requires_grad = False
31 | 
32 | tester = Tester(model, params)
33 | tester.coco_eval()  # pic_test
34 | 


--------------------------------------------------------------------------------
/evaluate/multipose_test.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_test.py')[0]
 3 | os.chdir(root_path)
 4 | sys.path.append(root_path)
 5 | 
 6 | from network.posenet import poseNet
 7 | from evaluate.tester import Tester
 8 | 
 9 | backbone = 'resnet101'
10 | 
11 | # Set Training parameters
12 | params = Tester.TestParams()
13 | params.subnet_name = 'both'
14 | params.inp_size = 480  # input picture size = (inp_size, inp_size)
15 | params.coeff = 2
16 | params.in_thres = 0.21
17 | params.testdata_dir = './demo/test_images/'
18 | params.testresult_dir = './demo/output/'
19 | params.testresult_write_image = True  # Whether to write result pictures
20 | params.testresult_write_json = False  # Whether to write json result
21 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
22 | 
23 | # model
24 | if backbone == 'resnet101':
25 |     model = poseNet(101)
26 | elif backbone == 'resnet50':
27 |     model = poseNet(50)
28 | 
29 | for name, module in model.named_children():
30 |     for para in module.parameters():
31 |         para.requires_grad = False
32 | 
33 | tester = Tester(model, params)
34 | tester.test()  # pic_test
35 | 


--------------------------------------------------------------------------------
/datasets/dataloader.py:
--------------------------------------------------------------------------------
 1 | from typing import Generator
 2 | from torch.utils.data.dataloader import DataLoader, _DataLoaderIter
 3 | from lib.utils.log import logger
 4 | 
 5 | 
 6 | class sDataLoader(DataLoader):
 7 |     def get_stream(self):
 8 |         """
 9 |         Return a generate that can yield endless data.
10 |         :Example:
11 |         stream = get_stream()
12 |         for i in range(100):
13 |             batch = next(stream)
14 | 
15 |         :return: stream
16 |         :rtype: Generator
17 |         """
18 |         while True:
19 |             for data in _DataLoaderIter(self):
20 |                 yield data
21 | 
22 |     @staticmethod
23 |     def copy(loader):
24 |         """
25 |         Init a sDataloader from an existing Dataloader
26 |         :param loader: an instance of Dataloader
27 |         :type loader: DataLoader
28 |         :return: a new instance of sDataloader
29 |         :rtype: sDataLoader
30 |         """
31 |         if not isinstance(loader, DataLoader):
32 |             logger('loader should be an instance of Dataloader, but got {}'.format(type(loader)))
33 |             return loader
34 | 
35 |         new_loader = sDataLoader(loader.dataset)
36 |         for k, v in loader.__dict__.items():
37 |             setattr(new_loader, k, v)
38 |         return new_loader
39 | 


--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | 
11 | class Timer(object):
12 |     """A simple timer."""
13 |     def __init__(self):
14 |         self.total_time = 0.
15 |         self.calls = 0
16 |         self.start_time = 0.
17 |         self.diff = 0.
18 |         self.average_time = 0.
19 | 
20 |         self.duration = 0.
21 | 
22 |     def tic(self):
23 |         # using time.time instead of time.clock because time time.clock
24 |         # does not normalize for multithreading
25 |         self.start_time = time.time()
26 | 
27 |     def toc(self, average=True):
28 |         self.diff = time.time() - self.start_time
29 |         self.total_time += self.diff
30 |         self.calls += 1
31 |         self.average_time = self.total_time / self.calls
32 |         if average:
33 |             self.duration = self.average_time
34 |         else:
35 |             self.duration = self.diff
36 |         return self.duration
37 | 
38 |     def clear(self):
39 |         self.total_time = 0.
40 |         self.calls = 0
41 |         self.start_time = 0.
42 |         self.diff = 0.
43 |         self.average_time = 0.
44 |         self.duration = 0.
45 | 
46 | 


--------------------------------------------------------------------------------
/lib/nms/pth_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from ._ext import nms
 3 | import numpy as np
 4 | 
 5 | def pth_nms(dets, thresh):
 6 |   """
 7 |   dets has to be a tensor
 8 |   """
 9 |   if not dets.is_cuda:
10 |     x1 = dets[:, 0]
11 |     y1 = dets[:, 1]
12 |     x2 = dets[:, 2]
13 |     y2 = dets[:, 3]
14 |     scores = dets[:, 4]
15 | 
16 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
17 |     order = scores.sort(0, descending=True)[1]
18 |     # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long()
19 | 
20 |     keep = torch.LongTensor(dets.size(0))
21 |     num_out = torch.LongTensor(1)
22 |     nms.cpu_nms(keep, num_out, dets, order, areas, thresh)
23 | 
24 |     return keep[:num_out[0]]
25 |   else:
26 |     x1 = dets[:, 0]
27 |     y1 = dets[:, 1]
28 |     x2 = dets[:, 2]
29 |     y2 = dets[:, 3]
30 |     scores = dets[:, 4]
31 | 
32 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
33 |     order = scores.sort(0, descending=True)[1]
34 |     # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda()
35 | 
36 |     dets = dets[order].contiguous()
37 | 
38 |     keep = torch.LongTensor(dets.size(0))
39 |     num_out = torch.LongTensor(1)
40 |     # keep = torch.cuda.LongTensor(dets.size(0))
41 |     # num_out = torch.cuda.LongTensor(1)
42 |     nms.gpu_nms(keep, num_out, dets, thresh)
43 | 
44 |     return order[keep[:num_out[0]].cuda()].contiguous()
45 |     # return order[keep[:num_out[0]]].contiguous()
46 | 
47 | 


--------------------------------------------------------------------------------
/datasets/coco_data/heatmap.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | import random
 4 | import sys
 5 | 
 6 | import cv2
 7 | import matplotlib.pyplot as plt
 8 | import numpy as np
 9 | from scipy import misc, ndimage
10 | 
11 | 
12 | """Implement the generate of every channel of ground truth heatmap.
13 | :param centerA: int with shape (2,), every coordinate of person's keypoint.
14 | :param accumulate_confid_map: one channel of heatmap, which is accumulated, 
15 |        np.log(100) is the max value of heatmap.
16 | :param params_transform: store the value of stride and crop_szie_y, crop_size_x                 
17 | """
18 | 
19 | 
20 | def putGaussianMaps(center, accumulate_confid_map, params_transform):
21 |     crop_size_y = params_transform['crop_size_y']
22 |     crop_size_x = params_transform['crop_size_x']
23 |     stride = params_transform['stride']
24 |     sigma = params_transform['sigma']
25 | 
26 |     grid_y = int(crop_size_y / stride)
27 |     grid_x = int(crop_size_x / stride)
28 |     start = stride / 2.0 - 0.5
29 |     y_range = [i for i in range(grid_y)]
30 |     x_range = [i for i in range(grid_x)]
31 |     xx, yy = np.meshgrid(x_range, y_range)
32 |     xx = xx * stride + start
33 |     yy = yy * stride + start
34 |     d2 = (xx - center[0]) ** 2 + (yy - center[1]) ** 2
35 |     exponent = d2 / 2.0 / sigma / sigma
36 |     mask = exponent <= 4.6052
37 |     cofid_map = np.exp(-exponent)
38 |     cofid_map = np.multiply(mask, cofid_map)
39 |     accumulate_confid_map += cofid_map
40 |     accumulate_confid_map[accumulate_confid_map > 1.0] = 1.0
41 |     return accumulate_confid_map
42 | 


--------------------------------------------------------------------------------
/evaluate/multipose_keypoint_val.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_keypoint_val.py')[0]
 3 | os.chdir(root_path)
 4 | sys.path.append(root_path)
 5 | 
 6 | from training.batch_processor import batch_processor
 7 | from network.posenet import poseNet
 8 | from datasets.coco import get_loader
 9 | from evaluate.tester import Tester
10 | 
11 | # Hyper-params
12 | coco_root = '/data/COCO/'
13 | backbone = 'resnet101'  # 'resnet50'
14 | data_dir = coco_root+'images/'
15 | mask_dir = coco_root
16 | json_path = coco_root+'COCO.json'
17 | inp_size = 480  # input size 480*480
18 | feat_stride = 4
19 | 
20 | # Set Training parameters
21 | params = Tester.TestParams()
22 | params.subnet_name = 'keypoint_subnet'
23 | params.gpus = [0]
24 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
25 | params.batch_size = 6 * len(params.gpus)
26 | params.print_freq = 50
27 | 
28 | # validation data
29 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride,
30 |                         preprocess='resnet', batch_size=params.batch_size-2*len(params.gpus), training=False,
31 |                         shuffle=False, num_workers=4, subnet=params.subnet_name)
32 | print('val dataset len: {}'.format(len(valid_data.dataset)))
33 | 
34 | # model
35 | if backbone == 'resnet101':
36 |     model = poseNet(101)
37 | elif backbone == 'resnet50':
38 |     model = poseNet(50)
39 | 
40 | for name, module in model.named_children():
41 |     for para in module.parameters():
42 |         para.requires_grad = False
43 | 
44 | tester = Tester(model, params, batch_processor, valid_data)
45 | tester.val()
46 | 


--------------------------------------------------------------------------------
/evaluate/multipose_detection_val.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_detection_val.py')[0]
 3 | os.chdir(root_path)
 4 | sys.path.append(root_path)
 5 | 
 6 | from training.batch_processor import batch_processor
 7 | from network.posenet import poseNet
 8 | from datasets.coco import get_loader
 9 | from evaluate.tester import Tester
10 | 
11 | # Hyper-params
12 | coco_root = '/data/COCO/'
13 | backbone = 'resnet101'  # 'resnet50'
14 | data_dir = coco_root+'images/'
15 | mask_dir = coco_root
16 | json_path = coco_root+'COCO.json'
17 | inp_size = 608  # input size 608*608
18 | feat_stride = 4
19 | 
20 | # Set Training parameters
21 | params = Tester.TestParams()
22 | params.subnet_name = 'detection_subnet'
23 | params.gpus = [0]
24 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
25 | params.batch_size = 25 * len(params.gpus)
26 | params.print_freq = 100
27 | 
28 | # validation data
29 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride,
30 |                         preprocess='resnet', batch_size=params.batch_size-10*len(params.gpus), training=False,
31 |                         shuffle=False, num_workers=8, subnet=params.subnet_name)
32 | print('val dataset len: {}'.format(len(valid_data.dataset)))
33 | 
34 | # model
35 | if backbone == 'resnet101':
36 |     model = poseNet(101)
37 | elif backbone == 'resnet50':
38 |     model = poseNet(50)
39 | 
40 | for name, module in model.named_children():
41 |     for para in module.parameters():
42 |         para.requires_grad = False
43 | 
44 | tester = Tester(model, params, batch_processor, valid_data)
45 | tester.val()
46 | 


--------------------------------------------------------------------------------
/evaluate/multipose_prn_val.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_prn_val.py')[0]
 3 | os.chdir(root_path)
 4 | sys.path.append(root_path)
 5 | 
 6 | from network.posenet import poseNet
 7 | from pycocotools.coco import COCO
 8 | from datasets.coco_data.prn_data_pipeline import PRN_CocoDataset
 9 | from torch.utils.data import DataLoader
10 | from training.batch_processor import batch_processor
11 | from evaluate.tester import Tester
12 | 
13 | 
14 | # Hyper-params
15 | coco_root = '/data/COCO/'
16 | backbone='resnet101'  # 'resnet50'
17 | inp_size = 480  # input size 480*480
18 | feat_stride = 4
19 | node_count = 1024  # Hidden Layer Node Count
20 | coeff = 2  # Coefficient of bbox size
21 | threshold = 0.21  # BBOX threshold
22 | num_of_keypoints = 3  # Minimum number of keypoints for each bbox in training
23 | 
24 | # Set Training parameters
25 | params = Tester.TestParams()
26 | params.subnet_name = 'prn_subnet'
27 | params.gpus = [0]
28 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
29 | params.batch_size = 8 * len(params.gpus)
30 | params.print_freq = 500
31 | 
32 | # validation data
33 | coco_val = COCO(os.path.join(coco_root, 'annotations/person_keypoints_val2017.json'))
34 | valid_data = DataLoader(dataset=PRN_CocoDataset(
35 |     coco_val, num_of_keypoints=num_of_keypoints, coeff=coeff, threshold=threshold,
36 |     inp_size=inp_size, feat_stride=feat_stride), batch_size=params.batch_size, num_workers=4, shuffle=False)
37 | print('val dataset len: {}'.format(len(valid_data.dataset)))
38 | 
39 | # model
40 | if backbone == 'resnet101':
41 |     model = poseNet(101, prn_node_count=node_count, prn_coeff=coeff)
42 | elif backbone == 'resnet50':
43 |     model = poseNet(50, prn_node_count=node_count, prn_coeff=coeff)
44 | 
45 | for name, module in model.named_children():
46 |     for para in module.parameters():
47 |         para.requires_grad = False
48 | 
49 | tester = Tester(model, params, batch_processor, valid_data)
50 | tester.val()
51 | 


--------------------------------------------------------------------------------
/configs/coco/first_experiment.yaml:
--------------------------------------------------------------------------------
 1 | CUDNN:
 2 |   BENCHMARK: true
 3 |   DETERMINISTIC: false
 4 |   ENABLED: true
 5 | DATASET:
 6 |   DATASET: mpii
 7 |   DATA_FORMAT: jpg
 8 |   FLIP: true
 9 |   HYBRID_JOINTS_TYPE: ''
10 |   ROOT: ''
11 |   ROT_FACTOR: 30
12 |   SCALE_FACTOR: 0.25
13 |   SELECT_DATA: false
14 |   TEST_SET: valid
15 |   TRAIN_SET: train
16 | DATA_DIR: ''
17 | DEBUG:
18 |   DEBUG: false
19 |   SAVE_BATCH_IMAGES_GT: false
20 |   SAVE_BATCH_IMAGES_PRED: false
21 |   SAVE_HEATMAPS_GT: false
22 |   SAVE_HEATMAPS_PRED: false
23 | GPUS: '0'
24 | LOG_DIR: ''
25 | LOSS:
26 |   USE_TARGET_WEIGHT: true
27 | MODEL:
28 |   EXTRA: !!python/object/new:easydict.EasyDict
29 |     dictitems:
30 |       DECONV_WITH_BIAS: false
31 |       FINAL_CONV_KERNEL: 1
32 |       HEATMAP_SIZE: &id001
33 |       - 64
34 |       - 64
35 |       NUM_DECONV_FILTERS: &id002
36 |       - 256
37 |       - 256
38 |       - 256
39 |       NUM_DECONV_KERNELS: &id003
40 |       - 4
41 |       - 4
42 |       - 4
43 |       NUM_DECONV_LAYERS: 3
44 |       NUM_LAYERS: 50
45 |       SIGMA: 2
46 |       TARGET_TYPE: gaussian
47 |     state:
48 |       DECONV_WITH_BIAS: false
49 |       FINAL_CONV_KERNEL: 1
50 |       HEATMAP_SIZE: *id001
51 |       NUM_DECONV_FILTERS: *id002
52 |       NUM_DECONV_KERNELS: *id003
53 |       NUM_DECONV_LAYERS: 3
54 |       NUM_LAYERS: 50
55 |       SIGMA: 2
56 |       TARGET_TYPE: gaussian
57 |   IMAGE_SIZE:
58 |   - 256
59 |   - 256
60 |   INIT_WEIGHTS: true
61 |   NAME: pose_resnet
62 |   NUM_JOINTS: 16
63 |   PRETRAINED: ''
64 | OUTPUT_DIR: ''
65 | PRINT_FREQ: 20
66 | TEST:
67 |   BATCH_SIZE: 32
68 |   BBOX_THRE: 1.0
69 |   COCO_BBOX_FILE: ''
70 |   FLIP_TEST: false
71 |   IMAGE_THRE: 0.0
72 |   IN_VIS_THRE: 0.0
73 |   MODEL_FILE: ''
74 |   NMS_THRE: 1.0
75 |   OKS_THRE: 0.5
76 |   POST_PROCESS: true
77 |   SHIFT_HEATMAP: true
78 |   USE_GT_BBOX: false
79 | TRAIN:
80 |   BATCH_SIZE: 32
81 |   BEGIN_EPOCH: 0
82 |   CHECKPOINT: ''
83 |   END_EPOCH: 140
84 |   GAMMA1: 0.99
85 |   GAMMA2: 0.0
86 |   LR: 0.001
87 |   LR_FACTOR: 0.1
88 |   LR_STEP:
89 |   - 90
90 |   - 110
91 |   MOMENTUM: 0.9
92 |   NESTEROV: false
93 |   OPTIMIZER: adam
94 |   RESUME: false
95 |   SHUFFLE: true
96 |   WD: 0.0001
97 | WORKERS: 4
98 | 


--------------------------------------------------------------------------------
/network/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import numpy as np
 4 | 
 5 | 
 6 | class BBoxTransform(nn.Module):
 7 | 
 8 |     def __init__(self, mean=None, std=None):
 9 |         super(BBoxTransform, self).__init__()
10 |         if mean is None:
11 |             self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda()
12 |         else:
13 |             self.mean = mean
14 |         if std is None:
15 |             self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda()
16 |         else:
17 |             self.std = std
18 | 
19 |     def forward(self, boxes, deltas):
20 | 
21 |         widths  = boxes[:, :, 2] - boxes[:, :, 0]
22 |         heights = boxes[:, :, 3] - boxes[:, :, 1]
23 |         ctr_x   = boxes[:, :, 0] + 0.5 * widths
24 |         ctr_y   = boxes[:, :, 1] + 0.5 * heights
25 | 
26 |         dx = deltas[:, :, 0] * self.std[0] + self.mean[0]
27 |         dy = deltas[:, :, 1] * self.std[1] + self.mean[1]
28 |         dw = deltas[:, :, 2] * self.std[2] + self.mean[2]
29 |         dh = deltas[:, :, 3] * self.std[3] + self.mean[3]
30 | 
31 |         pred_ctr_x = ctr_x + dx * widths
32 |         pred_ctr_y = ctr_y + dy * heights
33 |         pred_w     = torch.exp(dw) * widths
34 |         pred_h     = torch.exp(dh) * heights
35 | 
36 |         pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
37 |         pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
38 |         pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w
39 |         pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h
40 | 
41 |         pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2)
42 | 
43 |         return pred_boxes
44 | 
45 | 
46 | class ClipBoxes(nn.Module):
47 | 
48 |     def __init__(self, width=None, height=None):
49 |         super(ClipBoxes, self).__init__()
50 | 
51 |     def forward(self, boxes, img):
52 | 
53 |         batch_size, num_channels, height, width = img.shape
54 | 
55 |         boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0)
56 |         boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0)
57 | 
58 |         boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width)
59 |         boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height)
60 |       
61 |         return boxes
62 | 


--------------------------------------------------------------------------------
/training/batch_processor.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Utility functions for rtpose project
 3 | --------------------------------------------
 4 | Change to pytorch=0.4.0 by @LiMeng95
 5 | Utility functions for Multipose project
 6 | '''
 7 | 
 8 | import torch
 9 | 
10 | def batch_processor(state, batch):
11 |     gpus = state.params.gpus
12 |     subnet_name = state.params.subnet_name  # 'detection_subnet'/'keypoint_subnet'/'prn_subnet'
13 | 
14 |     if subnet_name == 'keypoint_subnet':
15 |         inp, heat_temp, heat_weight = batch
16 | 
17 |         if not state.model.training:  # used for inference
18 |             with torch.no_grad():
19 |                 input_var = inp.cuda(device=gpus[0])
20 |                 heat_weight_var = heat_weight.cuda(device=gpus[0], async=False)
21 |                 heat_temp_var = heat_temp.cuda(device=gpus[0], async=False)
22 |         else:
23 |             input_var = inp.cuda(device=gpus[0])
24 |             heat_weight_var = heat_weight.cuda(device=gpus[0], async=False)
25 |             heat_temp_var = heat_temp.cuda(device=gpus[0], async=False)
26 | 
27 |         inputs = [[input_var, subnet_name]]
28 |         gts = [subnet_name, heat_temp_var, heat_weight_var]
29 |         saved_for_eval = []
30 |     elif subnet_name == 'detection_subnet':  #'detection_subnet'
31 |         inp, anno = batch  # anno: [x1, y1, x2, y2, category_id]
32 | 
33 |         if not state.model.training:  # used for inference
34 |             with torch.no_grad():
35 |                 input_var = inp.cuda(device=gpus[0])
36 |                 anno_var = anno.cuda(device=gpus[0])
37 |         else:
38 |             input_var = inp.cuda(device=gpus[0])
39 |             anno_var = anno.cuda(device=gpus[0])
40 | 
41 |         inputs = [[input_var, subnet_name]]
42 |         gts = [subnet_name, anno_var]
43 |         saved_for_eval = []
44 |     else:  #'prn_subnet'
45 |         inp, label = batch  # input, label
46 | 
47 |         if not state.model.training:  # used for inference
48 |             with torch.no_grad():
49 |                 input_var = inp.cuda(device=gpus[0]).float()
50 |                 anno_var = label.cuda(device=gpus[0]).float()
51 |         else:
52 |             input_var = inp.cuda(device=gpus[0]).float()
53 |             anno_var = label.cuda(device=gpus[0]).float()
54 | 
55 |         inputs = [[input_var, subnet_name]]
56 |         gts = [subnet_name, anno_var]
57 |         saved_for_eval = []
58 | 
59 |     return inputs, gts, saved_for_eval
60 | 
61 | 


--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
 7 | #include <THC/THC.h>
 8 | #include <TH/TH.h>
 9 | #include <math.h>
10 | #include <stdio.h>
11 | 
12 | #include "cuda/nms_kernel.h"
13 | 
14 | 
15 | extern THCState *state;
16 | 
17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) {
18 |   // boxes has to be sorted
19 |   THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous");
20 |   THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous");
21 |   // Number of ROIs
22 |   int boxes_num = THCudaTensor_size(state, boxes, 0);
23 |   int boxes_dim = THCudaTensor_size(state, boxes, 1);
24 | 
25 |   float* boxes_flat = THCudaTensor_data(state, boxes);
26 | 
27 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
28 |   THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks);
29 |   unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);
30 | 
31 |   _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh);
32 | 
33 |   THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks);
34 |   THLongTensor_copyCuda(state, mask_cpu, mask);
35 |   THCudaLongTensor_free(state, mask);
36 | 
37 |   unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);
38 | 
39 |   THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks);
40 |   unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);
41 |   THLongTensor_fill(remv_cpu, 0);
42 | 
43 |   long * keep_flat = THLongTensor_data(keep);
44 |   long num_to_keep = 0;
45 | 
46 |   int i, j;
47 |   for (i = 0; i < boxes_num; i++) {
48 |     int nblock = i / threadsPerBlock;
49 |     int inblock = i % threadsPerBlock;
50 | 
51 |     if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) {
52 |       keep_flat[num_to_keep++] = i;
53 |       unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks;
54 |       for (j = nblock; j < col_blocks; j++) {
55 |         remv_cpu_flat[j] |= p[j];
56 |       }
57 |     }
58 |   }
59 | 
60 |   long * num_out_flat = THLongTensor_data(num_out);
61 |   * num_out_flat = num_to_keep;
62 | 
63 |   THLongTensor_free(mask_cpu);
64 |   THLongTensor_free(remv_cpu);
65 | 
66 |   return 1;
67 | }
68 | 


--------------------------------------------------------------------------------
/datasets/coco.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | from torchvision.transforms import ToTensor
 4 | from datasets.coco_data.COCO_data_pipeline import Cocokeypoints, Cocobbox, bbox_collater
 5 | from datasets.dataloader import sDataLoader
 6 | from pycocotools.coco import COCO
 7 | 
 8 | 
 9 | def get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, preprocess,
10 |                batch_size, training=True, shuffle=True, num_workers=3, subnet='keypoint_subnet'):
11 |     """ Build a COCO dataloader
12 |     :param json_path: string, path to jso file
13 |     :param datadir: string, path to coco data
14 |     :returns : the data_loader
15 |     """
16 |     with open(json_path) as data_file:
17 |         data_this = json.load(data_file)
18 |         data = data_this['root']
19 | 
20 |     num_samples = len(data)
21 |     train_indexes = []
22 |     val_indexes = []
23 | 
24 |     if subnet == 'keypoint_subnet':
25 |         for count in range(num_samples):
26 |             if data[count]['isValidation'] != 0.:
27 |                 val_indexes.append(count)
28 |             else:
29 |                 train_indexes.append(count)
30 | 
31 |         coco_data = Cocokeypoints(root=data_dir, mask_dir=mask_dir,
32 |                                   index_list=train_indexes if training else val_indexes,
33 |                                   data=data, inp_size=inp_size, feat_stride=feat_stride,
34 |                                   preprocess=preprocess, transform=ToTensor())
35 |         data_loader = sDataLoader(coco_data, batch_size=batch_size,
36 |                                   shuffle=shuffle, num_workers=num_workers)
37 | 
38 |     elif subnet == 'detection_subnet':
39 |         if training:
40 |             anno_path = os.path.join(mask_dir, 'annotations', 'person_keypoints_train2017.json')
41 |         else:
42 |             anno_path = os.path.join(mask_dir, 'annotations', 'person_keypoints_val2017.json')
43 |         coco = COCO(anno_path)
44 |         images_ids = coco.getImgIds()
45 | 
46 |         data_indexes = []
47 |         for count in range(num_samples):
48 |             if int(data[count]['image_id']) in images_ids:
49 |                 data_indexes.append(count)
50 | 
51 |         coco_data = Cocobbox(root=data_dir, mask_dir=mask_dir, index_list=data_indexes,
52 |                              data=data, inp_size=inp_size, feat_stride=feat_stride, coco=coco,
53 |                              preprocess=preprocess, training=True if training else False)
54 | 
55 |         data_loader = sDataLoader(coco_data, batch_size=batch_size, shuffle=shuffle,
56 |                                   num_workers=num_workers, collate_fn=bbox_collater)
57 | 
58 |     return data_loader
59 | 


--------------------------------------------------------------------------------
/lib/nms/src/nms.c:
--------------------------------------------------------------------------------
 1 | #include <TH/TH.h>
 2 | #include <math.h>
 3 | 
 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) {
 5 |     // boxes has to be sorted
 6 |     THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous");
 7 |     THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous");
 8 |     THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous");
 9 |     THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous");
10 |     // Number of ROIs
11 |     long boxes_num = THFloatTensor_size(boxes, 0);
12 |     long boxes_dim = THFloatTensor_size(boxes, 1);
13 | 
14 |     long * keep_out_flat = THLongTensor_data(keep_out);
15 |     float * boxes_flat = THFloatTensor_data(boxes);
16 |     long * order_flat = THLongTensor_data(order);
17 |     float * areas_flat = THFloatTensor_data(areas);
18 | 
19 |     THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num);
20 |     THByteTensor_fill(suppressed, 0);
21 |     unsigned char * suppressed_flat =  THByteTensor_data(suppressed);
22 | 
23 |     // nominal indices
24 |     int i, j;
25 |     // sorted indices
26 |     int _i, _j;
27 |     // temp variables for box i's (the box currently under consideration)
28 |     float ix1, iy1, ix2, iy2, iarea;
29 |     // variables for computing overlap with box j (lower scoring box)
30 |     float xx1, yy1, xx2, yy2;
31 |     float w, h;
32 |     float inter, ovr;
33 | 
34 |     long num_to_keep = 0;
35 |     for (_i=0; _i < boxes_num; ++_i) {
36 |         i = order_flat[_i];
37 |         if (suppressed_flat[i] == 1) {
38 |             continue;
39 |         }
40 |         keep_out_flat[num_to_keep++] = i;
41 |         ix1 = boxes_flat[i * boxes_dim];
42 |         iy1 = boxes_flat[i * boxes_dim + 1];
43 |         ix2 = boxes_flat[i * boxes_dim + 2];
44 |         iy2 = boxes_flat[i * boxes_dim + 3];
45 |         iarea = areas_flat[i];
46 |         for (_j = _i + 1; _j < boxes_num; ++_j) {
47 |             j = order_flat[_j];
48 |             if (suppressed_flat[j] == 1) {
49 |                 continue;
50 |             }
51 |             xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]);
52 |             yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]);
53 |             xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]);
54 |             yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]);
55 |             w = fmaxf(0.0, xx2 - xx1 + 1);
56 |             h = fmaxf(0.0, yy2 - yy1 + 1);
57 |             inter = w * h;
58 |             ovr = inter / (iarea + areas_flat[j] - inter);
59 |             if (ovr >= nms_overlap_thresh) {
60 |                 suppressed_flat[j] = 1;
61 |             }
62 |         }
63 |     }
64 | 
65 |     long *num_out_flat = THLongTensor_data(num_out);
66 |     *num_out_flat = num_to_keep;
67 |     THByteTensor_free(suppressed);
68 |     return 1;
69 | }


--------------------------------------------------------------------------------
/datasets/data_parallel.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | import torch
 3 | from torch.nn import DataParallel
 4 | from torch.autograd import Variable
 5 | from torch.nn.parallel._functions import Scatter, Gather
 6 | 
 7 | 
 8 | class ScatterList(list):
 9 |     pass
10 | 
11 | 
12 | class ConstList(list):
13 |     pass
14 | 
15 | 
16 | class ListDataParallel(DataParallel):
17 |     def scatter(self, inputs, kwargs, device_ids):
18 |         return pose_scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
19 | 
20 |     def gather(self, outputs, output_device):
21 |         return pose_gather(outputs, output_device, dim=self.dim)
22 | 
23 | 
24 | def scatter(inputs, target_gpus, dim=0):
25 |     r"""
26 |     Slices variables into approximately equal chunks and
27 |     distributes them across given GPUs. Duplicates
28 |     references to objects that are not variables. Does not
29 |     support Tensors.
30 |     """
31 |     def scatter_map(obj):
32 |         if isinstance(obj, Variable):
33 |             return Scatter.apply(target_gpus, None, dim, obj)
34 |         assert not torch.is_tensor(obj), "Tensors not supported in scatter."
35 |         if isinstance(obj, ScatterList):
36 |             assert len(obj) == len(target_gpus)
37 |             return [obj[i] for i in range(len(target_gpus))]
38 |         if isinstance(obj, tuple) and len(obj) > 0:
39 |             return list(zip(*map(scatter_map, obj)))
40 |         if isinstance(obj, list) and len(obj) > 0:
41 |             return list(map(list, zip(*map(scatter_map, obj))))
42 |         if isinstance(obj, dict) and len(obj) > 0:
43 |             return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
44 |         return [obj for targets in target_gpus]
45 | 
46 |     return scatter_map(inputs)
47 | 
48 | 
49 | def pose_scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
50 |     r"""Scatter with support for kwargs dictionary"""
51 |     inputs = scatter(inputs, target_gpus, dim) if inputs else []
52 |     kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
53 |     if len(inputs) < len(kwargs):
54 |         inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
55 |     elif len(kwargs) < len(inputs):
56 |         kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
57 |     inputs = tuple(inputs)
58 |     kwargs = tuple(kwargs)
59 |     return inputs, kwargs
60 | 
61 | 
62 | def pose_gather(outputs, target_device, dim=0):
63 |     r"""
64 |     Gathers variables from different GPUs on a specified device
65 |       (-1 means the CPU).
66 |     """
67 |     def gather_map(outputs):
68 |         if isinstance(outputs, Variable):
69 |             if target_device == -1:
70 |                 return outputs.cpu()
71 |             return outputs.cuda(target_device)
72 | 
73 |         out = outputs[0]
74 |         if isinstance(out, Variable):
75 |             return Gather.apply(target_device, dim, *outputs)
76 |         if out is None:
77 |             return None
78 | 
79 |         if isinstance(out, str):
80 |             return out
81 |         if isinstance(out, ConstList):
82 |             return out
83 |         if isinstance(out, ScatterList):
84 |             return tuple(map(gather_map, itertools.chain(*outputs)))
85 | 
86 |         return type(out)(map(gather_map, zip(*outputs)))
87 |     return gather_map(outputs)
88 | 


--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | #include <math.h>
12 | #include <stdio.h>
13 | #include <float.h>
14 | #include "nms_kernel.h"
15 | 
16 | __device__ inline float devIoU(float const * const a, float const * const b) {
17 |   float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
18 |   float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
19 |   float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f);
20 |   float interS = width * height;
21 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
22 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
23 |   return interS / (Sa + Sb - interS);
24 | }
25 | 
26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
27 |                            const float *dev_boxes, unsigned long long *dev_mask) {
28 |   const int row_start = blockIdx.y;
29 |   const int col_start = blockIdx.x;
30 | 
31 |   // if (row_start > col_start) return;
32 | 
33 |   const int row_size =
34 |         fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
35 |   const int col_size =
36 |         fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
37 | 
38 |   __shared__ float block_boxes[threadsPerBlock * 5];
39 |   if (threadIdx.x < col_size) {
40 |     block_boxes[threadIdx.x * 5 + 0] =
41 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
42 |     block_boxes[threadIdx.x * 5 + 1] =
43 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
44 |     block_boxes[threadIdx.x * 5 + 2] =
45 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
46 |     block_boxes[threadIdx.x * 5 + 3] =
47 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
48 |     block_boxes[threadIdx.x * 5 + 4] =
49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
50 |   }
51 |   __syncthreads();
52 | 
53 |   if (threadIdx.x < row_size) {
54 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
55 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
56 |     int i = 0;
57 |     unsigned long long t = 0;
58 |     int start = 0;
59 |     if (row_start == col_start) {
60 |       start = threadIdx.x + 1;
61 |     }
62 |     for (i = start; i < col_size; i++) {
63 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
64 |         t |= 1ULL << i;
65 |       }
66 |     }
67 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
68 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
69 |   }
70 | }
71 | 
72 | 
73 | void _nms(int boxes_num, float * boxes_dev,
74 |           unsigned long long * mask_dev, float nms_overlap_thresh) {
75 | 
76 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
77 |               DIVUP(boxes_num, threadsPerBlock));
78 |   dim3 threads(threadsPerBlock);
79 |   nms_kernel<<<blocks, threads>>>(boxes_num,
80 |                                   nms_overlap_thresh,
81 |                                   boxes_dev,
82 |                                   mask_dev);
83 | }
84 | 
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 | 


--------------------------------------------------------------------------------
/training/multipose_prn_train.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | root_path = os.path.realpath(__file__).split('/training/multipose_prn_train.py')[0]
 3 | os.chdir(root_path)
 4 | sys.path.append(root_path)
 5 | 
 6 | import torch
 7 | import torch.backends.cudnn as cudnn
 8 | from pycocotools.coco import COCO
 9 | from torch.utils.data import DataLoader
10 | from training.trainer import Trainer
11 | from datasets.coco_data.prn_data_pipeline import PRN_CocoDataset
12 | from network.posenet import poseNet
13 | from training.batch_processor import batch_processor
14 | from torch.optim.lr_scheduler import ReduceLROnPlateau
15 | 
16 | # Hyper-params
17 | coco_root = '/data/COCO/'
18 | backbone='resnet101'  # 'resnet50'
19 | opt = 'adam'
20 | inp_size = 480  # input size 480*480
21 | feat_stride = 4
22 | node_count = 1024  # Hidden Layer Node Count
23 | coeff = 2  # Coefficient of bbox size
24 | threshold = 0.21  # BBOX threshold
25 | num_of_keypoints = 3  # Minimum number of keypoints for each bbox in training
26 | 
27 | # model parameters in MultiPoseNet
28 | prn_para = ['prn']
29 | 
30 | #####################################################################
31 | # Set Training parameters
32 | params = Trainer.TrainParams()
33 | params.exp_name = 'prn_subnet/'
34 | params.subnet_name = 'prn_subnet'
35 | params.save_dir = './extra/models/{}'.format(params.exp_name)
36 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
37 | params.ignore_opt_state = True
38 | 
39 | params.max_epoch = 40
40 | params.init_lr = 1.0e-3
41 | params.lr_decay = 0.9
42 | 
43 | params.gpus = [0]
44 | params.batch_size = 8 * len(params.gpus)
45 | params.val_nbatch_end_epoch = 2000
46 | 
47 | params.print_freq = 1000
48 | 
49 | # model
50 | if backbone == 'resnet101':
51 |     model = poseNet(101, prn_node_count=node_count, prn_coeff=coeff)
52 | elif backbone == 'resnet50':
53 |     model = poseNet(50, prn_node_count=node_count, prn_coeff=coeff)
54 | 
55 | # Train Key-point Subnet, Fix the weights in detection subnet (RetinaNet)
56 | for name, module in model.named_children():
57 |     if name not in prn_para:
58 |         for para in module.parameters():
59 |             para.requires_grad = False
60 | 
61 | print("Loading dataset...")
62 | # load training data
63 | coco_train = COCO(os.path.join(coco_root, 'annotations/person_keypoints_train2017.json'))
64 | train_data = DataLoader(dataset=PRN_CocoDataset(
65 |     coco_train, num_of_keypoints=num_of_keypoints, coeff=coeff, threshold=threshold,
66 |     inp_size=inp_size, feat_stride=feat_stride),batch_size=params.batch_size, num_workers=4, shuffle=True)
67 | print('train dataset len: {}'.format(len(train_data.dataset)))
68 | 
69 | # load validation data
70 | valid_data = None
71 | if params.val_nbatch > 0:
72 |     coco_val = COCO(os.path.join(coco_root, 'annotations/person_keypoints_val2017.json'))
73 |     valid_data = DataLoader(dataset=PRN_CocoDataset(
74 |         coco_val, num_of_keypoints=num_of_keypoints, coeff=coeff, threshold=threshold,
75 |         inp_size=inp_size, feat_stride=feat_stride), batch_size=params.batch_size, num_workers=4, shuffle=True)
76 |     print('val dataset len: {}'.format(len(valid_data.dataset)))
77 | 
78 | trainable_vars = [param for param in model.parameters() if param.requires_grad]
79 | if opt == 'adam':
80 |     print("Training with adam")
81 |     params.optimizer = torch.optim.Adam(
82 |         trainable_vars, lr=params.init_lr)
83 | 
84 | cudnn.benchmark = True
85 | params.lr_scheduler = ReduceLROnPlateau(params.optimizer, 'min', factor=params.lr_decay, patience=2, verbose=True)
86 | trainer = Trainer(model, params, batch_processor, train_data, valid_data)
87 | trainer.train()
88 | 


--------------------------------------------------------------------------------
/datasets/coco_data/prn_gaussian.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from skimage.filters import gaussian
  3 | 
  4 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89] * 100)
  5 | 
  6 | 
  7 | def multivariate_gaussian(N, sigma=2):
  8 |     t = 4
  9 |     X = np.linspace(-t, t, N)
 10 |     Y = np.linspace(-t, t, N)
 11 |     X, Y = np.meshgrid(X, Y)
 12 |     pos = np.empty(X.shape + (2,))
 13 |     pos[:, :, 0] = X
 14 |     pos[:, :, 1] = Y
 15 |     mu = np.array([0., 0.])
 16 |     sigma = np.array([[sigma, 0], [0, sigma]])
 17 |     n = mu.shape[0]
 18 |     Sigma_det = np.linalg.det(sigma)
 19 |     Sigma_inv = np.linalg.inv(sigma)
 20 |     N = np.sqrt((2 * np.pi) ** n * Sigma_det)
 21 |     fac = np.einsum('...k,kl,...l->...', pos - mu, Sigma_inv, pos - mu)
 22 |     return np.exp(-fac / 2) / N
 23 | 
 24 | 
 25 | def crop_paste(img, c, N=13, sigma=2):
 26 |     Z = multivariate_gaussian(N, sigma)
 27 | 
 28 |     H = img.shape[1]
 29 |     W = img.shape[0]
 30 | 
 31 |     h = (Z.shape[0] - 1) / 2
 32 | 
 33 |     N = Z.shape[0]
 34 |     x1 = (c[0] - h)
 35 |     y1 = (c[1] - h)
 36 | 
 37 |     x2 = (c[0] + h) + 1
 38 |     y2 = (c[1] + h) + 1
 39 | 
 40 |     zx1 = 0
 41 |     zy1 = 0
 42 |     zx2 = N + 1
 43 |     zy2 = N + 1
 44 | 
 45 |     if x1 < 0:
 46 |         x1 = 0
 47 |         zx1 = 0 - (c[0] - h)
 48 | 
 49 |     if y1 < 0:
 50 |         y1 = 0
 51 |         zy1 = 0 - (c[1] - h)
 52 | 
 53 |     if x2 > W - 1:
 54 |         x2 = W - 1
 55 |         zx2 = x2 - x1 + 1
 56 |         x2 = W
 57 | 
 58 |     if y2 > H - 1:
 59 |         y2 = H - 1
 60 |         zy2 = y2 - y1 + 1
 61 |         y2 = H
 62 | 
 63 |     img[x1:x2, y1:y2] = np.maximum(Z[zx1:zx2, zy1:zy2], img[x1:x2, y1:y2])
 64 | 
 65 | 
 66 | '''
 67 | def gaussian(img, N = 13, sigma=2):
 68 |     cs = np.where(img==1)
 69 |     img = np.zeros_like(img)
 70 |     for c in zip(cs[0], cs[1]):
 71 |         crop_paste(img, c, N, sigma)
 72 |     return img
 73 | '''
 74 | 
 75 | 
 76 | def gaussian_multi_input_mp(inp):
 77 |     '''
 78 |     :param inp: Multi person ground truth heatmap input (17 ch) Each channel contains multiple joints.
 79 |     :return: out: Gaussian augmented output. Values are between 0. and 1.
 80 |     '''
 81 | 
 82 |     h, w, ch = inp.shape
 83 |     out = np.zeros_like(inp)
 84 |     for i in range(ch):
 85 |         layer = inp[:, :, i]
 86 |         ind = np.argwhere(layer == 1)
 87 |         b = []
 88 |         if len(ind) > 0:
 89 |             for j in ind:
 90 |                 t = np.zeros((h, w))
 91 |                 t[j[0], j[1]] = 1
 92 |                 t = gaussian(t, sigma=2, mode='constant')
 93 |                 t = t * (1 / t.max())
 94 |                 b.append(t)
 95 | 
 96 |             out[:, :, i] = np.maximum.reduce(b)
 97 |         else:
 98 |             out[:, :, i] = np.zeros((h, w))
 99 |     return out
100 | 
101 | 
102 | def gaussian_multi_output(inp):
103 |     '''
104 |     :param inp: Single person ground truth heatmap input (17 ch) Each channel contains one joint.
105 |     :return: out: Gaussian augmented output. Values are between 0. and 1.
106 |     '''
107 |     h, w, ch = inp.shape
108 |     out = np.zeros_like(inp)
109 |     for i in range(ch):
110 |         j = np.argwhere(inp[:, :, i] == 1)
111 |         if len(j) == 0:
112 |             out[:, :, i] = np.zeros((h, w))
113 |             continue
114 |         j = j[0]
115 |         t = np.zeros((h, w))
116 |         t[j[0], j[1]] = 1
117 |         t = gaussian(t, sigma=5, mode='constant')
118 |         out[:, :, i] = t * (1 / t.max())
119 |     return out
120 | 
121 | 
122 | def crop(img, c, N=13):
123 |     H = img.shape[1]
124 |     W = img.shape[0]
125 | 
126 |     h = (N - 1) / 2
127 | 
128 |     x1 = int(c[0] - h)
129 |     y1 = int(c[1] - h)
130 | 
131 |     x2 = int(c[0] + h) + 1
132 |     y2 = int(c[1] + h) + 1
133 | 
134 |     if x1 < 0:
135 |         x1 = 0
136 | 
137 |     if y1 < 0:
138 |         y1 = 0
139 | 
140 |     if x2 > W - 1:
141 |         x2 = W
142 | 
143 |     if y2 > H - 1:
144 |         y2 = H
145 | 
146 |     return img[x1:x2, y1:y2]
147 | 
148 | 


--------------------------------------------------------------------------------
/training/multipose_detection_train.py:
--------------------------------------------------------------------------------
  1 | import os, sys
  2 | root_path = os.path.realpath(__file__).split('/training/multipose_detection_train.py')[0]
  3 | os.chdir(root_path)
  4 | sys.path.append(root_path)
  5 | 
  6 | import torch
  7 | from torch.optim.lr_scheduler import ReduceLROnPlateau
  8 | 
  9 | from training.batch_processor import batch_processor
 10 | from network.posenet import poseNet
 11 | from datasets.coco import get_loader
 12 | from training.trainer import Trainer
 13 | 
 14 | # Hyper-params
 15 | coco_root = '/data/COCO/'
 16 | backbone = 'resnet101'  # 'resnet50'
 17 | opt = 'adam'
 18 | weight_decay = 0.000
 19 | inp_size = 608  # input size 608*608
 20 | feat_stride = 4
 21 | 
 22 | # model parameters in MultiPoseNet
 23 | fpn_resnet_para = ['conv1', 'bn1', 'layer1', 'layer2', 'layer3', 'layer4']
 24 | fpn_retinanet_para = ['conv6', 'conv7', 'latlayer1', 'latlayer2',
 25 |                       'latlayer3', 'toplayer0', 'toplayer1', 'toplayer2']
 26 | fpn_keypoint_para = ['toplayer', 'flatlayer1', 'flatlayer2',
 27 |                      'flatlayer3', 'smooth1', 'smooth2', 'smooth3']
 28 | retinanet_para = ['regressionModel', 'classificationModel']
 29 | keypoint_para = ['convt1', 'convt2', 'convt3', 'convt4', 'convs1', 'convs2', 'convs3', 'convs4', 'upsample1',
 30 |                  'upsample2', 'upsample3', 'conv2', 'convfin', 'convfin_k2', 'convfin_k3', 'convfin_k4', 'convfin_k5']
 31 | prn_para = ['prn']
 32 | 
 33 | #####################################################################
 34 | # train detection subnet
 35 | data_dir = coco_root+'images/'
 36 | mask_dir = coco_root
 37 | json_path = coco_root+'COCO.json'
 38 | 
 39 | # Set Training parameters
 40 | params = Trainer.TrainParams()
 41 | params.exp_name = 'res101_detection_subnet/'
 42 | params.subnet_name = 'detection_subnet'
 43 | params.save_dir = './extra/models/{}'.format(params.exp_name)
 44 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
 45 | params.ignore_opt_state = True
 46 | 
 47 | params.max_epoch = 50
 48 | params.init_lr = 1.e-5
 49 | params.lr_decay = 0.1
 50 | 
 51 | params.gpus = [0]
 52 | params.batch_size = 25 * len(params.gpus)
 53 | params.val_nbatch_end_epoch = 2000
 54 | 
 55 | params.print_freq = 50
 56 | 
 57 | # model
 58 | if backbone == 'resnet101':
 59 |     model = poseNet(101)
 60 | elif backbone == 'resnet50':
 61 |     model = poseNet(50)
 62 | 
 63 | # Train detection subnet (RetinaNet), Fix the weights in backbone (ResNet) ans Key-point Subnet
 64 | for name, module in model.fpn.named_children():
 65 |     if name in fpn_resnet_para:
 66 |         for para in module.parameters():
 67 |             para.requires_grad = False
 68 | for name, module in model.fpn.named_children():
 69 |     if name in fpn_keypoint_para:
 70 |         for para in module.parameters():
 71 |             para.requires_grad = False
 72 | for name, module in model.named_children():
 73 |     if name in keypoint_para:
 74 |         for para in module.parameters():
 75 |             para.requires_grad = False
 76 | for name, module in model.named_children():
 77 |     if name in prn_para:
 78 |         for para in module.parameters():
 79 |             para.requires_grad = False
 80 | 
 81 | print("Loading dataset...")
 82 | # load training data
 83 | train_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride,
 84 |                         preprocess='resnet', batch_size=params.batch_size, training=True,
 85 |                         shuffle=True, num_workers=8, subnet=params.subnet_name)
 86 | print('train dataset len: {}'.format(len(train_data.dataset)))
 87 | 
 88 | # load validation data
 89 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride,
 90 |                         preprocess='resnet', batch_size=params.batch_size-10*len(params.gpus), training=False,
 91 |                         shuffle=False, num_workers=8, subnet=params.subnet_name)
 92 | print('val dataset len: {}'.format(len(valid_data.dataset)))
 93 | 
 94 | trainable_vars = [param for param in model.parameters() if param.requires_grad]
 95 | if opt == 'adam':
 96 |     print("Training with adam")
 97 |     params.optimizer = torch.optim.Adam(
 98 |         trainable_vars, lr=params.init_lr, weight_decay=weight_decay)
 99 | 
100 | params.lr_scheduler = ReduceLROnPlateau(
101 |     params.optimizer, 'min', factor=params.lr_decay, patience=3, verbose=True)
102 | trainer = Trainer(model, params, batch_processor, train_data, valid_data)
103 | trainer.train()
104 | 


--------------------------------------------------------------------------------
/network/net_utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import torch.nn as nn
  4 | import numpy as np
  5 | from copy import deepcopy
  6 | try:
  7 |     import cPickle as pickle
  8 | except ImportError:
  9 |     import pickle
 10 | from lib.utils.log import logger
 11 | 
 12 | def set_optimizer_state_devices(state, device_id=None):
 13 |     """
 14 |     set state in optimizer to a device. move to cpu if device_id==None
 15 |     :param state: optimizer.state
 16 |     :param device_id: None or a number
 17 |     :return:
 18 |     """
 19 |     for k, v in state.items():
 20 |         for k2 in v.keys():
 21 |             if hasattr(v[k2], 'cuda'):
 22 |                 if device_id is None:
 23 |                     v[k2] = v[k2].cpu()
 24 |                 else:
 25 |                     v[k2] = v[k2].cuda(device_id)
 26 | 
 27 |     return state
 28 | 
 29 | 
 30 | def save_net(fname, net, epoch=-1, optimizers=None, rm_prev_opt=False, max_n_ckpts=-1):
 31 |     import h5py
 32 |     with h5py.File(fname, mode='w') as h5f:
 33 |         for k, v in net.state_dict().items():
 34 |             h5f.create_dataset(k, data=v.cpu().numpy())
 35 |         h5f.attrs['epoch'] = epoch
 36 | 
 37 |     if optimizers is not None:
 38 |         state_dicts = []
 39 |         for optimizer in optimizers:
 40 |             state_dict = deepcopy(optimizer.state_dict())
 41 |             state_dict['state'] = set_optimizer_state_devices(state_dict['state'], device_id=None)
 42 |             state_dicts.append(state_dict)
 43 | 
 44 |         state_file = fname + '.optimizer_state.pk'
 45 |         with open(state_file, 'wb') as f:
 46 |             pickle.dump(state_dicts, f)
 47 | 
 48 |         # remove
 49 |         if rm_prev_opt:
 50 |             root = os.path.split(fname)[0]
 51 |             for filename in os.listdir(root):
 52 |                 filename = os.path.join(root, filename)
 53 |                 if filename.endswith('.optimizer_state.pk') and filename != state_file:
 54 |                     logger.info(('Remove {}'.format(filename)))
 55 |                     os.remove(filename)
 56 | 
 57 |         # remove ckpt
 58 |         if max_n_ckpts > 0:
 59 |             root = os.path.split(fname)[0]
 60 |             ckpts = [fname for fname in os.listdir(root) if os.path.splitext(fname)[-1] == '.h5']
 61 |             ckpts = sorted(ckpts, key=lambda name: int(os.path.splitext(name)[0].split('_')[-1]))
 62 |             if len(ckpts) > max_n_ckpts:
 63 |                 for ckpt in ckpts[0:-max_n_ckpts]:
 64 |                     filename = os.path.join(root, ckpt)
 65 |                     logger.info('Remove {}'.format(filename))
 66 |                     os.remove(filename)
 67 | 
 68 | 
 69 | def load_net(fname, net, prefix='', load_state_dict=False):
 70 |     import h5py
 71 |     with h5py.File(fname, mode='r') as h5f:
 72 |         h5f_is_module = True
 73 |         for k in h5f.keys():
 74 |             if not str(k).startswith('module.'):
 75 |                 h5f_is_module = False
 76 |                 break
 77 |         if prefix == '' and not isinstance(net, nn.DataParallel) and h5f_is_module:
 78 |             prefix = 'module.'
 79 | 
 80 |         for k, v in net.state_dict().items():
 81 |             k = prefix + k
 82 |             if k in h5f:
 83 |                 param = torch.from_numpy(np.asarray(h5f[k]))
 84 |                 if v.size() != param.size():
 85 |                     logger.warning('Inconsistent shape: {}, {}'.format(v.size(), param.size()))
 86 |                 else:
 87 |                     v.copy_(param)
 88 |             else:
 89 |                 logger.warning('No layer: {}'.format(k))
 90 | 
 91 |         epoch = h5f.attrs['epoch'] if 'epoch' in h5f.attrs else -1
 92 | 
 93 |         if not load_state_dict:
 94 |             if 'learning_rates' in h5f.attrs:
 95 |                 lr = h5f.attrs['learning_rates']
 96 |             else:
 97 |                 lr = h5f.attrs.get('lr', -1)
 98 |                 lr = np.asarray([lr] if lr > 0 else [], dtype=np.float)
 99 | 
100 |             return epoch, lr
101 | 
102 |         state_file = fname + '.optimizer_state.pk'
103 |         if os.path.isfile(state_file):
104 |             with open(state_file, 'rb') as f:
105 |                 state_dicts = pickle.load(f)
106 |                 if not isinstance(state_dicts, list):
107 |                     state_dicts = [state_dicts]
108 |         else:
109 |             state_dicts = None
110 |         return epoch, state_dicts
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/network/anchors.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | 
  5 | 
  6 | class Anchors(nn.Module):
  7 |     def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
  8 |         super(Anchors, self).__init__()
  9 | 
 10 |         if pyramid_levels is None:
 11 |             self.pyramid_levels = [3, 4, 5, 6, 7]
 12 |         if strides is None:
 13 |             self.strides = [2 ** x for x in self.pyramid_levels]
 14 |         if sizes is None:
 15 |             self.sizes = [2 ** (x + 2) for x in self.pyramid_levels]
 16 |         if ratios is None:
 17 |             self.ratios = np.array([0.5, 1, 2])
 18 |         if scales is None:
 19 |             self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
 20 | 
 21 |     def forward(self, image):
 22 |         
 23 |         image_shape = image.shape[2:]
 24 |         image_shape = np.array(image_shape)
 25 |         image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]
 26 | 
 27 |         # compute anchors over all pyramid levels
 28 |         all_anchors = np.zeros((0, 4)).astype(np.float32)
 29 | 
 30 |         for idx, p in enumerate(self.pyramid_levels):
 31 |             anchors         = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
 32 |             shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
 33 |             all_anchors     = np.append(all_anchors, shifted_anchors, axis=0)
 34 | 
 35 |         all_anchors = np.expand_dims(all_anchors, axis=0)
 36 | 
 37 |         return torch.from_numpy(all_anchors.astype(np.float32)).cuda()
 38 | 
 39 | def generate_anchors(base_size=16, ratios=None, scales=None):
 40 |     """
 41 |     Generate anchor (reference) windows by enumerating aspect ratios X
 42 |     scales w.r.t. a reference window.
 43 |     """
 44 | 
 45 |     if ratios is None:
 46 |         ratios = np.array([0.5, 1, 2])
 47 | 
 48 |     if scales is None:
 49 |         scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
 50 | 
 51 |     num_anchors = len(ratios) * len(scales)
 52 | 
 53 |     # initialize output anchors
 54 |     anchors = np.zeros((num_anchors, 4))
 55 | 
 56 |     # scale base_size
 57 |     anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T
 58 | 
 59 |     # compute areas of anchors
 60 |     areas = anchors[:, 2] * anchors[:, 3]
 61 | 
 62 |     # correct for ratios
 63 |     anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
 64 |     anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))
 65 | 
 66 |     # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
 67 |     anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
 68 |     anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T
 69 | 
 70 |     return anchors
 71 | 
 72 | def compute_shape(image_shape, pyramid_levels):
 73 |     """Compute shapes based on pyramid levels.
 74 | 
 75 |     :param image_shape:
 76 |     :param pyramid_levels:
 77 |     :return:
 78 |     """
 79 |     image_shape = np.array(image_shape[:2])
 80 |     image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
 81 |     return image_shapes
 82 | 
 83 | 
 84 | def anchors_for_shape(
 85 |     image_shape,
 86 |     pyramid_levels=None,
 87 |     ratios=None,
 88 |     scales=None,
 89 |     strides=None,
 90 |     sizes=None,
 91 |     shapes_callback=None,
 92 | ):
 93 | 
 94 |     image_shapes = compute_shape(image_shape, pyramid_levels)
 95 | 
 96 |     # compute anchors over all pyramid levels
 97 |     all_anchors = np.zeros((0, 4))
 98 |     for idx, p in enumerate(pyramid_levels):
 99 |         anchors         = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
100 |         shifted_anchors = shift(image_shapes[idx], strides[idx], anchors)
101 |         all_anchors     = np.append(all_anchors, shifted_anchors, axis=0)
102 | 
103 |     return all_anchors
104 | 
105 | 
106 | def shift(shape, stride, anchors):
107 |     shift_x = (np.arange(0, shape[1]) + 0.5) * stride
108 |     shift_y = (np.arange(0, shape[0]) + 0.5) * stride
109 | 
110 |     shift_x, shift_y = np.meshgrid(shift_x, shift_y)
111 | 
112 |     shifts = np.vstack((
113 |         shift_x.ravel(), shift_y.ravel(),
114 |         shift_x.ravel(), shift_y.ravel()
115 |     )).transpose()
116 | 
117 |     # add A anchors (1, A, 4) to
118 |     # cell K shifts (K, 1, 4) to get
119 |     # shift anchors (K, A, 4)
120 |     # reshape to (K*A, 4) shifted anchors
121 |     A = anchors.shape[0]
122 |     K = shifts.shape[0]
123 |     all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
124 |     all_anchors = all_anchors.reshape((K * A, 4))
125 | 
126 |     return all_anchors
127 | 
128 | 


--------------------------------------------------------------------------------
/training/multipose_keypoint_train.py:
--------------------------------------------------------------------------------
  1 | import os, sys
  2 | root_path = os.path.realpath(__file__).split('/training/multipose_keypoint_train.py')[0]
  3 | os.chdir(root_path)
  4 | sys.path.append(root_path)
  5 | 
  6 | import torch
  7 | import torch.utils.model_zoo as model_zoo
  8 | from torch.optim.lr_scheduler import ReduceLROnPlateau
  9 | 
 10 | from training.batch_processor import batch_processor
 11 | from network.posenet import poseNet
 12 | from datasets.coco import get_loader
 13 | from training.trainer import Trainer
 14 | 
 15 | # Hyper-params
 16 | coco_root = '/data/COCO/'
 17 | backbone = 'resnet101'  # 'resnet50'
 18 | opt = 'adam'
 19 | weight_decay = 0.000
 20 | inp_size = 480  # input size 480*480
 21 | feat_stride = 4
 22 | 
 23 | model_urls = {
 24 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
 25 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
 26 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
 27 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
 28 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 29 | }
 30 | 
 31 | # model parameters in MultiPoseNet
 32 | fpn_resnet_para = ['conv1', 'bn1', 'layer1', 'layer2', 'layer3', 'layer4']
 33 | fpn_retinanet_para = ['conv6', 'conv7', 'latlayer1', 'latlayer2',
 34 |                       'latlayer3', 'toplayer0', 'toplayer1', 'toplayer2']
 35 | fpn_keypoint_para = ['toplayer', 'flatlayer1', 'flatlayer2',
 36 |                      'flatlayer3', 'smooth1', 'smooth2', 'smooth3']
 37 | retinanet_para = ['regressionModel', 'classificationModel']
 38 | keypoint_para = ['convt1', 'convt2', 'convt3', 'convt4', 'convs1', 'convs2', 'convs3', 'convs4', 'upsample1',
 39 |                  'upsample2', 'upsample3', 'conv2', 'convfin', 'convfin_k2', 'convfin_k3', 'convfin_k4', 'convfin_k5']
 40 | prn_para = ['prn']
 41 | 
 42 | #####################################################################
 43 | # train keypoint subnet
 44 | data_dir = coco_root+'images/'
 45 | mask_dir = coco_root
 46 | json_path = coco_root+'COCO.json'
 47 | 
 48 | # Set Training parameters
 49 | params = Trainer.TrainParams()
 50 | params.exp_name = 'res101_keypoint_subnet/'
 51 | params.subnet_name = 'keypoint_subnet'
 52 | params.save_dir = './extra/models/{}'.format(params.exp_name)
 53 | params.ckpt = None  # None checkpoint file to load
 54 | params.ignore_opt_state = False
 55 | 
 56 | params.max_epoch = 80
 57 | params.init_lr = 1.e-4
 58 | params.lr_decay = 0.1
 59 | 
 60 | params.gpus = [0]
 61 | params.batch_size = 6 * len(params.gpus)
 62 | params.val_nbatch_end_epoch = 2000
 63 | 
 64 | params.print_freq = 50
 65 | 
 66 | # model
 67 | if backbone == 'resnet101':
 68 |     model = poseNet(101)
 69 | elif backbone == 'resnet50':
 70 |     model = poseNet(50)
 71 | 
 72 | # load pretrained
 73 | if params.ckpt is None:
 74 |     model.fpn.load_state_dict(model_zoo.load_url(
 75 |         model_urls[backbone]), strict=False)
 76 | 
 77 | # Train Key-point Subnet, Fix the weights in detection subnet (RetinaNet)
 78 | for name, module in model.fpn.named_children():
 79 |     if name in fpn_retinanet_para:
 80 |         for para in module.parameters():
 81 |             para.requires_grad = False
 82 | for name, module in model.named_children():
 83 |     if name in retinanet_para:
 84 |         for para in module.parameters():
 85 |             para.requires_grad = False
 86 | for name, module in model.named_children():
 87 |     if name in prn_para:
 88 |         for para in module.parameters():
 89 |             para.requires_grad = False
 90 | 
 91 | print("Loading dataset...")
 92 | # load training data
 93 | train_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride,
 94 |                         preprocess='resnet', batch_size=params.batch_size, training=True,
 95 |                         shuffle=True, num_workers=8, subnet=params.subnet_name)
 96 | print('train dataset len: {}'.format(len(train_data.dataset)))
 97 | 
 98 | # load validation data
 99 | valid_data = None
100 | if params.val_nbatch > 0:
101 |     valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride,
102 |                             preprocess='resnet', batch_size=params.batch_size-3*len(params.gpus), training=False,
103 |                             shuffle=False, num_workers=8, subnet=params.subnet_name)
104 |     print('val dataset len: {}'.format(len(valid_data.dataset)))
105 | 
106 | trainable_vars = [param for param in model.parameters() if param.requires_grad]
107 | if opt == 'adam':
108 |     print("Training with adam")
109 |     params.optimizer = torch.optim.Adam(
110 |         trainable_vars, lr=params.init_lr, weight_decay=weight_decay)
111 | 
112 | params.lr_scheduler = ReduceLROnPlateau(
113 |     params.optimizer, 'min', factor=params.lr_decay, patience=3, verbose=True)
114 | trainer = Trainer(model, params, batch_processor, train_data, valid_data)
115 | trainer.train()
116 | 


--------------------------------------------------------------------------------
/multipose_environment.yaml:
--------------------------------------------------------------------------------
  1 | name: MultiPose
  2 | channels:
  3 |   - defaults
  4 | dependencies:
  5 |   - cuda90=1.0=h6433d27_0
  6 |   - pytorch=0.4.0=py36_cuda9.0.176_cudnn7.1.2_1
  7 |   - torchvision=0.2.1=py36_1
  8 |   - backcall=0.1.0=py36_0
  9 |   - blas=1.0=mkl
 10 |   - bleach=2.1.4=py36_0
 11 |   - bokeh=0.12.16=py36_0
 12 |   - bzip2=1.0.6=h14c3975_5
 13 |   - ca-certificates=2018.03.07=0
 14 |   - cairo=1.14.12=h8948797_3
 15 |   - certifi=2018.8.24=py36_1
 16 |   - cffi=1.11.5=py36h9745a5d_0
 17 |   - click=6.7=py36h5253387_0
 18 |   - cloudpickle=0.5.3=py36_0
 19 |   - cudatoolkit=9.0=h13b8566_0
 20 |   - cycler=0.10.0=py36h93f1223_0
 21 |   - cython=0.28.5=py36hf484d3e_0
 22 |   - cytoolz=0.9.0.1=py36h14c3975_0
 23 |   - dask=0.17.5=py36_0
 24 |   - dask-core=0.17.5=py36_0
 25 |   - dbus=1.13.2=h714fa37_1
 26 |   - decorator=4.3.0=py36_0
 27 |   - distributed=1.21.8=py36_0
 28 |   - entrypoints=0.2.3=py36_2
 29 |   - expat=2.2.5=he0dffb1_0
 30 |   - ffmpeg=4.0=hcdf2ecd_0
 31 |   - fontconfig=2.13.0=h9420a91_0
 32 |   - freeglut=3.0.0=hf484d3e_5
 33 |   - freetype=2.9.1=h8a8886c_0
 34 |   - glib=2.56.1=h000015b_0
 35 |   - gmp=6.1.2=h6c8ec71_1
 36 |   - graphite2=1.3.11=h16798f4_2
 37 |   - gst-plugins-base=1.14.0=hbbd80ab_1
 38 |   - gstreamer=1.14.0=hb453b48_1
 39 |   - h5py=2.8.0=py36h989c5e5_3
 40 |   - harfbuzz=1.8.4=hec2c2bc_0
 41 |   - hdf5=1.10.2=hba1933b_1
 42 |   - heapdict=1.0.0=py36_2
 43 |   - html5lib=1.0.1=py36_0
 44 |   - icu=58.2=h9c2bf20_1
 45 |   - imageio=2.3.0=py36_0
 46 |   - intel-openmp=2018.0.0=8
 47 |   - ipykernel=4.9.0=py36_0
 48 |   - ipython=6.5.0=py36_0
 49 |   - ipython_genutils=0.2.0=py36_0
 50 |   - ipywidgets=7.4.1=py36_0
 51 |   - jasper=2.0.14=h07fcdf6_1
 52 |   - jedi=0.12.1=py36_0
 53 |   - jinja2=2.10=py36ha16c418_0
 54 |   - jpeg=9b=h024ee3a_2
 55 |   - jsonschema=2.6.0=py36_0
 56 |   - jupyter=1.0.0=py36_6
 57 |   - jupyter_client=5.2.3=py36_0
 58 |   - jupyter_console=5.2.0=py36_1
 59 |   - jupyter_core=4.4.0=py36_0
 60 |   - kiwisolver=1.0.1=py36h764f252_0
 61 |   - libedit=3.1.20170329=h6b74fdf_2
 62 |   - libffi=3.2.1=hd88cf55_4
 63 |   - libgcc-ng=8.2.0=hdf63c60_1
 64 |   - libgfortran-ng=7.2.0=hdf63c60_3
 65 |   - libglu=9.0.0=hf484d3e_1
 66 |   - libopencv=3.4.2=h8fa1ad8_0
 67 |   - libopus=1.2.1=hb9ed12e_0
 68 |   - libpng=1.6.34=hb9fc6fc_0
 69 |   - libprotobuf=3.5.2=h6f1eeef_0
 70 |   - libsodium=1.0.16=h1bed415_0
 71 |   - libstdcxx-ng=7.2.0=hdf63c60_3
 72 |   - libtiff=4.0.9=he85c1e1_1
 73 |   - libuuid=1.0.3=h1bed415_2
 74 |   - libvpx=1.7.0=h439df22_0
 75 |   - libxcb=1.13=h1bed415_1
 76 |   - libxml2=2.9.8=h26e45fe_1
 77 |   - locket=0.2.0=py36h787c0ad_1
 78 |   - markupsafe=1.0=py36hd9260cd_1
 79 |   - matplotlib=2.2.3=py36hb69df0a_0
 80 |   - mistune=0.8.3=py36h14c3975_1
 81 |   - mkl=2018.0.2=1
 82 |   - mkl_fft=1.0.1=py36h3010b51_0
 83 |   - mkl_random=1.0.1=py36h629b387_0
 84 |   - msgpack-python=0.5.6=py36h6bb024c_0
 85 |   - nbconvert=5.3.1=py36_0
 86 |   - nbformat=4.4.0=py36_0
 87 |   - ncurses=6.1=hf484d3e_0
 88 |   - networkx=2.1=py36_0
 89 |   - ninja=1.8.2=py36h6bb024c_1
 90 |   - notebook=5.5.0=py36_0
 91 |   - numpy=1.14.3=py36hcd700cb_1
 92 |   - numpy-base=1.14.3=py36h9be14a7_1
 93 |   - olefile=0.45.1=py36_0
 94 |   - opencv=3.4.2=py36h6fd60c2_0
 95 |   - openssl=1.0.2p=h14c3975_0
 96 |   - packaging=17.1=py36_0
 97 |   - pandas=0.23.0=py36h637b7d7_0
 98 |   - pandoc=2.2.3.2=0
 99 |   - pandocfilters=1.4.2=py36_1
100 |   - parso=0.3.1=py36_0
101 |   - partd=0.3.8=py36h36fd896_0
102 |   - pcre=8.42=h439df22_0
103 |   - pexpect=4.6.0=py36_0
104 |   - pickleshare=0.7.4=py36_0
105 |   - pillow=5.2.0=py36heded4f4_0
106 |   - pip=10.0.1=py36_0
107 |   - pixman=0.34.0=hceecf20_3
108 |   - progress=1.4=py36_0
109 |   - prompt_toolkit=1.0.15=py36_0
110 |   - psutil=5.4.5=py36h14c3975_0
111 |   - ptyprocess=0.6.0=py36_0
112 |   - py-opencv=3.4.2=py36h8fa1ad8_0
113 |   - pycparser=2.18=py36hf9f622e_1
114 |   - pygments=2.2.0=py36_0
115 |   - pyparsing=2.2.0=py36hee85983_1
116 |   - pyqt=5.9.2=py36h751905a_0
117 |   - python=3.6.5=hc3d631a_2
118 |   - python-dateutil=2.7.3=py36_0
119 |   - pytz=2018.4=py36_0
120 |   - pywavelets=0.5.2=py36he602eb0_0
121 |   - pyyaml=3.12=py36hafb9ca4_1
122 |   - pyzmq=17.1.2=py36h14c3975_0
123 |   - qt=5.9.6=h52aff34_0
124 |   - qtconsole=4.4.1=py36_0
125 |   - readline=7.0=ha6073c6_4
126 |   - scikit-image=0.13.1=py36h14c3975_1
127 |   - scipy=1.1.0=py36hfc37229_0
128 |   - send2trash=1.5.0=py36_0
129 |   - setuptools=39.1.0=py36_0
130 |   - simplegeneric=0.8.1=py36_2
131 |   - sip=4.19.8=py36hf484d3e_0
132 |   - six=1.11.0=py36h372c433_1
133 |   - sortedcontainers=1.5.10=py36_0
134 |   - sqlite=3.23.1=he433501_0
135 |   - tblib=1.3.2=py36h34cf8b6_0
136 |   - terminado=0.8.1=py36_1
137 |   - testpath=0.3.1=py36_0
138 |   - tk=8.6.7=hc745277_3
139 |   - toolz=0.9.0=py36_0
140 |   - tornado=5.0.2=py36_0
141 |   - tqdm=4.23.3=py36_0
142 |   - traitlets=4.3.2=py36_0
143 |   - wcwidth=0.1.7=py36_0
144 |   - webencodings=0.5.1=py36_1
145 |   - wheel=0.31.1=py36_0
146 |   - widgetsnbextension=3.4.1=py36_0
147 |   - xz=5.2.4=h14c3975_4
148 |   - yaml=0.1.7=had09818_2
149 |   - zeromq=4.2.5=hf484d3e_1
150 |   - zict=0.1.3=py36h3a3bf81_0
151 |   - zlib=1.2.11=ha838bed_2
152 | prefix: [path/to/anaconda]/envs/MultiPose
153 | 
154 | 


--------------------------------------------------------------------------------
/datasets/coco_data/prn_data_pipeline.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import numpy as np
  3 | from skimage.filters import gaussian
  4 | from torch.utils.data import Dataset
  5 | from datasets.coco_data.heatmap import putGaussianMaps
  6 | 
  7 | params_transform = dict()
  8 | params_transform['sigma'] = 7.0
  9 | 
 10 | class PRN_CocoDataset(Dataset):
 11 |     def __init__(self, coco_train, num_of_keypoints, coeff, threshold, inp_size, feat_stride):
 12 |         self.coco_train = coco_train
 13 |         self.num_of_keypoints = num_of_keypoints
 14 |         self.anns = self.get_anns(self.coco_train)
 15 |         self.bbox_height = coeff * 28
 16 |         self.bbox_width = coeff * 18
 17 |         self.threshold = threshold
 18 | 
 19 |         params_transform['crop_size_x'] = inp_size/feat_stride
 20 |         params_transform['crop_size_y'] = inp_size/feat_stride
 21 |         params_transform['stride'] = 1
 22 | 
 23 |     def __len__(self):
 24 |         return len(self.anns)
 25 | 
 26 |     def __getitem__(self, item):
 27 |         ann_data = self.anns[item]
 28 | 
 29 |         input, label = self.get_data(ann_data, self.coco_train)
 30 | 
 31 |         return input, label
 32 | 
 33 |     def get_data(self, ann_data, coco):
 34 |         weights = np.zeros((self.bbox_height, self.bbox_width, 17))
 35 |         output = np.zeros((self.bbox_height, self.bbox_width, 17))
 36 | 
 37 |         bbox = ann_data['bbox']
 38 |         x = int(bbox[0])
 39 |         y = int(bbox[1])
 40 |         w = float(bbox[2])
 41 |         h = float(bbox[3])
 42 | 
 43 |         x_scale = float(self.bbox_width) / math.ceil(w)
 44 |         y_scale = float(self.bbox_height) / math.ceil(h)
 45 | 
 46 |         kpx = ann_data['keypoints'][0::3]
 47 |         kpy = ann_data['keypoints'][1::3]
 48 |         kpv = ann_data['keypoints'][2::3]
 49 | 
 50 | 
 51 |         for j in range(17):
 52 |             if kpv[j] > 0:
 53 |                 x0 = int((kpx[j] - x) * x_scale)
 54 |                 y0 = int((kpy[j] - y) * y_scale)
 55 | 
 56 |                 if x0 >= self.bbox_width and y0 >= self.bbox_height:
 57 |                     output[self.bbox_height - 1, self.bbox_width - 1, j] = 1
 58 |                 elif x0 >= self.bbox_width:
 59 |                     output[y0, self.bbox_width - 1, j] = 1
 60 |                 elif y0 >= self.bbox_height:
 61 |                     try:
 62 |                         output[self.bbox_height - 1, x0, j] = 1
 63 |                     except:
 64 |                         output[self.bbox_height - 1, 0, j] = 1
 65 |                 elif x0 < 0 and y0 < 0:
 66 |                     output[0, 0, j] = 1
 67 |                 elif x0 < 0:
 68 |                     output[y0, 0, j] = 1
 69 |                 elif y0 < 0:
 70 |                     output[0, x0, j] = 1
 71 |                 else:
 72 |                     output[y0, x0, j] = 1
 73 | 
 74 |         img_id = ann_data['image_id']
 75 |         img_data = coco.loadImgs(img_id)[0]
 76 |         ann_data = coco.loadAnns(coco.getAnnIds(img_data['id']))
 77 | 
 78 |         for ann in ann_data:
 79 |             kpx = ann['keypoints'][0::3]
 80 |             kpy = ann['keypoints'][1::3]
 81 |             kpv = ann['keypoints'][2::3]
 82 | 
 83 |             for j in range(17):
 84 |                 if kpv[j] > 0:
 85 |                     if (kpx[j] > bbox[0] - bbox[2] * self.threshold and kpx[j] < bbox[0] + bbox[2] * (1 + self.threshold)):
 86 |                         if (kpy[j] > bbox[1] - bbox[3] * self.threshold and kpy[j] < bbox[1] + bbox[3] * (1 + self.threshold)):
 87 |                             x0 = int((kpx[j] - x) * x_scale)
 88 |                             y0 = int((kpy[j] - y) * y_scale)
 89 | 
 90 |                             if x0 >= self.bbox_width and y0 >= self.bbox_height:
 91 |                                 weights[self.bbox_height - 1, self.bbox_width - 1, j] = 1
 92 |                             elif x0 >= self.bbox_width:
 93 |                                 weights[y0, self.bbox_width - 1, j] = 1
 94 |                             elif y0 >= self.bbox_height:
 95 |                                 weights[self.bbox_height - 1, x0, j] = 1
 96 |                             elif x0 < 0 and y0 < 0:
 97 |                                 weights[0, 0, j] = 1
 98 |                             elif x0 < 0:
 99 |                                 weights[y0, 0, j] = 1
100 |                             elif y0 < 0:
101 |                                 weights[0, x0, j] = 1
102 |                             else:
103 |                                 weights[y0, x0, j] = 1
104 | 
105 |         for t in range(17):
106 |             weights[:, :, t] = gaussian(weights[:, :, t])
107 |         output = gaussian(output, sigma=2, mode='constant', multichannel=True)
108 |         our_order = [0, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
109 |         weights = weights[:, :, our_order]
110 |         output = output[:, :, our_order]
111 |         return weights, output
112 | 
113 |     def get_anns(self, coco):
114 |         #:param coco: COCO instance
115 |         #:return: anns: List of annotations that contain person with at least 6 keypoints
116 |         ann_ids = coco.getAnnIds()
117 |         anns = []
118 |         for i in ann_ids:
119 |             ann = coco.loadAnns(i)[0]
120 |             if ann['iscrowd'] == 0 and ann['num_keypoints'] > self.num_of_keypoints:
121 |                 anns.append(ann)  # ann
122 |         sorted_list = sorted(anns, key=lambda k: k['num_keypoints'], reverse=True)
123 |         return sorted_list
124 | 


--------------------------------------------------------------------------------
/network/fpn.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | '''RetinaFPN in PyTorch.'''
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | 
  7 | from torch.autograd import Variable
  8 | 
  9 | class Bottleneck(nn.Module):
 10 |     expansion = 4
 11 | 
 12 |     def __init__(self, in_planes, planes, stride=1):
 13 |         super(Bottleneck, self).__init__()
 14 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 15 |         self.bn1 = nn.BatchNorm2d(planes)
 16 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 17 |         self.bn2 = nn.BatchNorm2d(planes)
 18 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 19 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 20 | 
 21 |         self.downsample = nn.Sequential()
 22 |         if stride != 1 or in_planes != self.expansion*planes:
 23 |             self.downsample = nn.Sequential(
 24 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 25 |                 nn.BatchNorm2d(self.expansion*planes)
 26 |             )
 27 | 
 28 |     def forward(self, x):
 29 |         out = F.relu(self.bn1(self.conv1(x)))
 30 |         out = F.relu(self.bn2(self.conv2(out)))
 31 |         out = self.bn3(self.conv3(out))
 32 |         out += self.downsample(x)
 33 |         out = F.relu(out)
 34 |         return out
 35 | 
 36 | 
 37 | class FPN(nn.Module):
 38 |     def __init__(self, block, num_blocks):
 39 |         super(FPN, self).__init__()
 40 |         self.in_planes = 64
 41 | 
 42 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
 43 |         self.bn1 = nn.BatchNorm2d(64)
 44 | 
 45 |         # Bottom-up layers
 46 |         self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
 47 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 48 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 49 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 50 | 
 51 |         # fpn for detection subnet (RetinaNet) P6,P7
 52 |         self.conv6 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1)  # p6
 53 |         self.conv7 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)  # p7
 54 | 
 55 |         # pure fpn layers for detection subnet (RetinaNet)
 56 |         # Lateral layers
 57 |         self.latlayer1 = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)  # c5 -> p5
 58 |         self.latlayer2 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)  # c4 -> p4
 59 |         self.latlayer3 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)  # c3 -> p3
 60 |         # smooth
 61 |         self.toplayer0 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)  # smooth p5
 62 |         self.toplayer1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)  # smooth p4
 63 |         self.toplayer2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)  # smooth p3
 64 | 
 65 |         # pure fpn layers for keypoint subnet
 66 |         # Lateral layers
 67 |         self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)  # c5 -> p5
 68 |         self.flatlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)  # c4 -> p4
 69 |         self.flatlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)  # c3 -> p3
 70 |         self.flatlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)  # c2 -> p2
 71 |         # smooth
 72 |         self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)  # smooth p4
 73 |         self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)  # smooth p3
 74 |         self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)  # smooth p2
 75 | 
 76 |     def _make_layer(self, block, planes, num_blocks, stride):
 77 |         strides = [stride] + [1]*(num_blocks-1)
 78 |         layers = []
 79 |         for stride in strides:
 80 |             layers.append(block(self.in_planes, planes, stride))
 81 |             self.in_planes = planes * block.expansion
 82 |         return nn.Sequential(*layers)
 83 | 
 84 |     def _upsample_add(self, x, y):
 85 |         '''Upsample and add two feature maps.
 86 | 
 87 |         Args:
 88 |           x: top feature map to be upsampled.
 89 |           y: lateral feature map.
 90 | 
 91 |         Returns:
 92 |           added feature map.
 93 |         '''
 94 |         _,_,H,W = y.size()
 95 |         return F.upsample(x, size=(H,W), mode='nearest', align_corners=None) + y  # bilinear, False
 96 | 
 97 |     def forward(self, x):
 98 |         # Bottom-up
 99 |         c1 = F.relu(self.bn1(self.conv1(x)))
100 |         c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
101 |         c2 = self.layer1(c1)
102 |         c3 = self.layer2(c2)
103 |         c4 = self.layer3(c3)
104 |         c5 = self.layer4(c4)
105 | 
106 |         # pure fpn for detection subnet, RetinaNet
107 |         p6 = self.conv6(c5)
108 |         p7 = self.conv7(F.relu(p6))
109 |         p5 = self.latlayer1(c5)
110 |         p4 = self._upsample_add(p5, self.latlayer2(c4))
111 |         p3 = self._upsample_add(p4, self.latlayer3(c3))
112 |         p5 = self.toplayer0(p5)
113 |         p4 = self.toplayer1(p4)
114 |         p3 = self.toplayer2(p3)
115 | 
116 |         # pure fpn for keypoints estimation
117 |         fp5 = self.toplayer(c5)
118 |         fp4 = self._upsample_add(fp5,self.flatlayer1(c4))
119 |         fp3 = self._upsample_add(fp4,self.flatlayer2(c3))
120 |         fp2 = self._upsample_add(fp3,self.flatlayer3(c2))
121 |         # Smooth
122 |         fp4 = self.smooth1(fp4)
123 |         fp3 = self.smooth2(fp3)
124 |         fp2 = self.smooth3(fp2)
125 | 
126 |         return [[fp2,fp3,fp4,fp5],[p3, p4, p5, p6, p7]]
127 | 
128 | def FPN50():
129 |     # [3,4,6,3] -> resnet50
130 |     return FPN(Bottleneck, [3,4,6,3])
131 | 
132 | def FPN101():
133 |     # [3,4,23,3] -> resnet101
134 |     return FPN(Bottleneck, [3,4,23,3])
135 | 


--------------------------------------------------------------------------------
/network/losses.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | 
  5 | def calc_iou(a, b):
  6 |     area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
  7 | 
  8 |     iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
  9 |     ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])
 10 | 
 11 |     iw = torch.clamp(iw, min=0)
 12 |     ih = torch.clamp(ih, min=0)
 13 | 
 14 |     ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih
 15 | 
 16 |     ua = torch.clamp(ua, min=1e-8)
 17 | 
 18 |     intersection = iw * ih
 19 | 
 20 |     IoU = intersection / ua
 21 | 
 22 |     return IoU
 23 | 
 24 | class FocalLoss(nn.Module):
 25 |     #def __init__(self):
 26 | 
 27 |     def forward(self, classifications, regressions, anchors, annotations):
 28 |         alpha = 0.25
 29 |         gamma = 2.0
 30 |         batch_size = classifications.shape[0]
 31 |         classification_losses = []
 32 |         regression_losses = []
 33 | 
 34 |         anchor = anchors[0, :, :]
 35 | 
 36 |         anchor_widths  = anchor[:, 2] - anchor[:, 0]
 37 |         anchor_heights = anchor[:, 3] - anchor[:, 1]
 38 |         anchor_ctr_x   = anchor[:, 0] + 0.5 * anchor_widths
 39 |         anchor_ctr_y   = anchor[:, 1] + 0.5 * anchor_heights
 40 | 
 41 |         for j in range(batch_size):
 42 | 
 43 |             classification = classifications[j, :, :]
 44 |             regression = regressions[j, :, :]
 45 | 
 46 |             bbox_annotation = annotations[j, :, :]
 47 |             bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]
 48 | 
 49 |             if bbox_annotation.shape[0] == 0:
 50 |                 regression_losses.append(torch.tensor(0, requires_grad=True).float().cuda())
 51 |                 classification_losses.append(torch.tensor(0, requires_grad=True).float().cuda())
 52 | 
 53 |                 continue
 54 | 
 55 |             classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
 56 | 
 57 |             IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations
 58 | 
 59 |             IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1
 60 | 
 61 |             #import pdb
 62 |             #pdb.set_trace()
 63 | 
 64 |             # compute the loss for classification
 65 |             targets = torch.ones(classification.shape) * -1
 66 |             targets = targets.cuda()
 67 | 
 68 |             targets[torch.lt(IoU_max, 0.4), :] = 0
 69 | 
 70 |             positive_indices = torch.ge(IoU_max, 0.5)
 71 | 
 72 |             num_positive_anchors = positive_indices.sum()
 73 | 
 74 |             assigned_annotations = bbox_annotation[IoU_argmax, :]
 75 | 
 76 |             targets[positive_indices, :] = 0
 77 |             targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
 78 | 
 79 |             alpha_factor = torch.ones(targets.shape).cuda() * alpha
 80 | 
 81 |             alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
 82 |             focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
 83 |             focal_weight = alpha_factor * torch.pow(focal_weight, gamma)
 84 | 
 85 |             bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
 86 | 
 87 |             # cls_loss = focal_weight * torch.pow(bce, gamma)
 88 |             cls_loss = focal_weight * bce
 89 | 
 90 |             cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
 91 | 
 92 |             classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0))
 93 | 
 94 |             # compute the loss for regression
 95 | 
 96 |             if positive_indices.sum() > 0:
 97 |                 assigned_annotations = assigned_annotations[positive_indices, :]
 98 | 
 99 |                 anchor_widths_pi = anchor_widths[positive_indices]
100 |                 anchor_heights_pi = anchor_heights[positive_indices]
101 |                 anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
102 |                 anchor_ctr_y_pi = anchor_ctr_y[positive_indices]
103 | 
104 |                 gt_widths  = assigned_annotations[:, 2] - assigned_annotations[:, 0]
105 |                 gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
106 |                 gt_ctr_x   = assigned_annotations[:, 0] + 0.5 * gt_widths
107 |                 gt_ctr_y   = assigned_annotations[:, 1] + 0.5 * gt_heights
108 | 
109 |                 # clip widths to 1
110 |                 gt_widths  = torch.clamp(gt_widths, min=1)
111 |                 gt_heights = torch.clamp(gt_heights, min=1)
112 | 
113 |                 targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
114 |                 targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
115 |                 targets_dw = torch.log(gt_widths / anchor_widths_pi)
116 |                 targets_dh = torch.log(gt_heights / anchor_heights_pi)
117 | 
118 |                 targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
119 |                 targets = targets.t()
120 | 
121 |                 targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
122 | 
123 | 
124 |                 negative_indices = 1 - positive_indices
125 | 
126 |                 regression_diff = torch.abs(targets - regression[positive_indices, :])
127 | 
128 |                 regression_loss = torch.where(
129 |                     torch.le(regression_diff, 1.0 / 9.0),
130 |                     0.5 * 9.0 * torch.pow(regression_diff, 2),
131 |                     regression_diff - 0.5 / 9.0
132 |                 )
133 |                 regression_losses.append(regression_loss.mean())
134 |             else:
135 |                 regression_losses.append(torch.tensor(0).float().cuda())
136 | 
137 |         return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True)
138 | 
139 |     
140 | 


--------------------------------------------------------------------------------
/lib/core/config.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import yaml
  7 | 
  8 | import numpy as np
  9 | from easydict import EasyDict as edict
 10 | 
 11 | 
 12 | config = edict()
 13 | 
 14 | config.OUTPUT_DIR = ''
 15 | config.LOG_DIR = ''
 16 | config.DATA_DIR = ''
 17 | config.GPUS = '0'
 18 | config.WORKERS = 4
 19 | config.PRINT_FREQ = 20
 20 | 
 21 | # Cudnn related params
 22 | config.CUDNN = edict()
 23 | config.CUDNN.BENCHMARK = True
 24 | config.CUDNN.DETERMINISTIC = False
 25 | config.CUDNN.ENABLED = True
 26 | 
 27 | # pose_resnet related params
 28 | POSE_RESNET = edict()
 29 | POSE_RESNET.NUM_LAYERS = 50
 30 | POSE_RESNET.DECONV_WITH_BIAS = False
 31 | POSE_RESNET.NUM_DECONV_LAYERS = 3
 32 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256]
 33 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4]
 34 | POSE_RESNET.FINAL_CONV_KERNEL = 1
 35 | POSE_RESNET.TARGET_TYPE = 'gaussian'
 36 | POSE_RESNET.HEATMAP_SIZE = [64, 64]  # width * height, ex: 24 * 32
 37 | POSE_RESNET.SIGMA = 2
 38 | 
 39 | MODEL_EXTRAS = {
 40 |     'pose_resnet': POSE_RESNET,
 41 | }
 42 | 
 43 | # common params for NETWORK
 44 | config.MODEL = edict()
 45 | config.MODEL.NAME = 'pose_resnet'
 46 | config.MODEL.INIT_WEIGHTS = True
 47 | config.MODEL.PRETRAINED = ''
 48 | config.MODEL.NUM_JOINTS = 16
 49 | config.MODEL.IMAGE_SIZE = [256, 256]  # width * height, ex: 192 * 256
 50 | config.MODEL.EXTRA = MODEL_EXTRAS[config.MODEL.NAME]
 51 | 
 52 | config.LOSS = edict()
 53 | config.LOSS.USE_TARGET_WEIGHT = True
 54 | 
 55 | # DATASET related params
 56 | config.DATASET = edict()
 57 | config.DATASET.ROOT = ''
 58 | config.DATASET.DATASET = 'mpii'
 59 | config.DATASET.TRAIN_SET = 'train'
 60 | config.DATASET.TEST_SET = 'valid'
 61 | config.DATASET.DATA_FORMAT = 'jpg'
 62 | config.DATASET.HYBRID_JOINTS_TYPE = ''
 63 | config.DATASET.SELECT_DATA = False
 64 | 
 65 | # training data augmentation
 66 | config.DATASET.FLIP = True
 67 | config.DATASET.SCALE_FACTOR = 0.25
 68 | config.DATASET.ROT_FACTOR = 30
 69 | 
 70 | # train
 71 | config.TRAIN = edict()
 72 | 
 73 | config.TRAIN.LR_FACTOR = 0.1
 74 | config.TRAIN.LR_STEP = [90, 110]
 75 | config.TRAIN.LR = 0.001
 76 | 
 77 | config.TRAIN.OPTIMIZER = 'adam'
 78 | config.TRAIN.MOMENTUM = 0.9
 79 | config.TRAIN.WD = 0.0001
 80 | config.TRAIN.NESTEROV = False
 81 | config.TRAIN.GAMMA1 = 0.99
 82 | config.TRAIN.GAMMA2 = 0.0
 83 | 
 84 | config.TRAIN.BEGIN_EPOCH = 0
 85 | config.TRAIN.END_EPOCH = 140
 86 | 
 87 | config.TRAIN.RESUME = False
 88 | config.TRAIN.CHECKPOINT = ''
 89 | 
 90 | config.TRAIN.BATCH_SIZE = 32
 91 | config.TRAIN.SHUFFLE = True
 92 | 
 93 | # testing
 94 | config.TEST = edict()
 95 | 
 96 | # size of images for each device
 97 | config.TEST.BATCH_SIZE = 32
 98 | # Test Model Epoch
 99 | config.TEST.FLIP_TEST = False
100 | config.TEST.POST_PROCESS = True
101 | config.TEST.SHIFT_HEATMAP = True
102 | 
103 | config.TEST.USE_GT_BBOX = False
104 | # nms
105 | config.TEST.OKS_THRE = 0.5
106 | config.TEST.IN_VIS_THRE = 0.0
107 | config.TEST.COCO_BBOX_FILE = ''
108 | config.TEST.BBOX_THRE = 1.0
109 | config.TEST.MODEL_FILE = ''
110 | config.TEST.IMAGE_THRE = 0.0
111 | config.TEST.NMS_THRE = 1.0
112 | 
113 | # debug
114 | config.DEBUG = edict()
115 | config.DEBUG.DEBUG = False
116 | config.DEBUG.SAVE_BATCH_IMAGES_GT = False
117 | config.DEBUG.SAVE_BATCH_IMAGES_PRED = False
118 | config.DEBUG.SAVE_HEATMAPS_GT = False
119 | config.DEBUG.SAVE_HEATMAPS_PRED = False
120 | 
121 | 
122 | def _update_dict(k, v):
123 |     if k == 'DATASET':
124 |         if 'MEAN' in v and v['MEAN']:
125 |             v['MEAN'] = np.array([eval(x) if isinstance(x, str) else x
126 |                                   for x in v['MEAN']])
127 |         if 'STD' in v and v['STD']:
128 |             v['STD'] = np.array([eval(x) if isinstance(x, str) else x
129 |                                  for x in v['STD']])
130 |     if k == 'MODEL':
131 |         if 'EXTRA' in v and 'HEATMAP_SIZE' in v['EXTRA']:
132 |             if isinstance(v['EXTRA']['HEATMAP_SIZE'], int):
133 |                 v['EXTRA']['HEATMAP_SIZE'] = np.array(
134 |                     [v['EXTRA']['HEATMAP_SIZE'], v['EXTRA']['HEATMAP_SIZE']])
135 |             else:
136 |                 v['EXTRA']['HEATMAP_SIZE'] = np.array(
137 |                     v['EXTRA']['HEATMAP_SIZE'])
138 |         if 'IMAGE_SIZE' in v:
139 |             if isinstance(v['IMAGE_SIZE'], int):
140 |                 v['IMAGE_SIZE'] = np.array([v['IMAGE_SIZE'], v['IMAGE_SIZE']])
141 |             else:
142 |                 v['IMAGE_SIZE'] = np.array(v['IMAGE_SIZE'])
143 |     for vk, vv in v.items():
144 |         if vk in config[k]:
145 |             config[k][vk] = vv
146 |         else:
147 |             raise ValueError("{}.{} not exist in config.py".format(k, vk))
148 | 
149 | 
150 | def update_config(config_file):
151 |     exp_config = None
152 |     with open(config_file) as f:
153 |         exp_config = edict(yaml.load(f))
154 |         for k, v in exp_config.items():
155 |             if k in config:
156 |                 if isinstance(v, dict):
157 |                     _update_dict(k, v)
158 |                 else:
159 |                     if k == 'SCALES':
160 |                         config[k][0] = (tuple(v))
161 |                     else:
162 |                         config[k] = v
163 |             else:
164 |                 raise ValueError("{} not exist in config.py".format(k))
165 | 
166 | 
167 | def gen_config(config_file):
168 |     cfg = dict(config)
169 |     for k, v in cfg.items():
170 |         if isinstance(v, edict):
171 |             cfg[k] = dict(v)
172 | 
173 |     with open(config_file, 'w') as f:
174 |         yaml.dump(dict(cfg), f, default_flow_style=False)
175 | 
176 | 
177 | def update_dir(model_dir, log_dir, data_dir):
178 |     if model_dir:
179 |         config.OUTPUT_DIR = model_dir
180 | 
181 |     if log_dir:
182 |         config.LOG_DIR = log_dir
183 | 
184 |     if data_dir:
185 |         config.DATA_DIR = data_dir
186 | 
187 |     config.DATASET.ROOT = os.path.join(
188 |             config.DATA_DIR, config.DATASET.ROOT)
189 | 
190 |     config.TEST.COCO_BBOX_FILE = os.path.join(
191 |             config.DATA_DIR, config.TEST.COCO_BBOX_FILE)
192 | 
193 |     config.MODEL.PRETRAINED = os.path.join(
194 |             config.DATA_DIR, config.MODEL.PRETRAINED)
195 | 
196 | 
197 | def get_model_name(cfg):
198 |     name = cfg.MODEL.NAME
199 |     full_name = cfg.MODEL.NAME
200 |     extra = cfg.MODEL.EXTRA
201 |     if name in ['pose_resnet']:
202 |         name = '{model}_{num_layers}'.format(
203 |             model=name,
204 |             num_layers=extra.NUM_LAYERS)
205 |         deconv_suffix = ''.join(
206 |             'd{}'.format(num_filters)
207 |             for num_filters in extra.NUM_DECONV_FILTERS)
208 |         full_name = '{height}x{width}_{name}_{deconv_suffix}'.format(
209 |             height=cfg.MODEL.IMAGE_SIZE[1],
210 |             width=cfg.MODEL.IMAGE_SIZE[0],
211 |             name=name,
212 |             deconv_suffix=deconv_suffix)
213 |     else:
214 |         raise ValueError('Unkown model: {}'.format(cfg.MODEL))
215 | 
216 |     return name, full_name
217 | 
218 | 
219 | if __name__ == '__main__':
220 |     import sys
221 |     name, full_name = gen_config(sys.argv[1])
222 |     print(name)
223 |     print(full_name)
224 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ### Introduction
  2 | 
  3 | This is a pytorch implementation of [MultiPoseNet](https://arxiv.org/abs/1807.04067) ( ECCV 2018, Muhammed Kocabas et al.)
  4 | 
  5 | ![baseline checkpoint result](./demo/output/pic1_canvas.png)
  6 | 
  7 | [![License](https://img.shields.io/github/license/mashape/apistatus.svg)](https://opensource.org/licenses/MIT) 
  8 | 
  9 | ### Contents
 10 | 
 11 | 0. [**Update**](#update)
 12 | 
 13 | 1. [Requirements](#requirements)
 14 | 2. [Training](#training)
 15 | 3. [Validation](#validation)
 16 | 4. [Demo](#demo)
 17 | 5. [Result](#result)
 18 | 6. [Acknowledgements](#acknowledgements)
 19 | 7. [Citation](#citation)
 20 | 
 21 | ### Demo
 22 | 
 23 | Run inference on your own pictures.
 24 | 
 25 | - Prepare checkpoint:
 26 |   - Download our baseline model ([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z),  [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/8b7f780fe1df46febe73/), backbone: resnet101) or use your own model.
 27 |   - Specify the checkpoints file path `params.ckpt` in file `multipose_test.py`. 
 28 |   - Specify the pictures file path `testdata_dir`  and results file path `testresult_dir` in file `multipose_test.py`. 
 29 | 
 30 | - Run:
 31 | ```python
 32 | python ./evaluate/multipose_test.py  # inference on your own pictures
 33 | python ./evaluate/multipose_coco_eval.py  # COCO evaluation
 34 | ```
 35 | 
 36 | ### Result
 37 | 
 38 | - mAP (baseline checkpoint, temporarily)
 39 | 
 40 | ```
 41 |  Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.590
 42 |  Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.791
 43 |  Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.644
 44 |  Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.565
 45 |  Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.636
 46 |  Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.644
 47 |  Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.810
 48 |  Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.689
 49 |  Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.601
 50 |  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.709
 51 | ```
 52 | 
 53 | ### Requirements
 54 | 
 55 | #### Prerequisites
 56 | - **Disable cudnn for batch_norm**: (See: [@Microsoft / human-pose-estimation.pytorch#installation](https://github.com/Microsoft/human-pose-estimation.pytorch#installation))
 57 | 
 58 | ```bash
 59 | # PYTORCH=/path/to/pytorch
 60 | # for pytorch v0.4.0
 61 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py
 62 | # for pytorch v0.4.1
 63 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py
 64 | 
 65 | # Note that instructions like # PYTORCH=/path/to/pytorch indicate that you should pick 
 66 | # a path where you'd like to have pytorch installed and then set an environment
 67 | # variable (PYTORCH in this case) accordingly.
 68 | ```
 69 | 
 70 | - If you are using Anaconda, we suggest you create a new conda environment :`conda env create -f multipose_environment.yaml`. Maybe you should change the `channels:` and `prefix:` setting in `multipose_environment.yaml` to fit your own Anaconda environment.
 71 |   - `source activate Multipose`
 72 |   - `pip install pycocotools`
 73 | 
 74 | - You can also follow `dependencies` setting in `multipose_environment.yaml` to build your own Python environment.
 75 |   - Pytorch = 0.4.0, Python = 3.6
 76 |   - pycocotools=2.0.0, numpy=1.14.3, scikit-image=0.13.1, opencv=3.4.2
 77 |   - ......
 78 | 
 79 | - Build the NMS extension
 80 | ```bash
 81 |   cd ./lib
 82 |   bash build.sh
 83 |   cd ..
 84 | ```
 85 | 
 86 | #### Data preparation
 87 | 
 88 | **You can skip this step if you just want to run inference on your own pictures using our baseline checkpoint**
 89 | 
 90 | - For Training Keypoint Estimation Subnet, we followed [ZheC/Realtime_Multi-Person_Pose_Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation)'s first 4 Training Steps prepared our COCO2014 dataset (train2014, val2014 and mask2014). 
 91 | - We also use COCO2017 dataset to train Person Detection Subnet.
 92 | 
 93 | Make them look like this:
 94 | 
 95 | ```bash
 96 | ${COCO_ROOT}
 97 |    --annotations
 98 |       --instances_train2017.json
 99 |       --instances_val2017.json
100 |       --person_keypoints_train2017.json
101 |       --person_keypoints_val2017.json
102 |    --images
103 |       --train2014
104 |       --val2014
105 |       --train2017
106 |       --val2017
107 |    --mask2014
108 |    --COCO.json
109 | ```
110 | 
111 | ### Training
112 | 
113 | - Prepare
114 |   - Change the hyper-parameter `coco_root` to your own COCO path.
115 |   - You can change the parameter `params.gpus` to define which GPU device you want to use, such as `params.gpus = [0,1,2,3]`. 
116 |   - The trained model will be saved in  `params.save_dir`  folder every epoch.
117 | - Run:
118 | ```python
119 | python ./training/multipose_keypoint_train.py  # train keypoint subnet
120 | python ./training/multipose_detection_train.py  # train detection subnet
121 | python ./training/multipose_prn_train.py  # train PRN subnet
122 | ```
123 | 
124 | ### Validation
125 | 
126 | - Prepare checkpoint:
127 |   - Download our baseline model ([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z),  [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/8b7f780fe1df46febe73/), backbone: resnet101) or use your own model.
128 |   - Specify the checkpoints file path `params.ckpt` in file `multipose_*_val.py`. 
129 | 
130 | - Run:
131 | ```python
132 | python ./evaluate/multipose_keypoint_val.py  # validate keypoint subnet on first 2644 of val2014 marked by 'isValidation = 1', as our minval dataset.
133 | python ./evaluate/multipose_detection_val.py  # validate detection subnet on val2017
134 | python ./evaluate/multipose_prn_val.py  # validate PRN subnet on val2017
135 | ```
136 | 
137 | ### To Do
138 | 
139 | - [x] Keypoint Estimation Subnet for 17 human keypoints annotated in [COCO dataset](http://cocodataset.org/)
140 | - [x] Keypoint Estimation Subnet with intermediate supervision
141 | - [x] Combine Keypoint Estimation Subnet with Person Detection Subnet(RetinaNet)
142 | - [x] Combine Keypoint Estimation Subnet with [Pose Residual Network](https://github.com/salihkaragoz/pose-residual-network-pytorch/tree/master)
143 | - [ ] Keypoint Estimation Subnet with person segmentation mask
144 | 
145 | ### Update
146 | 
147 | - 180925:
148 |   - Add Person Detection Subnet (RetinaNet) in `posenet.py`.
149 |   - Add NMS extension in `./lib`.
150 | 
151 | - 180930:
152 |   - Add the training code `multipose_detection_train.py` for RetinaNet.  
153 |   - Add `multipose_keypoint_*.py` and `multipose_detection_*.py` for Keypoint Estimation Subnet and Person Detection Subnet respectively. Remove `multipose_resnet_*.py`.
154 | 
155 | - 1801003:
156 |   - Add the training code `multipose_prn_train.py` for PRN.  
157 |   - Add `multipose_coco_eval.py` for COCO evaluation.
158 | 
159 | - 181115:
160 |   - New dataloader for detection subnet, remove `RetinaNet_data_pipeline.py`
161 |   - Add intermediate supervision in Keypoint Estimation Subnet
162 |   - Enable batch_norm for Keypoint Estimation Subnet. 
163 |   - New prerequisites: [Disable cudnn for batch_norm](https://github.com/LiMeng95/MultiPoseNet.pytorch#prerequisites)
164 |   - New checkpoint ([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z),  [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/8b7f780fe1df46febe73/), backbone: resnet101)
165 | 
166 | ### Acknowledgements
167 | 
168 | - [@ZheC Realtime_Multi-Person_Pose_Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation) : The first 4 Training Steps to generate our own COCO dataset.
169 | - Thanks [@IcewineChen](https://github.com/IcewineChen/pytorch-MultiPoseNet) for the implement of `posenet`.
170 | - Thanks [@yhenon](https://github.com/yhenon/pytorch-retinanet) for the implement of RetinaNet in PyTorch.
171 | - [@Microsoft / human-pose-estimation.pytorch#installation](https://github.com/Microsoft/human-pose-estimation.pytorch#installation) :Disable cudnn for batch_norm
172 | 


--------------------------------------------------------------------------------
/network/joint_utils.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import math
  3 | import numpy as np
  4 | from scipy.ndimage.filters import gaussian_filter, maximum_filter
  5 | from scipy.ndimage.morphology import generate_binary_structure
  6 | 
  7 | # Color code used to plot different joints and limbs (eg: joint_type=3 and
  8 | # limb_type=3 will use colors[3])
  9 | colors = [
 10 |     [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
 11 |     [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
 12 |     [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
 13 |     [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 0, 0]]
 14 | limbSeq = [[0,1], [1,2], [2,3], [0,4], [4,5], [5,6], [0,7], [7,8], [8,9], [0,10], [10,11], [11,12], \
 15 |            [0,13], [13,15], [0,14],[14,16]]
 16 | NUM_JOINTS = 18
 17 | 
 18 | 
 19 | def find_peaks(param, img):
 20 |     """
 21 |     Given a (grayscale) image, find local maxima whose value is above a given
 22 |     threshold (param['thre1'])
 23 |     :param img: Input image (2d array) where we want to find peaks
 24 |     :return: 2d np.array containing the [x,y] coordinates of each peak found
 25 |     in the image
 26 |     """
 27 | 
 28 |     peaks_binary = (maximum_filter(img, footprint=generate_binary_structure(
 29 |         2, 1)) == img) * (img > param['thre1'])
 30 |     # Note reverse ([::-1]): we return [[x y], [x y]...] instead of [[y x], [y
 31 |     # x]...]
 32 |     return np.array(np.nonzero(peaks_binary)[::-1]).T
 33 | 
 34 | 
 35 | def compute_resized_coords(coords, resizeFactor):
 36 |     """
 37 |     Given the index/coordinates of a cell in some input array (e.g. image),
 38 |     provides the new coordinates if that array was resized by making it
 39 |     resizeFactor times bigger.
 40 |     E.g.: image of size 3x3 is resized to 6x6 (resizeFactor=2), we'd like to
 41 |     know the new coordinates of cell [1,2] -> Function would return [2.5,4.5]
 42 |     :param coords: Coordinates (indices) of a cell in some input array
 43 |     :param resizeFactor: Resize coefficient = shape_dest/shape_source. E.g.:
 44 |     resizeFactor=2 means the destination array is twice as big as the
 45 |     original one
 46 |     :return: Coordinates in an array of size
 47 |     shape_dest=resizeFactor*shape_source, expressing the array indices of the
 48 |     closest point to 'coords' if an image of size shape_source was resized to
 49 |     shape_dest
 50 |     """
 51 | 
 52 |     # 1) Add 0.5 to coords to get coordinates of center of the pixel (e.g.
 53 |     # index [0,0] represents the pixel at location [0.5,0.5])
 54 |     # 2) Transform those coordinates to shape_dest, by multiplying by resizeFactor
 55 |     # 3) That number represents the location of the pixel center in the new array,
 56 |     # so subtract 0.5 to get coordinates of the array index/indices (revert
 57 |     # step 1)
 58 |     return (np.array(coords, dtype=float) + 0.5) * resizeFactor - 0.5
 59 | 
 60 | 
 61 | def NMS(param, heatmaps, upsampFactor=1., bool_refine_center=True, bool_gaussian_filt=False):
 62 |     """
 63 |     NonMaximaSuppression: find peaks (local maxima) in a set of grayscale images
 64 |     :param heatmaps: set of grayscale images on which to find local maxima (3d np.array,
 65 |     with dimensions image_height x image_width x num_heatmaps)
 66 |     :param upsampFactor: Size ratio between CPM heatmap output and the input image size.
 67 |     Eg: upsampFactor=16 if original image was 480x640 and heatmaps are 30x40xN
 68 |     :param bool_refine_center: Flag indicating whether:
 69 |      - False: Simply return the low-res peak found upscaled by upsampFactor (subject to grid-snap)
 70 |      - True: (Recommended, very accurate) Upsample a small patch around each low-res peak and
 71 |      fine-tune the location of the peak at the resolution of the original input image
 72 |     :param bool_gaussian_filt: Flag indicating whether to apply a 1d-GaussianFilter (smoothing)
 73 |     to each upsampled patch before fine-tuning the location of each peak.
 74 |     :return: a NUM_JOINTS x 4 np.array where each row represents a joint type (0=nose, 1=neck...)
 75 |     and the columns indicate the {x,y} position, the score (probability) and a unique id (counter)
 76 |     """
 77 |     # MODIFIED BY CARLOS: Instead of upsampling the heatmaps to heatmap_avg and
 78 |     # then performing NMS to find peaks, this step can be sped up by ~25-50x by:
 79 |     # (9-10ms [with GaussFilt] or 5-6ms [without GaussFilt] vs 250-280ms on RoG
 80 |     # 1. Perform NMS at (low-res) CPM's output resolution
 81 |     # 1.1. Find peaks using scipy.ndimage.filters.maximum_filter
 82 |     # 2. Once a peak is found, take a patch of 5x5 centered around the peak, upsample it, and
 83 |     # fine-tune the position of the actual maximum.
 84 |     #  '-> That's equivalent to having found the peak on heatmap_avg, but much faster because we only
 85 |     #      upsample and scan the 5x5 patch instead of the full (e.g.) 480x640
 86 | 
 87 |     joint_list_per_joint_type = []
 88 |     cnt_total_joints = 0
 89 | 
 90 |     # For every peak found, win_size specifies how many pixels in each
 91 |     # direction from the peak we take to obtain the patch that will be
 92 |     # upsampled. Eg: win_size=1 -> patch is 3x3; win_size=2 -> 5x5
 93 |     # (for BICUBIC interpolation to be accurate, win_size needs to be >=2!)
 94 |     win_size = 2
 95 | 
 96 |     for joint in range(NUM_JOINTS):
 97 |         map_orig = heatmaps[:, :, joint]
 98 |         peak_coords = find_peaks(param, map_orig)
 99 |         peaks = np.zeros((len(peak_coords), 4))
100 |         for i, peak in enumerate(peak_coords):
101 |             if bool_refine_center:
102 |                 x_min, y_min = np.maximum(0, peak - win_size)
103 |                 x_max, y_max = np.minimum(
104 |                     np.array(map_orig.T.shape) - 1, peak + win_size)
105 | 
106 |                 # Take a small patch around each peak and only upsample that
107 |                 # tiny region
108 |                 patch = map_orig[y_min:y_max + 1, x_min:x_max + 1]
109 |                 map_upsamp = cv2.resize(
110 |                     patch, None, fx=upsampFactor, fy=upsampFactor, interpolation=cv2.INTER_CUBIC)
111 | 
112 |                 # Gaussian filtering takes an average of 0.8ms/peak (and there might be
113 |                 # more than one peak per joint!) -> For now, skip it (it's
114 |                 # accurate enough)
115 |                 map_upsamp = gaussian_filter(
116 |                     map_upsamp, sigma=3) if bool_gaussian_filt else map_upsamp
117 | 
118 |                 # Obtain the coordinates of the maximum value in the patch
119 |                 location_of_max = np.unravel_index(
120 |                     map_upsamp.argmax(), map_upsamp.shape)
121 |                 # Remember that peaks indicates [x,y] -> need to reverse it for
122 |                 # [y,x]
123 |                 location_of_patch_center = compute_resized_coords(
124 |                     peak[::-1] - [y_min, x_min], upsampFactor)
125 |                 # Calculate the offset wrt to the patch center where the actual
126 |                 # maximum is
127 |                 refined_center = (location_of_max - location_of_patch_center)
128 |                 peak_score = map_upsamp[location_of_max]
129 |             else:
130 |                 refined_center = [0, 0]
131 |                 # Flip peak coordinates since they are [x,y] instead of [y,x]
132 |                 peak_score = map_orig[tuple(peak[::-1])]
133 |             peaks[i, :] = tuple([int(round(x)) for x in compute_resized_coords(
134 |                 peak_coords[i], upsampFactor) + refined_center[::-1]]) + (peak_score, cnt_total_joints)
135 |             cnt_total_joints += 1
136 |         joint_list_per_joint_type.append(peaks)
137 | 
138 |     return joint_list_per_joint_type
139 | 
140 | 
141 | def get_joint_list(img_orig, param, heatmaps, scale):
142 | 
143 |     joint_list_per_joint_type = NMS(param,
144 |                                     heatmaps, img_orig.shape[0] / float(heatmaps.shape[0]))
145 | 
146 |     for peaks in joint_list_per_joint_type:
147 |         peaks[:, :2] = peaks[:, :2]*scale
148 | 
149 |     joint_list = np.array([tuple(peak) + (joint_type,) for joint_type, joint_peaks
150 |                            in enumerate(joint_list_per_joint_type) for peak in joint_peaks])
151 | 
152 |     return joint_list
153 | 
154 | 
155 | def draw(canvas, joints, bbox):
156 | 
157 |     x1 = int(bbox[0])
158 |     y1 = int(bbox[1])
159 |     x2 = int(bbox[0]+bbox[2])
160 |     y2 = int(bbox[1]+bbox[3])
161 |     cv2.rectangle(canvas, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
162 | 
163 |     for i in range(17):
164 |         if int(joints[i][2]) == 0:
165 |             continue
166 |         x = int(joints[i][0])
167 |         y = int(joints[i][1])
168 |         cv2.circle(canvas, (x, y), 4, colors[i], thickness=-1)
169 | 
170 |     #     cur_canvas = canvas.copy()
171 |     stickwidth = 2
172 |     for i in range(16):
173 |         if joints[limbSeq[i][0]][2] == 0 or joints[limbSeq[i][1]][2] == 0:
174 |             continue
175 |         X = (int(joints[limbSeq[i][0]][0]), int(joints[limbSeq[i][1]][0]))
176 |         Y = (int(joints[limbSeq[i][0]][1]), int(joints[limbSeq[i][1]][1]))
177 |         mX = np.mean(X)
178 |         mY = np.mean(Y)
179 |         length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
180 |         angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
181 |         polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
182 |         cv2.fillConvexPoly(canvas, polygon, colors[i])
183 | 
184 |     return canvas
185 | 
186 | def plot_result(img_orig, result):
187 | 
188 |     for idx, person_data in enumerate(result):
189 | 
190 |         bbox = person_data['bbox']
191 |         keypoints = person_data['keypoints']
192 | 
193 |         x = keypoints[0::3]
194 |         y = keypoints[1::3]
195 |         v = keypoints[2::3]
196 | 
197 |         joints = []
198 |         for i in range(len(x)):
199 |             joints.append([x[i], y[i], v[i]])
200 | 
201 |         img_orig = draw(img_orig, joints, bbox)
202 |     return img_orig


--------------------------------------------------------------------------------
/datasets/coco_data/ImageAugmentation.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | import random
  4 | import sys
  5 | 
  6 | import cv2
  7 | import matplotlib.pyplot as plt
  8 | import numpy as np
  9 | from scipy import misc, ndimage
 10 | 
 11 | 
 12 | """The purpose of Augmentor is to automate image augmentation 
 13 |    in order to expand datasets as input for our algorithms.
 14 | :aut_scale : Scales them by dice2 (<1, so it is zoom out). 
 15 | :aug_croppad centerB: int with shape (2,), centerB will point to centerA.
 16 | :aug_flip: Mirrors the image around a vertical line running through its center.
 17 | :aug_rotate: Rotates the image. The angle of rotation, in degrees, 
 18 |              is specified by a random integer value that is included
 19 |              in the transform argument.
 20 |              
 21 | :param params_transform: store the value of stride and crop_szie_y, crop_size_x                 
 22 | """
 23 | 
 24 | 
 25 | def aug_scale(meta, img, mask_miss, params_transform):
 26 |     dice = random.random()  # (0,1)
 27 |     if (dice > params_transform['scale_prob']):
 28 | 
 29 |         scale_multiplier = 1
 30 |     else:
 31 |         dice2 = random.random()
 32 |         # linear shear into [scale_min, scale_max]
 33 |         scale_multiplier = (
 34 |             params_transform['scale_max'] - params_transform['scale_min']) * dice2 + \
 35 |             params_transform['scale_min']
 36 |     scale_abs = params_transform['target_dist'] / meta['scale_provided']
 37 |     scale = scale_abs * scale_multiplier
 38 |     img = cv2.resize(img, (0, 0), fx=scale, fy=scale,
 39 |                      interpolation=cv2.INTER_CUBIC)
 40 | 
 41 |     mask_miss = cv2.resize(mask_miss, (0, 0), fx=scale,
 42 |                            fy=scale, interpolation=cv2.INTER_CUBIC)
 43 |     #mask_all = cv2.resize(mask_all, (0, 0), fx=scale,
 44 |     #                      fy=scale, interpolation=cv2.INTER_CUBIC)
 45 | 
 46 |     # modify meta data
 47 |     meta['objpos'] *= scale
 48 |     meta['joint_self'][:, :2] *= scale
 49 |     if (meta['numOtherPeople'] != 0):
 50 |         meta['objpos_other'] *= scale
 51 |         meta['joint_others'][:, :, :2] *= scale
 52 |     return meta, img, mask_miss#, mask_all
 53 | 
 54 | 
 55 | def aug_croppad(meta, img, mask_miss, params_transform):
 56 |     dice_x = random.random()
 57 |     dice_y = random.random()
 58 |     crop_x = int(params_transform['crop_size_x'])
 59 |     crop_y = int(params_transform['crop_size_y'])
 60 |     x_offset = int((dice_x - 0.5) * 2 *
 61 |                    params_transform['center_perterb_max'])
 62 |     y_offset = int((dice_y - 0.5) * 2 *
 63 |                    params_transform['center_perterb_max'])
 64 | 
 65 |     center = meta['objpos'] + np.array([x_offset, y_offset])
 66 |     center = center.astype(int)
 67 | 
 68 |     # pad up and down
 69 |     pad_v = np.ones((crop_y, img.shape[1], 3), dtype=np.uint8) * 128
 70 |     pad_v_mask_miss = np.ones(
 71 |         (crop_y, mask_miss.shape[1]), dtype=np.uint8) * 255
 72 | 
 73 |     img = np.concatenate((pad_v, img, pad_v), axis=0)
 74 |     mask_miss = np.concatenate(
 75 |         (pad_v_mask_miss, mask_miss, pad_v_mask_miss), axis=0)
 76 |     #mask_all = np.concatenate(
 77 |     #    (pad_v_mask_miss, mask_all, pad_v_mask_miss), axis=0)
 78 | 
 79 |     # pad right and left
 80 |     pad_h = np.ones((img.shape[0], crop_x, 3), dtype=np.uint8) * 128
 81 |     pad_h_mask_miss = np.ones(
 82 |         (mask_miss.shape[0], crop_x), dtype=np.uint8) * 255
 83 | 
 84 |     img = np.concatenate((pad_h, img, pad_h), axis=1)
 85 |     mask_miss = np.concatenate(
 86 |         (pad_h_mask_miss, mask_miss, pad_h_mask_miss), axis=1)
 87 |     #mask_all = np.concatenate(
 88 |     #    (pad_h_mask_miss, mask_all, pad_h_mask_miss), axis=1)
 89 | 
 90 |     img = img[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y,
 91 |               center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x, :]
 92 | 
 93 |     mask_miss = mask_miss[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y + 1, center[0] +int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x + 1]
 94 |     #mask_all = mask_all[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y + 1, center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x + 1]
 95 | 
 96 |     offset_left = crop_x / 2 - center[0]
 97 |     offset_up = crop_y / 2 - center[1]
 98 | 
 99 |     offset = np.array([offset_left, offset_up])
100 |     meta['objpos'] += offset
101 |     meta['joint_self'][:, :2] += offset
102 |     mask = np.logical_or.reduce((meta['joint_self'][:, 0] >= crop_x,
103 |                                  meta['joint_self'][:, 0] < 0,
104 |                                  meta['joint_self'][:, 1] >= crop_y,
105 |                                  meta['joint_self'][:, 1] < 0))
106 | 
107 |     meta['joint_self'][mask == True, 2] = 2
108 |     if (meta['numOtherPeople'] != 0):
109 |         meta['objpos_other'] += offset
110 |         meta['joint_others'][:, :, :2] += offset
111 |         mask = np.logical_or.reduce((meta['joint_others'][:, :, 0] >= crop_x,
112 |                                      meta['joint_others'][:, :, 0] < 0,
113 |                                      meta['joint_others'][:, :, 1] >= crop_y,
114 |                                      meta['joint_others'][:, :, 1] < 0))
115 | 
116 |         meta['joint_others'][mask == True, 2] = 2
117 | 
118 |     return meta, img, mask_miss#, mask_all
119 | 
120 | 
121 | def aug_flip(meta, img, mask_miss, params_transform):
122 |     mode = params_transform['mode']
123 |     num_other_people = meta['numOtherPeople']
124 |     dice = random.random()
125 |     doflip = dice <= params_transform['flip_prob']
126 | 
127 |     if doflip:
128 |         img = img.copy()
129 |         cv2.flip(src=img, flipCode=1, dst=img)
130 |         w = img.shape[1]
131 | 
132 |         mask_miss = mask_miss.copy()
133 |         #mask_all = mask_all.copy()
134 |         cv2.flip(src=mask_miss, flipCode=1, dst=mask_miss)
135 |         #cv2.flip(src=mask_all, flipCode=1, dst=mask_all)
136 | 
137 |         '''
138 |         The order in this work:
139 |             (0-'nose'   1-'neck' 2-'right_shoulder' 3-'right_elbow' 4-'right_wrist'
140 |             5-'left_shoulder' 6-'left_elbow'        7-'left_wrist'  8-'right_hip'  
141 |             9-'right_knee'   10-'right_ankle'   11-'left_hip'   12-'left_knee' 
142 |             13-'left_ankle'  14-'right_eye'     15-'left_eye'   16-'right_ear' 
143 |             17-'left_ear' )
144 |         '''
145 |         meta['objpos'][0] = w - 1 - meta['objpos'][0]
146 |         meta['joint_self'][:, 0] = w - 1 - meta['joint_self'][:, 0]
147 |         # print meta['joint_self']
148 |         meta['joint_self'] = meta['joint_self'][[0, 1, 5, 6,
149 |                                                  7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16]]
150 |         if (num_other_people != 0):
151 |             meta['objpos_other'][:, 0] = w - 1 - meta['objpos_other'][:, 0]
152 |             meta['joint_others'][:, :, 0] = w - \
153 |                 1 - meta['joint_others'][:, :, 0]
154 |             for i in range(num_other_people):
155 |                 meta['joint_others'][i] = meta['joint_others'][i][[
156 |                     0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16]]
157 | 
158 |     return meta, img, mask_miss#, mask_all
159 | 
160 | 
161 | def rotatepoint(p, R):
162 |     point = np.zeros((3, 1))
163 |     point[0] = p[0]
164 |     point[1] = p[1]
165 |     point[2] = 1
166 | 
167 |     new_point = R.dot(point)
168 | 
169 |     p[0] = new_point[0]
170 | 
171 |     p[1] = new_point[1]
172 |     return p
173 | 
174 | 
175 | # The correct way to rotation an image
176 | # http://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/
177 | 
178 | 
179 | def rotate_bound(image, angle, bordervalue):
180 |     # grab the dimensions of the image and then determine the
181 |     # center
182 |     (h, w) = image.shape[:2]
183 |     (cX, cY) = (w // 2, h // 2)
184 | 
185 |     # grab the rotation matrix (applying the negative of the
186 |     # angle to rotate clockwise), then grab the sine and cosine
187 |     # (i.e., the rotation components of the matrix)
188 |     M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
189 |     cos = np.abs(M[0, 0])
190 |     sin = np.abs(M[0, 1])
191 | 
192 |     # compute the new bounding dimensions of the image
193 |     nW = int((h * sin) + (w * cos))
194 |     nH = int((h * cos) + (w * sin))
195 | 
196 |     # adjust the rotation matrix to take into account translation
197 |     M[0, 2] += (nW / 2) - cX
198 |     M[1, 2] += (nH / 2) - cY
199 | 
200 |     # perform the actual rotation and return the image
201 |     return cv2.warpAffine(image, M, (nW, nH), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT,
202 |                           borderValue=bordervalue), M
203 | 
204 | 
205 | def aug_rotate(meta, img, mask_miss, params_transform, type="random", input=0, fillType="nearest", constant=0):
206 |     dice = random.random()
207 |     degree = (dice - 0.5) * 2 * \
208 |         params_transform['max_rotate_degree']  # degree [-40,40]
209 | 
210 |     img_rot, R = rotate_bound(img, np.copy(degree), (128, 128, 128))
211 | 
212 |     # Not sure it will cause mask_miss to rotate rightly, just avoid it fails
213 |     # by np.copy().
214 |     mask_miss_rot, _ = rotate_bound(mask_miss, np.copy(degree), (255))
215 |     #mask_all_rot, _ = rotate_bound(mask_all, np.copy(degree), (255))
216 | 
217 |     # modify meta data
218 |     meta['objpos'] = rotatepoint(meta['objpos'], R)
219 | 
220 |     for i in range(18):
221 |         meta['joint_self'][i, :] = rotatepoint(meta['joint_self'][i, :], R)
222 | 
223 |     for j in range(meta['numOtherPeople']):
224 | 
225 |         meta['objpos_other'][j, :] = rotatepoint(meta['objpos_other'][j, :], R)
226 | 
227 |         for i in range(18):
228 |             meta['joint_others'][j, i, :] = rotatepoint(
229 |                 meta['joint_others'][j, i, :], R)
230 | 
231 |     return meta, img_rot, mask_miss_rot#, mask_all_rot
232 | 
233 | 
234 | def aug_scale_bbox(meta, img, params_transform):
235 |     dice = random.random()  # (0,1)
236 |     if (dice > params_transform['scale_prob']):
237 | 
238 |         scale_multiplier = 1
239 |     else:
240 |         dice2 = random.random()
241 |         # linear shear into [scale_min, scale_max]
242 |         scale_multiplier = (
243 |             params_transform['scale_max'] - params_transform['scale_min']) * dice2 + \
244 |             params_transform['scale_min']
245 |     scale_abs = params_transform['target_dist'] / meta['scale_provided']
246 |     scale = scale_abs * scale_multiplier
247 |     img = cv2.resize(img, (0, 0), fx=scale, fy=scale,
248 |                      interpolation=cv2.INTER_CUBIC)
249 | 
250 |     meta['objpos'] *= scale
251 |     adjust_instance_list = []
252 |     for m in meta['instance_mask_list']:
253 |         m = cv2.resize(m, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
254 |         adjust_instance_list.append(m)
255 |     meta['instance_mask_list'] = adjust_instance_list
256 | 
257 |     return meta, img
258 | 
259 | 
260 | def aug_croppad_bbox(meta, img, params_transform):
261 |     dice_x = random.random()
262 |     dice_y = random.random()
263 |     crop_x = int(params_transform['crop_size_x'])
264 |     crop_y = int(params_transform['crop_size_y'])
265 |     x_offset = int((dice_x - 0.5) * 2 *
266 |                    params_transform['center_perterb_max'])
267 |     y_offset = int((dice_y - 0.5) * 2 *
268 |                    params_transform['center_perterb_max'])
269 | 
270 |     center = meta['objpos'] + np.array([x_offset, y_offset])
271 |     center = center.astype(int)
272 | 
273 |     # pad up and down
274 |     pad_v = np.ones((crop_y, img.shape[1], 3), dtype=np.uint8) * 128
275 |     mask = meta['instance_mask_list'][0]
276 |     pad_v_instance_mask = np.zeros(
277 |         (crop_y, mask.shape[1]), dtype=np.uint8)
278 | 
279 |     img = np.concatenate((pad_v, img, pad_v), axis=0)
280 |     adjust_instance_list = []
281 |     for m in meta['instance_mask_list']:
282 |         m = np.concatenate((pad_v_instance_mask, m, pad_v_instance_mask), axis=0)
283 |         adjust_instance_list.append(m)
284 |     meta['instance_mask_list'] = adjust_instance_list
285 | 
286 |     # pad right and left
287 |     pad_h = np.ones((img.shape[0], crop_x, 3), dtype=np.uint8) * 128
288 |     mask = meta['instance_mask_list'][0]
289 |     pad_h_instance_mask = np.zeros(
290 |         (mask.shape[0], crop_x), dtype=np.uint8)
291 | 
292 |     img = np.concatenate((pad_h, img, pad_h), axis=1)
293 |     adjust_instance_list = []
294 |     for m in meta['instance_mask_list']:
295 |         m = np.concatenate((pad_h_instance_mask, m, pad_h_instance_mask), axis=1)
296 |         m = m[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y + 1,
297 |             center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x + 1]
298 |         adjust_instance_list.append(m)
299 |     meta['instance_mask_list'] = adjust_instance_list
300 | 
301 |     img = img[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y,
302 |               center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x, :]
303 | 
304 |     return meta, img
305 | 
306 | 
307 | def aug_flip_bbox(meta, img, params_transform):
308 |     dice = random.random()
309 |     doflip = dice <= params_transform['flip_prob']
310 | 
311 |     if doflip:
312 |         img = img.copy()
313 |         cv2.flip(src=img, flipCode=1, dst=img)
314 | 
315 |         adjust_instance_list = []
316 |         for m in meta['instance_mask_list']:
317 |             m = m.copy()
318 |             cv2.flip(src=m, flipCode=1, dst=m)
319 |             adjust_instance_list.append(m)
320 |         meta['instance_mask_list'] = adjust_instance_list
321 | 
322 |     return meta, img
323 | 
324 | 
325 | def aug_rotate_bbox(meta, img, params_transform, type="random", input=0, fillType="nearest", constant=0):
326 |     dice = random.random()
327 |     degree = (dice - 0.5) * 2 * \
328 |         params_transform['max_rotate_degree']  # degree [-40,40]
329 | 
330 |     img_rot, _ = rotate_bound(img, np.copy(degree), (128, 128, 128))
331 | 
332 |     # Not sure it will cause mask_miss to rotate rightly, just avoid it fails
333 |     # by np.copy().
334 |     adjust_instance_list = []
335 |     for m in meta['instance_mask_list']:
336 |         m, _ = rotate_bound(m, np.copy(degree), (0))
337 |         adjust_instance_list.append(m)
338 |     meta['instance_mask_list'] = adjust_instance_list
339 | 
340 |     return meta, img_rot


--------------------------------------------------------------------------------
/training/trainer.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | import sys
  5 | import datetime
  6 | import numpy as np
  7 | from collections import OrderedDict
  8 | import shutil
  9 | 
 10 | #import encoding
 11 | import torch.nn as nn
 12 | from torch.optim.lr_scheduler import ReduceLROnPlateau, _LRScheduler
 13 | from torch.optim.optimizer import Optimizer
 14 | 
 15 | from lib.utils.log import logger
 16 | from lib.utils.timer import Timer
 17 | from lib.utils.path import mkdir
 18 | import lib.utils.meter as meter_utils
 19 | import network.net_utils as net_utils
 20 | from datasets.data_parallel import ListDataParallel
 21 | 
 22 | 
 23 | def get_learning_rates(optimizer):
 24 |     lrs = [pg['lr'] for pg in optimizer.param_groups]
 25 |     lrs = np.asarray(lrs, dtype=np.float)
 26 |     return lrs
 27 | 
 28 | 
 29 | def default_visualization_fn(writer, step, log_dict):
 30 |     """
 31 |     Visualization with tensorboard
 32 |     :type writer: SummaryWriter
 33 |     :type step: int
 34 |     :type log_dict: dict
 35 |     :return:
 36 |     """
 37 |     for k, v in log_dict.items():
 38 |         if isinstance(v, (float, int)):
 39 |             writer.add_scalar(k, v, step)
 40 |         elif isinstance(v, np.ndarray):
 41 |             writer.add_image(k, v, step)
 42 | 
 43 | 
 44 | class TrainParams(object):
 45 |     # required params
 46 |     exp_name = 'experiment_name'
 47 |     subnet_name = 'keypoint_subnet'
 48 |     batch_size = 32
 49 |     max_epoch = 30
 50 |     optimizer = None
 51 | 
 52 |     # learning rate scheduler
 53 |     lr_scheduler = None         # should be an instance of ReduceLROnPlateau or _LRScheduler
 54 |     max_grad_norm = np.inf
 55 | 
 56 |     # params based on your local env
 57 |     gpus = [0]
 58 |     save_dir = None             # default `save_dir` is `outputs/{exp_name}`
 59 | 
 60 |     # loading existing checkpoint
 61 |     ckpt = None                 # path to the ckpt file, will load the last ckpt in the `save_dir` if `None`
 62 |     re_init = False             # ignore ckpt if `True`
 63 |     zero_epoch = False          # force `last_epoch` to zero
 64 |     ignore_opt_state = False    # ignore the saved optimizer states
 65 | 
 66 |     # saving checkpoints
 67 |     save_freq_epoch = 1             # save one ckpt per `save_freq_epoch` epochs
 68 |     save_freq_step = sys.maxsize    # save one ckpt per `save_freq_setp` steps, default value is inf
 69 |     save_nckpt_max = sys.maxsize    # max number of saved ckpts
 70 | 
 71 |     # validation during training
 72 |     val_freq = 500              # run validation per `val_freq` steps
 73 |     val_nbatch = 10             # number of batches to be validated
 74 |     val_nbatch_end_epoch = 200  # max number of batches to be validated after each epoch
 75 | 
 76 |     # visualization
 77 |     print_freq = 20             # print log per `print_freq` steps
 78 |     use_tensorboard = False     # use tensorboardX if True
 79 |     visualization_fn = None     # custom function to handle `log_dict`, default value is `default_visualization_fn`
 80 | 
 81 |     def update(self, params_dict):
 82 |         state_dict = self.state_dict()
 83 |         for k, v in params_dict.items():
 84 |             if k in state_dict or hasattr(self, k):
 85 |                 setattr(self, k, v)
 86 |             else:
 87 |                 logger.warning('Unknown option: {}: {}'.format(k, v))
 88 | 
 89 |     def state_dict(self):
 90 |         state_dict = OrderedDict()
 91 |         for k in TrainParams.__dict__.keys():
 92 |             if not k.startswith('_'):
 93 |                 state_dict[k] = getattr(self, k)
 94 |         del state_dict['update']
 95 |         del state_dict['state_dict']
 96 | 
 97 |         return state_dict
 98 | 
 99 |     def __str__(self):
100 |         state_dict = self.state_dict()
101 |         text = 'TrainParams {\n'
102 |         for k, v in state_dict.items():
103 |             text += '\t{}: {}\n'.format(k, v)
104 |         text += '}\n'
105 |         return text
106 | 
107 | 
108 | class Trainer(object):
109 | 
110 |     TrainParams = TrainParams
111 | 
112 |     # hooks
113 |     on_start_epoch_hooks = []
114 |     on_end_epoch_hooks = []
115 | 
116 |     def __init__(self, model, train_params, batch_processor, train_data, val_data=None):
117 |         assert isinstance(train_params, TrainParams)
118 |         self.params = train_params
119 | 
120 |         # Data loaders
121 |         self.train_data = train_data
122 |         self.val_data = val_data # sDataLoader.copy(val_data) if isinstance(val_data, DataLoader) else val_data
123 |         # self.val_stream = self.val_data.get_stream() if self.val_data else None
124 | 
125 |         self.batch_processor = batch_processor
126 |         self.batch_per_epoch = len(self.train_data)
127 | 
128 |         # set CUDA_VISIBLE_DEVICES=gpus
129 |         gpus = ','.join([str(x) for x in self.params.gpus])
130 |         os.environ['CUDA_VISIBLE_DEVICES'] = gpus
131 |         self.params.gpus = tuple(range(len(self.params.gpus)))
132 |         logger.info('Set CUDA_VISIBLE_DEVICES to {}...'.format(gpus))
133 | 
134 |         # Optimizer and learning rate
135 |         self.last_epoch = 0
136 |         self.optimizer = self.params.optimizer  # type: Optimizer
137 |         if not isinstance(self.optimizer, Optimizer):
138 |             logger.error('optimizer should be an instance of Optimizer, '
139 |                          'but got {}'.format(type(self.optimizer)))
140 |             raise ValueError
141 |         self.lr_scheduler = self.params.lr_scheduler  # type: ReduceLROnPlateau or _LRScheduler
142 |         if self.lr_scheduler and not isinstance(self.lr_scheduler, (ReduceLROnPlateau, _LRScheduler)):
143 |             logger.error('lr_scheduler should be an instance of _LRScheduler or ReduceLROnPlateau, '
144 |                          'but got {}'.format(type(self.lr_scheduler)))
145 |             raise ValueError
146 |         logger.info('Set lr_scheduler to {}'.format(type(self.lr_scheduler)))
147 | 
148 |         self.log_values = OrderedDict()
149 |         self.batch_timer = Timer()
150 |         self.data_timer = Timer()
151 | 
152 |         # load model
153 |         self.model = model
154 |         ckpt = self.params.ckpt
155 |         if not self.params.save_dir:
156 |             self.params.save_dir = os.path.join('outputs', self.params.exp_name)
157 |         mkdir(self.params.save_dir)
158 |         logger.info('Set output dir to {}'.format(self.params.save_dir))
159 |         if ckpt is None:
160 |             # find the last ckpt
161 |             ckpts = [fname for fname in os.listdir(self.params.save_dir) if os.path.splitext(fname)[-1] == '.h5']
162 |             ckpt = os.path.join(
163 |                 self.params.save_dir, sorted(ckpts, key=lambda name: int(os.path.splitext(name)[0].split('_')[-1]))[-1]
164 |             ) if len(ckpts) > 0 else None
165 | 
166 |         if ckpt is not None and not self.params.re_init:
167 |            self._load_ckpt(ckpt)
168 |            logger.info('Load ckpt from {}'.format(ckpt))
169 | 
170 |         self.model = ListDataParallel(self.model, device_ids=self.params.gpus)
171 |         self.model = self.model.cuda(self.params.gpus[0])
172 |         self.model.train()
173 |         if self.params.subnet_name != 'keypoint_subnet':
174 |             self.model.module.freeze_bn()  # nn.BatchNorm2d.eval() if not 'keypoint_subnet'
175 | 
176 |     def train(self):
177 |         best_loss = np.inf
178 |         for epoch in range(self.last_epoch, self.params.max_epoch):
179 |             self.last_epoch += 1
180 |             logger.info('Start training epoch {}'.format(self.last_epoch))
181 | 
182 |             for fun in self.on_start_epoch_hooks:
183 |                 fun(self)
184 | 
185 |             # adjust learning rate
186 |             if isinstance(self.lr_scheduler, _LRScheduler):
187 |                 cur_lrs = get_learning_rates(self.optimizer)
188 |                 self.lr_scheduler.step(self.last_epoch)
189 |                 logger.info('Set learning rates from {} to {}'.format(cur_lrs, get_learning_rates(self.optimizer)))
190 | 
191 |             train_loss = self._train_one_epoch()
192 | 
193 |             for fun in self.on_end_epoch_hooks:
194 |                 fun(self)
195 | 
196 |             # save model
197 |             if (self.last_epoch % self.params.save_freq_epoch == 0) or (self.last_epoch == self.params.max_epoch - 1):
198 |                 save_name = 'ckpt_{}.h5'.format(self.last_epoch)
199 |                 save_to = os.path.join(self.params.save_dir, save_name)
200 |                 self._save_ckpt(save_to)
201 | 
202 |                 # find best model
203 |                 if self.params.val_nbatch_end_epoch > 0:
204 |                     val_loss = self._val_one_epoch(self.params.val_nbatch_end_epoch)
205 |                     if val_loss < best_loss:
206 |                         best_file = os.path.join(self.params.save_dir,
207 |                                                  'ckpt_{}_{:.5f}.h5.best'.format(self.last_epoch, val_loss))
208 |                         shutil.copyfile(save_to, best_file)
209 |                         logger.info('Found a better ckpt ({:.5f} -> {:.5f}), '
210 |                                     'saved to {}'.format(best_loss, val_loss, best_file))
211 |                         best_loss = val_loss
212 | 
213 |                     if isinstance(self.lr_scheduler, ReduceLROnPlateau):
214 |                         self.lr_scheduler.step(val_loss, self.last_epoch)
215 | 
216 |     def _save_ckpt(self, save_to):
217 |         model = self.model.module if isinstance(self.model, nn.DataParallel) else self.model
218 |         net_utils.save_net(save_to, model, epoch=self.last_epoch,
219 |                            optimizers=[self.optimizer], rm_prev_opt=True, max_n_ckpts=self.params.save_nckpt_max)
220 |         logger.info('Save ckpt to {}'.format(save_to))
221 | 
222 |     def _load_ckpt(self, ckpt):
223 |         epoch, state_dicts = net_utils.load_net(ckpt, self.model, load_state_dict=True)
224 |         if not self.params.ignore_opt_state and not self.params.zero_epoch and epoch >= 0:
225 |             self.last_epoch = epoch
226 |             logger.info('Set last epoch to {}'.format(self.last_epoch))
227 |             if state_dicts is not None:
228 |                 self.optimizer.load_state_dict(state_dicts[0])
229 |                 net_utils.set_optimizer_state_devices(self.optimizer.state, self.params.gpus[0])
230 |                 logger.info('Load optimizer state from checkpoint, '
231 |                             'new learning rate: {}'.format(get_learning_rates(self.optimizer)))
232 | 
233 |     def _train_one_epoch(self):
234 |         self.batch_timer.clear()
235 |         self.data_timer.clear()
236 |         self.batch_timer.tic()
237 |         self.data_timer.tic()
238 |         total_loss = meter_utils.AverageValueMeter()
239 |         for step, batch in enumerate(self.train_data):
240 |             inputs, gts, _ = self.batch_processor(self, batch)
241 | 
242 |             self.data_timer.toc()
243 | 
244 |             # forward
245 |             output, saved_for_loss = self.model(*inputs)
246 | 
247 |             loss, saved_for_log = self.model.module.build_loss(saved_for_loss, *gts)
248 | 
249 |             # backward
250 |             self.optimizer.zero_grad()
251 |             loss.backward()
252 |             total_loss.add(loss.item())
253 | 
254 |             # clip grad
255 |             if not np.isinf(self.params.max_grad_norm):
256 |                 max_norm = nn.utils.clip_grad_norm(self.model.parameters(), self.params.max_grad_norm, float('inf'))
257 |                 saved_for_log['max_grad'] = max_norm
258 | 
259 |             self.optimizer.step(None)
260 | 
261 |             self._process_log(saved_for_log, self.log_values)
262 |             self.batch_timer.toc()
263 | 
264 |             # print log
265 |             reset = False
266 | 
267 |             if step % self.params.print_freq == 0:
268 |                 self._print_log(step, self.log_values, title='Training', max_n_batch=self.batch_per_epoch)
269 |                 reset = True
270 | 
271 |             if step % self.params.save_freq_step == 0 and step > 0:
272 |                 save_to = os.path.join(self.params.save_dir,
273 |                                        'ckpt_{}.h5.ckpt'.format((self.last_epoch - 1) * self.batch_per_epoch + step))
274 |                 self._save_ckpt(save_to)
275 | 
276 |             if reset:
277 |                 self._reset_log(self.log_values)
278 | 
279 |             self.data_timer.tic()
280 |             self.batch_timer.tic()
281 | 
282 |         total_loss, std = total_loss.value()
283 |         return total_loss
284 | 
285 |     def _val_one_epoch(self, n_batch):
286 |         training_mode = self.model.training
287 |         self.model.eval()
288 |         logs = OrderedDict()
289 |         sum_loss = meter_utils.AverageValueMeter()
290 |         logger.info('Val on validation set...')
291 | 
292 |         self.batch_timer.clear()
293 |         self.data_timer.clear()
294 |         self.batch_timer.tic()
295 |         self.data_timer.tic()
296 |         for step, batch in enumerate(self.val_data):
297 |             self.data_timer.toc()
298 |             if step > n_batch:
299 |                 break
300 | 
301 |             inputs, gts, _ = self.batch_processor(self, batch)
302 |             _, saved_for_loss = self.model(*inputs)
303 |             self.batch_timer.toc()
304 | 
305 |             loss, saved_for_log = self.model.module.build_loss(saved_for_loss, *gts)
306 |             sum_loss.add(loss.item())
307 |             self._process_log(saved_for_log, logs)
308 | 
309 |             if step % self.params.print_freq == 0 or step == len(self.val_data)-1:
310 |                 self._print_log(step, logs, 'Validation', max_n_batch=min(n_batch, len(self.val_data)))
311 | 
312 |             self.data_timer.tic()
313 |             self.batch_timer.tic()
314 | 
315 |         mean, std = sum_loss.value()
316 |         logger.info('Validation loss: mean: {}, std: {}'.format(mean, std))
317 |         self.model.train(mode=training_mode)
318 |         if self.params.subnet_name != 'keypoint_subnet':
319 |             self.model.module.freeze_bn()
320 |         return mean
321 | 
322 |     def _process_log(self, src_dict, dest_dict):
323 |         for k, v in src_dict.items():
324 |             if isinstance(v, (int, float)):
325 |                 dest_dict.setdefault(k, meter_utils.AverageValueMeter())
326 |                 dest_dict[k].add(float(v))
327 |             else:
328 |                 dest_dict[k] = v
329 | 
330 |     def _print_log(self, step, log_values, title='', max_n_batch=None):
331 |         log_str = '{}\n'.format(self.params.exp_name)
332 |         log_str += '{}: epoch {}'.format(title, self.last_epoch)
333 | 
334 |         if max_n_batch:
335 |             log_str += '[{}/{}], lr: {}'.format(step, max_n_batch, get_learning_rates(self.optimizer))
336 | 
337 |         i = 0
338 |         # global_step = step + (self.last_epoch - 1) * self.batch_per_epoch
339 |         for k, v in log_values.items():
340 |             if isinstance(v, meter_utils.AverageValueMeter):
341 |                 mean, std = v.value()
342 |                 log_str += '\n\t{}: {:.10f}'.format(k, mean)
343 |                 i += 1
344 | 
345 |         if max_n_batch:
346 |             # print time
347 |             data_time = self.data_timer.duration + 1e-6
348 |             batch_time = self.batch_timer.duration + 1e-6
349 |             rest_seconds = int((max_n_batch - step) * batch_time)
350 |             log_str += '\n\t({:.2f}/{:.2f}s,' \
351 |                        ' fps:{:.1f}, rest: {})'.format(data_time, batch_time,
352 |                                                        self.params.batch_size / batch_time,
353 |                                                        str(datetime.timedelta(seconds=rest_seconds)))
354 |             self.batch_timer.clear()
355 |             self.data_timer.clear()
356 | 
357 |         logger.info(log_str)
358 | 
359 |     def _reset_log(self, log_values):
360 |         for k, v in log_values.items():
361 |             if isinstance(v, meter_utils.AverageValueMeter):
362 |                 v.reset()
363 | 


--------------------------------------------------------------------------------
/network/posenet.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | # keypoint subnet + detection subnet(RetinaNet) + PRN
  3 | import math
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | from torch.autograd import Variable
  9 | from collections import OrderedDict
 10 | from network.fpn import FPN50, FPN101
 11 | from torch.nn import init
 12 | 
 13 | from network.utils import BBoxTransform, ClipBoxes
 14 | from network.anchors import Anchors
 15 | import network.losses as losses
 16 | from lib.nms.pth_nms import pth_nms
 17 | 
 18 | 
 19 | def nms(dets, thresh):
 20 |     "Dispatch to either CPU or GPU NMS implementations.\
 21 |     Accept dets as tensor"""
 22 |     return pth_nms(dets, thresh)
 23 | 
 24 | 
 25 | class Concat(nn.Module):
 26 |     def __init__(self):
 27 |         super(Concat, self).__init__()
 28 | 
 29 |     def forward(self, up1, up2, up3, up4):
 30 |         return torch.cat((up1, up2, up3, up4), 1)
 31 | 
 32 | 
 33 | class RegressionModel(nn.Module):
 34 |     def __init__(self, num_features_in, num_anchors=9, feature_size=256):
 35 |         super(RegressionModel, self).__init__()
 36 | 
 37 |         self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
 38 |         self.act1 = nn.ReLU()
 39 | 
 40 |         self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
 41 |         self.act2 = nn.ReLU()
 42 | 
 43 |         self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
 44 |         self.act3 = nn.ReLU()
 45 | 
 46 |         self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
 47 |         self.act4 = nn.ReLU()
 48 | 
 49 |         self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)
 50 | 
 51 |     def forward(self, x):
 52 |         out = self.conv1(x)
 53 |         out = self.act1(out)
 54 | 
 55 |         out = self.conv2(out)
 56 |         out = self.act2(out)
 57 | 
 58 |         out = self.conv3(out)
 59 |         out = self.act3(out)
 60 | 
 61 |         out = self.conv4(out)
 62 |         out = self.act4(out)
 63 | 
 64 |         out = self.output(out)
 65 | 
 66 |         # out is B x C x W x H, with C = 4*num_anchors
 67 |         out = out.permute(0, 2, 3, 1)
 68 | 
 69 |         return out.contiguous().view(out.shape[0], -1, 4)
 70 | 
 71 | 
 72 | class ClassificationModel(nn.Module):
 73 |     def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256):
 74 |         super(ClassificationModel, self).__init__()
 75 | 
 76 |         self.num_classes = num_classes
 77 |         self.num_anchors = num_anchors
 78 | 
 79 |         self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
 80 |         self.act1 = nn.ReLU()
 81 | 
 82 |         self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
 83 |         self.act2 = nn.ReLU()
 84 | 
 85 |         self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
 86 |         self.act3 = nn.ReLU()
 87 | 
 88 |         self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
 89 |         self.act4 = nn.ReLU()
 90 | 
 91 |         self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
 92 |         self.output_act = nn.Sigmoid()
 93 | 
 94 |     def forward(self, x):
 95 |         out = self.conv1(x)
 96 |         out = self.act1(out)
 97 | 
 98 |         out = self.conv2(out)
 99 |         out = self.act2(out)
100 | 
101 |         out = self.conv3(out)
102 |         out = self.act3(out)
103 | 
104 |         out = self.conv4(out)
105 |         out = self.act4(out)
106 | 
107 |         out = self.output(out)
108 |         out = self.output_act(out)
109 | 
110 |         # out is B x C x W x H, with C = n_classes + n_anchors
111 |         out1 = out.permute(0, 2, 3, 1)
112 | 
113 |         batch_size, width, height, channels = out1.shape
114 | 
115 |         out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes)
116 | 
117 |         return out2.contiguous().view(x.shape[0], -1, self.num_classes)
118 | 
119 | 
120 | class Flatten(nn.Module):
121 |     def forward(self, input):
122 |         return input.view(input.size(0), -1)
123 | 
124 | 
125 | class Add(nn.Module):
126 |     def forward(self, input1, input2):
127 |         return torch.add(input1, input2)
128 | 
129 | 
130 | class PRN(nn.Module):
131 |     def __init__(self,node_count, coeff):
132 |         super(PRN, self).__init__()
133 |         self.flatten   = Flatten()
134 |         self.height    = coeff*28
135 |         self.width     = coeff*18
136 |         self.dens1     = nn.Linear(self.height*self.width*17, node_count)
137 |         self.bneck     = nn.Linear(node_count, node_count)
138 |         self.dens2     = nn.Linear(node_count, self.height*self.width*17)
139 |         self.drop      = nn.Dropout()
140 |         self.add       = Add()
141 |         self.softmax   = nn.Softmax(dim=1)
142 | 
143 |     def forward(self, x):
144 |         res = self.flatten(x)
145 |         out = self.drop(F.relu(self.dens1(res)))
146 |         out = self.drop(F.relu(self.bneck(out)))
147 |         out = F.relu(self.dens2(out))
148 |         out = self.add(out, res)
149 |         out = self.softmax(out)
150 |         out = out.view(out.size()[0], self.height, self.width, 17)
151 | 
152 |         return out
153 | 
154 | class poseNet(nn.Module):
155 |     def __init__(self, layers, prn_node_count=1024, prn_coeff=2):
156 |         super(poseNet, self).__init__()
157 |         if layers == 101:
158 |             self.fpn = FPN101()
159 |         if layers == 50:
160 |             self.fpn = FPN50()
161 | 
162 |         ##################################################################################
163 |         # keypoints subnet
164 |         # intermediate supervision
165 |         self.convfin_k2 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0)
166 |         self.convfin_k3 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0)
167 |         self.convfin_k4 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0)
168 |         self.convfin_k5 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0)
169 | 
170 |         # 2 conv(kernel=3x3)，change channels from 256 to 128
171 |         self.convt1 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
172 |         self.convt2 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
173 |         self.convt3 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
174 |         self.convt4 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1)
175 |         self.convs1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
176 |         self.convs2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
177 |         self.convs3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
178 |         self.convs4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
179 | 
180 |         self.upsample1 = nn.Upsample(scale_factor=8, mode='nearest', align_corners=None)
181 |         self.upsample2 = nn.Upsample(scale_factor=4, mode='nearest', align_corners=None)
182 |         self.upsample3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None)
183 |         # self.upsample4 = nn.Upsample(size=(120,120),mode='bilinear',align_corners=True)
184 | 
185 |         self.concat = Concat()
186 |         self.conv2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
187 |         self.convfin = nn.Conv2d(256, 18, kernel_size=1, stride=1, padding=0)
188 | 
189 |         ##################################################################################
190 |         # detection subnet
191 |         self.regressionModel = RegressionModel(256)
192 |         self.classificationModel = ClassificationModel(256, num_classes=1)
193 |         self.anchors = Anchors()
194 |         self.regressBoxes = BBoxTransform()
195 |         self.clipBoxes = ClipBoxes()
196 |         self.focalLoss = losses.FocalLoss()
197 | 
198 |         ##################################################################################
199 |         # prn subnet
200 |         self.prn = PRN(prn_node_count, prn_coeff)
201 | 
202 |         ##################################################################################
203 |         # initialize weights
204 |         self._initialize_weights_norm()
205 |         prior = 0.01
206 |         self.classificationModel.output.weight.data.fill_(0)
207 |         self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
208 |         self.regressionModel.output.weight.data.fill_(0)
209 |         self.regressionModel.output.bias.data.fill_(0)
210 | 
211 |         self.freeze_bn()  # from retinanet
212 | 
213 |     def _initialize_weights_norm(self):
214 |         for m in self.modules():
215 |             if isinstance(m, nn.Conv2d):
216 |                 init.normal_(m.weight, std=0.01)
217 |                 if m.bias is not None:  # resnet101 conv2d doesn't add bias
218 |                     init.constant_(m.bias, 0.0)
219 | 
220 |     def freeze_bn(self):
221 |         '''Freeze BatchNorm layers.'''
222 |         for layer in self.modules():
223 |             if isinstance(layer, nn.BatchNorm2d):
224 |                 layer.eval()
225 | 
226 |     def forward(self, x):
227 | 
228 |         img_batch, subnet_name = x
229 | 
230 |         if subnet_name == 'keypoint_subnet':
231 |             return self.keypoint_forward(img_batch)
232 |         elif subnet_name == 'detection_subnet':
233 |             return self.detection_forward(img_batch)
234 |         elif subnet_name == 'prn_subnet':
235 |             return self.prn_forward(img_batch)
236 |         else:  # entire_net
237 |             features = self.fpn(img_batch)
238 |             p2, p3, p4, p5 = features[0]  # fpn features for keypoint subnet
239 |             features = features[1]  # fpn features for detection subnet
240 | 
241 |             ##################################################################################
242 |             # keypoints subnet
243 |             p5 = self.convt1(p5)
244 |             p5 = self.convs1(p5)
245 |             p4 = self.convt2(p4)
246 |             p4 = self.convs2(p4)
247 |             p3 = self.convt3(p3)
248 |             p3 = self.convs3(p3)
249 |             p2 = self.convt4(p2)
250 |             p2 = self.convs4(p2)
251 | 
252 |             p5 = self.upsample1(p5)
253 |             p4 = self.upsample2(p4)
254 |             p3 = self.upsample3(p3)
255 | 
256 |             concat = self.concat(p5, p4, p3, p2)
257 |             predict_keypoint = self.convfin(F.relu(self.conv2(concat)))
258 |             del p5, p4, p3, p2, concat
259 | 
260 |             ##################################################################################
261 |             # detection subnet
262 |             regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
263 |             classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
264 |             anchors = self.anchors(img_batch)
265 | 
266 |             transformed_anchors = self.regressBoxes(anchors, regression)
267 |             transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
268 | 
269 |             scores = torch.max(classification, dim=2, keepdim=True)[0]
270 | 
271 |             scores_over_thresh = (scores > 0.05)[0, :, 0]#0.05
272 | 
273 |             if scores_over_thresh.sum() == 0:
274 |                 # no boxes to NMS, just return
275 |                 return predict_keypoint, [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
276 | 
277 |             classification = classification[:, scores_over_thresh, :]
278 |             transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
279 |             scores = scores[:, scores_over_thresh, :]
280 | 
281 |             anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5)  # threshold = 0.5, inpsize=480
282 | 
283 |             nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
284 | 
285 |             return predict_keypoint, [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
286 | 
287 | 
288 |     def keypoint_forward(self, img_batch):
289 |         saved_for_loss = []
290 | 
291 |         p2, p3, p4, p5 = self.fpn(img_batch)[0] # fpn features for keypoint subnet
292 | 
293 |         ##################################################################################
294 |         # keypoints subnet
295 |         # intermediate supervision
296 |         saved_for_loss.append(self.convfin_k2(p2))
297 |         saved_for_loss.append(self.upsample3(self.convfin_k3(p3)))
298 |         saved_for_loss.append(self.upsample2(self.convfin_k4(p4)))
299 |         saved_for_loss.append(self.upsample1(self.convfin_k5(p5)))
300 | 
301 |         #
302 |         p5 = self.convt1(p5)
303 |         p5 = self.convs1(p5)
304 |         p4 = self.convt2(p4)
305 |         p4 = self.convs2(p4)
306 |         p3 = self.convt3(p3)
307 |         p3 = self.convs3(p3)
308 |         p2 = self.convt4(p2)
309 |         p2 = self.convs4(p2)
310 | 
311 |         p5 = self.upsample1(p5)
312 |         p4 = self.upsample2(p4)
313 |         p3 = self.upsample3(p3)
314 | 
315 |         predict_keypoint = self.convfin(F.relu(self.conv2(self.concat(p5, p4, p3, p2))))
316 |         saved_for_loss.append(predict_keypoint)
317 | 
318 |         return predict_keypoint, saved_for_loss
319 | 
320 |     def detection_forward(self, img_batch):
321 |         saved_for_loss = []
322 | 
323 |         features = self.fpn(img_batch)[1]  # fpn features for detection subnet
324 | 
325 |         ##################################################################################
326 |         # detection subnet
327 |         regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
328 |         classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
329 |         anchors = self.anchors(img_batch)
330 | 
331 |         saved_for_loss.append(classification)
332 |         saved_for_loss.append(regression)
333 |         saved_for_loss.append(anchors)
334 | 
335 |         return [], saved_for_loss
336 | 
337 |     def prn_forward(self, img_batch):
338 |         saved_for_loss = []
339 | 
340 |         res = self.prn.flatten(img_batch)
341 |         out = self.prn.drop(F.relu(self.prn.dens1(res)))
342 |         out = self.prn.drop(F.relu(self.prn.bneck(out)))
343 |         out = F.relu(self.prn.dens2(out))
344 |         out = self.prn.add(out,res)
345 |         out = self.prn.softmax(out)
346 |         out = out.view(out.size()[0], self.prn.height, self.prn.width, 17)
347 | 
348 |         saved_for_loss.append(out)
349 | 
350 |         return out, saved_for_loss
351 | 
352 |     @staticmethod
353 |     def build_loss(saved_for_loss, *args):
354 | 
355 |         subnet_name = args[0]
356 | 
357 |         if subnet_name == 'keypoint_subnet':
358 |             return build_keypoint_loss(saved_for_loss, args[1], args[2])
359 |         elif subnet_name == 'detection_subnet':
360 |             return build_detection_loss(saved_for_loss, args[1])
361 |         elif subnet_name == 'prn_subnet':
362 |             return build_prn_loss(saved_for_loss, args[1])
363 |         else:
364 |             return 0
365 | 
366 | 
367 | def build_keypoint_loss(saved_for_loss, heat_temp, heat_weight):
368 | 
369 |     names = build_names()
370 |     saved_for_log = OrderedDict()
371 |     criterion = nn.MSELoss(size_average=True).cuda()
372 |     total_loss = 0
373 |     div1 = 1.
374 |     #div2 = 100.
375 | 
376 |     for j in range(5):
377 | 
378 |         pred1 = saved_for_loss[j][:, :18, :, :] * heat_weight
379 |         gt1 = heat_weight * heat_temp
380 | 
381 |         #pred2 = saved_for_loss[j][:, 18:, :, :]
382 |         #gt2 = mask_all
383 | 
384 |         # Compute losses
385 |         loss1 = criterion(pred1, gt1)/div1  # heatmap_loss
386 |         #loss2 = criterion(pred2, gt2)/div2  # mask_loss
387 |         total_loss += loss1
388 |         #total_loss += loss2
389 | 
390 |         # Get value from Tensor and save for log
391 |         saved_for_log[names[j*2]] = loss1.item()
392 |         #saved_for_log[names[j*2+1]] = loss2.item()
393 | 
394 |     saved_for_log['max_ht'] = torch.max(
395 |         saved_for_loss[-1].data[:, :18, :, :]).item()
396 |     saved_for_log['min_ht'] = torch.min(
397 |         saved_for_loss[-1].data[:, :18, :, :]).item()
398 |     #saved_for_log['max_mask'] = torch.max(
399 |     #    saved_for_loss[-1].data[:, 18:, :, :]).item()
400 |     #saved_for_log['min_mask'] = torch.min(
401 |     #    saved_for_loss[-1].data[:, 18:, :, :]).item()
402 | 
403 |     return total_loss, saved_for_log
404 | 
405 | def build_detection_loss(saved_for_loss, anno):
406 |     '''
407 |     :param saved_for_loss: [classifications, regressions, anchors]
408 |     :param anno: annotations
409 |     :return: classification_loss, regression_loss
410 |     '''
411 |     saved_for_log = OrderedDict()
412 | 
413 |     # Compute losses
414 |     focalLoss = losses.FocalLoss()
415 |     classification_loss, regression_loss = focalLoss(*saved_for_loss, anno)
416 |     classification_loss = classification_loss.mean()
417 |     regression_loss = regression_loss.mean()
418 |     total_loss = classification_loss + regression_loss
419 | 
420 |     # Get value from Tensor and save for log
421 |     saved_for_log['total_loss'] = total_loss.item()
422 |     saved_for_log['classification_loss'] = classification_loss.item()
423 |     saved_for_log['regression_loss'] = regression_loss.item()
424 | 
425 |     return total_loss, saved_for_log
426 | 
427 | def build_prn_loss(saved_for_loss, label):
428 |     '''
429 |     :param saved_for_loss: [out]
430 |     :param label: label
431 |     :return: prn loss
432 |     '''
433 |     saved_for_log = OrderedDict()
434 | 
435 |     criterion = nn.BCELoss(size_average=True).cuda()
436 |     total_loss = 0
437 | 
438 |     # Compute losses
439 |     loss1 = criterion(saved_for_loss[0], label)
440 |     total_loss += loss1
441 | 
442 |     # Get value from Tensor and save for log
443 |     saved_for_log['PRN loss'] = loss1.item()
444 | 
445 |     return total_loss, saved_for_log
446 | 
447 | def build_names():
448 |     names = []
449 |     for j in range(2, 6):
450 |         names.append('heatmap_loss_k%d' % j)
451 |         names.append('seg_loss_k%d' % j)
452 |     names.append('heatmap_loss')
453 |     names.append('seg_loss')
454 |     return names
455 | 
456 | 


--------------------------------------------------------------------------------
/datasets/coco_data/COCO_data_pipeline.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import os
  3 | 
  4 | import cv2
  5 | import numpy as np
  6 | 
  7 | import torch
  8 | from datasets.coco_data.heatmap import putGaussianMaps
  9 | from datasets.coco_data.ImageAugmentation import (aug_croppad, aug_flip, aug_rotate, aug_scale,
 10 |                                                   aug_croppad_bbox, aug_flip_bbox, aug_rotate_bbox, aug_scale_bbox)
 11 | from datasets.coco_data.preprocessing import resnet_preprocess
 12 | from torch.utils.data import DataLoader, Dataset
 13 | from functools import partial, reduce
 14 | 
 15 | from pycocotools.coco import COCO, maskUtils
 16 | 
 17 | '''
 18 | train2014  : 82783 simages
 19 | val2014    : 40504 images
 20 | 
 21 | first 2644 of val2014 marked by 'isValidation = 1', as our minval dataset.
 22 | So all training data have 82783+40504-2644 = 120643 samples
 23 | '''
 24 | 
 25 | params_transform = dict()
 26 | params_transform['mode'] = 5
 27 | # === aug_scale ===
 28 | params_transform['scale_min'] = 0.8
 29 | params_transform['scale_max'] = 1.2
 30 | params_transform['scale_prob'] = 1
 31 | params_transform['target_dist'] = 0.6
 32 | # === aug_rotate ===
 33 | params_transform['max_rotate_degree'] = 40
 34 | 
 35 | # ===
 36 | params_transform['center_perterb_max'] = 40
 37 | 
 38 | # === aug_flip ===
 39 | params_transform['flip_prob'] = 0.3
 40 | 
 41 | params_transform['np'] = 56
 42 | params_transform['sigma'] = 7.0
 43 | 
 44 | def annToRLE(ann, height, width):
 45 |     """
 46 |     Convert annotation which can be polygons, uncompressed RLE to RLE.
 47 |     :return: binary mask (numpy 2D array)
 48 |     """
 49 |     segm = ann['segmentation']
 50 |     if isinstance(segm, list):
 51 |         # polygon -- a single object might consist of multiple parts
 52 |         # we merge all parts into one mask rle code
 53 |         rles = maskUtils.frPyObjects(segm, height, width)
 54 |         rle = maskUtils.merge(rles)
 55 |     elif isinstance(segm['counts'], list):
 56 |         # uncompressed RLE
 57 |         rle = maskUtils.frPyObjects(segm, height, width)
 58 |     else:
 59 |         # rle
 60 |         rle = ann['segmentation']
 61 |     return rle
 62 | 
 63 | 
 64 | def annToMask(ann, height, width):
 65 |     """
 66 |     Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
 67 |     :return: binary mask (numpy 2D array)
 68 |     """
 69 |     rle = annToRLE(ann, height, width)
 70 |     m = maskUtils.decode(rle)
 71 |     return m
 72 | 
 73 | class Cocokeypoints(Dataset):
 74 |     def __init__(self, root, mask_dir, index_list, data, inp_size, feat_stride, preprocess='resnet', transform=None,
 75 |                  target_transform=None):
 76 | 
 77 |         params_transform['crop_size_x'] = inp_size
 78 |         params_transform['crop_size_y'] = inp_size
 79 |         params_transform['stride'] = feat_stride
 80 | 
 81 |         # add preprocessing as a choice, so we don't modify it manually.
 82 |         self.preprocess = preprocess
 83 |         self.data = data
 84 |         self.mask_dir = mask_dir
 85 |         self.numSample = len(index_list)
 86 |         self.index_list = index_list
 87 |         self.root = root
 88 |         self.transform = transform
 89 |         self.target_transform = target_transform
 90 | 
 91 |     def get_anno(self, meta_data):
 92 |         """
 93 |         get meta information
 94 |         """
 95 |         anno = dict()
 96 |         anno['dataset'] = meta_data['dataset']
 97 |         anno['img_height'] = int(meta_data['img_height'])
 98 |         anno['img_width'] = int(meta_data['img_width'])
 99 | 
100 |         anno['isValidation'] = meta_data['isValidation']
101 |         anno['people_index'] = int(meta_data['people_index'])
102 |         anno['annolist_index'] = int(meta_data['annolist_index'])
103 | 
104 |         # (b) objpos_x (float), objpos_y (float)
105 |         anno['objpos'] = np.array(meta_data['objpos'])
106 |         anno['scale_provided'] = meta_data['scale_provided']
107 |         anno['joint_self'] = np.array(meta_data['joint_self'])
108 | 
109 |         anno['numOtherPeople'] = int(meta_data['numOtherPeople'])
110 |         anno['num_keypoints_other'] = np.array(
111 |             meta_data['num_keypoints_other'])
112 |         anno['joint_others'] = np.array(meta_data['joint_others'])
113 |         anno['objpos_other'] = np.array(meta_data['objpos_other'])
114 |         anno['scale_provided_other'] = meta_data['scale_provided_other']
115 |         anno['bbox_other'] = meta_data['bbox_other']
116 |         anno['segment_area_other'] = meta_data['segment_area_other']
117 | 
118 |         if anno['numOtherPeople'] == 1:
119 |             anno['joint_others'] = np.expand_dims(anno['joint_others'], 0)
120 |             anno['objpos_other'] = np.expand_dims(anno['objpos_other'], 0)
121 |         return anno
122 | 
123 |     def add_neck(self, meta):
124 |         '''
125 |         MS COCO annotation order:
126 |         0: nose	   		1: l eye		2: r eye	3: l ear	4: r ear
127 |         5: l shoulder	6: r shoulder	7: l elbow	8: r elbow
128 |         9: l wrist		10: r wrist		11: l hip	12: r hip	13: l knee
129 |         14: r knee		15: l ankle		16: r ankle
130 | 
131 |         The order in this work:
132 |         (0-'nose'	1-'neck' 2-'right_shoulder' 3-'right_elbow' 4-'right_wrist'
133 |         5-'left_shoulder' 6-'left_elbow'	    7-'left_wrist'  8-'right_hip'
134 |         9-'right_knee'	 10-'right_ankle'	11-'left_hip'   12-'left_knee'
135 |         13-'left_ankle'	 14-'right_eye'	    15-'left_eye'   16-'right_ear'
136 |         17-'left_ear' )
137 |         '''
138 |         our_order = [0, 17, 6, 8, 10, 5, 7, 9,
139 |                      12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
140 |         # Index 6 is right shoulder and Index 5 is left shoulder
141 |         right_shoulder = meta['joint_self'][6, :]
142 |         left_shoulder = meta['joint_self'][5, :]
143 |         neck = (right_shoulder + left_shoulder) / 2
144 |         if right_shoulder[2] == 2 or left_shoulder[2] == 2:
145 |             neck[2] = 2
146 |         elif right_shoulder[2] == 1 or left_shoulder[2] == 1:
147 |             neck[2] = 1
148 |         else:
149 |             neck[2] = right_shoulder[2] * left_shoulder[2]
150 | 
151 |         neck = neck.reshape(1, len(neck))
152 |         neck = np.round(neck)
153 |         meta['joint_self'] = np.vstack((meta['joint_self'], neck))
154 |         meta['joint_self'] = meta['joint_self'][our_order, :]
155 |         temp = []
156 | 
157 |         for i in range(meta['numOtherPeople']):
158 |             right_shoulder = meta['joint_others'][i, 6, :]
159 |             left_shoulder = meta['joint_others'][i, 5, :]
160 |             neck = (right_shoulder + left_shoulder) / 2
161 |             if (right_shoulder[2] == 2 or left_shoulder[2] == 2):
162 |                 neck[2] = 2
163 |             elif (right_shoulder[2] == 1 or left_shoulder[2] == 1):
164 |                 neck[2] = 1
165 |             else:
166 |                 neck[2] = right_shoulder[2] * left_shoulder[2]
167 |             neck = neck.reshape(1, len(neck))
168 |             neck = np.round(neck)
169 |             single_p = np.vstack((meta['joint_others'][i], neck))
170 |             single_p = single_p[our_order, :]
171 |             temp.append(single_p)
172 |         meta['joint_others'] = np.array(temp)
173 | 
174 |         return meta
175 | 
176 |     def remove_illegal_joint(self, meta):
177 |         crop_x = int(params_transform['crop_size_x'])
178 |         crop_y = int(params_transform['crop_size_y'])
179 |         mask = np.logical_or.reduce((meta['joint_self'][:, 0] >= crop_x,
180 |                                      meta['joint_self'][:, 0] < 0,
181 |                                      meta['joint_self'][:, 1] >= crop_y,
182 |                                      meta['joint_self'][:, 1] < 0))
183 |         # out_bound = np.nonzero(mask)
184 |         # print(mask.shape)
185 |         meta['joint_self'][mask == True, :] = (1, 1, 2)
186 |         if (meta['numOtherPeople'] != 0):
187 |             mask = np.logical_or.reduce((meta['joint_others'][:, :, 0] >= crop_x,
188 |                                          meta['joint_others'][:, :, 0] < 0,
189 |                                          meta['joint_others'][:,
190 |                                                               :, 1] >= crop_y,
191 |                                          meta['joint_others'][:, :, 1] < 0))
192 |             meta['joint_others'][mask == True, :] = (1, 1, 2)
193 | 
194 |         return meta
195 | 
196 |     def get_ground_truth(self, meta, mask_miss):
197 | 
198 |         number_keypoints = 18
199 | 
200 |         stride = params_transform['stride']
201 |         mode = params_transform['mode']
202 |         crop_size_y = params_transform['crop_size_y']
203 |         crop_size_x = params_transform['crop_size_x']
204 |         num_parts = params_transform['np']
205 |         nop = meta['numOtherPeople']
206 |         grid_y = int(crop_size_y / stride)
207 |         grid_x = int(crop_size_x / stride)
208 |         channels = (num_parts + 1) * 2
209 |         heatmaps = np.zeros((grid_y, grid_x, number_keypoints))
210 | 
211 |         mask_miss = cv2.resize(mask_miss, (0, 0), fx=1.0 / stride, fy=1.0 / stride,
212 |                                interpolation=cv2.INTER_CUBIC).astype(np.float32)
213 |         mask_miss = mask_miss / 255.
214 |         mask_miss = np.expand_dims(mask_miss, axis=2)
215 |         heat_mask = np.repeat(mask_miss, number_keypoints, axis=2)  # 19
216 | 
217 |         #mask_all = cv2.resize(mask_all, (0, 0), fx=1.0 / stride, fy=1.0 / stride,
218 |         #                      interpolation=cv2.INTER_CUBIC).astype(np.float32)
219 |         #mask_all = mask_all / 255.
220 |         #mask_all = np.expand_dims(mask_all, axis=2)
221 | 
222 |         # confidance maps for body parts
223 |         for i in range(number_keypoints):
224 |             if (meta['joint_self'][i, 2] <= 1):
225 |                 center = meta['joint_self'][i, :2]
226 |                 gaussian_map = heatmaps[:, :, i]
227 |                 heatmaps[:, :, i] = putGaussianMaps(
228 |                     center, gaussian_map, params_transform=params_transform)
229 |             for j in range(nop):
230 |                 if (meta['joint_others'][j, i, 2] <= 1):
231 |                     center = meta['joint_others'][j, i, :2]
232 |                     gaussian_map = heatmaps[:, :, i]
233 |                     heatmaps[:, :, i] = putGaussianMaps(
234 |                         center, gaussian_map, params_transform=params_transform)
235 | 
236 |         return heat_mask, heatmaps
237 | 
238 |     def __getitem__(self, index):
239 |         idx = self.index_list[index]
240 |         img = cv2.imread(os.path.join(self.root, self.data[idx]['img_paths']))
241 |         img_idx = self.data[idx]['img_paths'][-16:-3]
242 | #        print img.shape
243 |         if "COCO_val" in self.data[idx]['dataset']:
244 |             mask_miss = cv2.imread(
245 |                 self.mask_dir + 'mask2014/val2014_mask_miss_' + img_idx + 'png', 0)
246 |             #mask_all = cv2.imread(
247 |             #    self.mask_dir + 'mask2014/val2014_mask_all_' + img_idx + 'png', 0)
248 |         elif "COCO" in self.data[idx]['dataset']:
249 |             mask_miss = cv2.imread(
250 |                 self.mask_dir + 'mask2014/train2014_mask_miss_' + img_idx + 'png', 0)
251 |             #mask_all = cv2.imread(
252 |             #    self.mask_dir + 'mask2014/train2014_mask_all_' + img_idx + 'png', 0)
253 |         meta_data = self.get_anno(self.data[idx])
254 | 
255 |         meta_data = self.add_neck(meta_data)
256 | 
257 |         augmentations = [
258 |             partial(aug_meth, params_transform=params_transform)
259 |             for aug_meth in [
260 |                 aug_scale,
261 |                 aug_rotate,
262 |                 aug_croppad,
263 |                 aug_flip
264 |             ]
265 |         ]
266 | 
267 |         meta_data, img, mask_miss = reduce(
268 |             lambda md_i_mm_ma, f: f(*md_i_mm_ma),
269 |             augmentations,
270 |             (meta_data, img, mask_miss)
271 |         )
272 | 
273 |         meta_data = self.remove_illegal_joint(meta_data)
274 | 
275 |         heat_mask, heatmaps = self.get_ground_truth(
276 |             meta_data, mask_miss)
277 | 
278 |         # image preprocessing, which comply the model
279 |         # trianed on Imagenet dataset
280 |         if self.preprocess == 'resnet':
281 |             img = resnet_preprocess(img)
282 | 
283 |         img = torch.from_numpy(img)
284 |         heatmaps = torch.from_numpy(
285 |             heatmaps.transpose((2, 0, 1)).astype(np.float32))
286 |         heat_mask = torch.from_numpy(
287 |             heat_mask.transpose((2, 0, 1)).astype(np.float32))
288 |         #mask_all = torch.from_numpy(
289 |         #    mask_all.transpose((2, 0, 1)).astype(np.float32))
290 | 
291 |         return img, heatmaps, heat_mask#, mask_all
292 | 
293 |     def __len__(self):
294 |         return self.numSample
295 | 
296 | class Cocobbox(Dataset):
297 |     def __init__(self, root, mask_dir, index_list, data, inp_size, feat_stride, coco,
298 |                  preprocess='resnet', training=True):
299 | 
300 |         params_transform['crop_size_x'] = inp_size
301 |         params_transform['crop_size_y'] = inp_size
302 |         params_transform['stride'] = feat_stride
303 | 
304 |         # add preprocessing as a choice, so we don't modify it manually.
305 |         self.preprocess = preprocess
306 |         self.data = data
307 |         self.index_list = index_list
308 |         self.numSample = len(self.index_list)
309 |         self.training = training
310 | 
311 |         if self.training:
312 |             img_path = os.path.join(root, 'train2017')
313 |         else:
314 |             img_path = os.path.join(root, 'val2017')
315 | 
316 |         self.instance_info_list, self.image_path_list = self.get_instance_info_list(img_path, coco)
317 | 
318 |     def get_instance_info_list(self, img_path, coco):
319 | 
320 |         instance_info_list = []
321 |         image_path_list = []
322 | 
323 |         for idx in self.index_list:
324 |             image_info = coco.loadImgs(int(self.data[idx]['image_id']))[0]
325 |             image_path = os.path.join(img_path, image_info['file_name'])
326 |             if not os.path.exists(image_path):
327 |                 print(
328 |                     "[skip] json annotation found, but cannot found image: {}".format(image_path))
329 |                 continue
330 |             image_path_list.append(image_path)
331 | 
332 |             annos_ids = coco.getAnnIds(imgIds=self.data[idx]['image_id'])
333 |             annos_info = coco.loadAnns(annos_ids)
334 |             instance_info = {}
335 |             instance_info["anns"] = annos_info
336 |             instance_info["height"] = image_info["height"]
337 |             instance_info["width"] = image_info["width"]
338 |             instance_info_list.append(instance_info)
339 | 
340 |         return instance_info_list, image_path_list
341 | 
342 |     def get_instance_mask(self, instance_info):
343 |         height = instance_info['height']
344 |         width = instance_info['width']
345 |         anns = instance_info['anns']
346 | 
347 |         instance_masks = []
348 |         class_ids = []
349 |         for anno in anns:
350 |             class_id = 1
351 |             m = annToMask(anno, height, width)
352 |             # Some objects are so small that they're less than 1 pixel area
353 |             # and end up rounded out. Skip those objects.
354 |             if m.max() < 1:
355 |                 continue
356 |             # Is it a crowd? If so, use a negative class ID.
357 |             if anno['iscrowd']:
358 |                 # Use negative class ID for crowds
359 |                 class_id = -1
360 |                 # For crowd masks, annToMask() sometimes returns a mask
361 |                 # smaller than the given dimensions. If so, resize it.
362 |                 if m.shape[0] != height or m.shape[1] != width:
363 |                     m = np.ones([height, width], dtype=bool)
364 |             instance_masks.append(m)
365 |             class_ids.append(class_id)
366 |         return instance_masks, class_ids
367 | 
368 |     def get_anno(self, meta_data, instance_info):
369 |         """
370 |         get meta information
371 |         """
372 |         anno = dict()
373 | 
374 |         # (b) objpos_x (float), objpos_y (float)
375 |         anno['objpos'] = np.array(meta_data['objpos'])
376 |         anno['scale_provided'] = meta_data['scale_provided']
377 | 
378 |         anno['instance_mask_list'], anno['instance_cls_list'] = self.get_instance_mask(instance_info)
379 | 
380 |         return anno
381 | 
382 |     def get_ground_truth(self, meta, instance_info):
383 |         extracted_bbox = []
384 | 
385 |         for m_idx, m in enumerate(meta['instance_mask_list']):
386 |             if meta['instance_cls_list'][m_idx] == -1:  # is_crowd = 1
387 |                 if instance_info['anns'][m_idx]['iscrowd'] != 1:
388 |                     print('is_crowd error')
389 |                 continue
390 |             horizontal_indicies = np.where(np.any(m, axis=0))[0]
391 |             vertical_indicies = np.where(np.any(m, axis=1))[0]
392 |             if horizontal_indicies.shape[0]:
393 |                 x1, x2 = horizontal_indicies[[0, -1]]
394 |                 y1, y2 = vertical_indicies[[0, -1]]
395 |                 # x2 and y2 should not be part of the box. Increment by 1.
396 |                 x2 += 1
397 |                 y2 += 1
398 |                 bbox_cls = 0
399 |             else:
400 |                 # No mask for this instance. Might happen due to
401 |                 # resizing or cropping. Set bbox to zeros
402 |                 x1, x2, y1, y2, bbox_cls = -1, -1, -1, -1, -1
403 |             extracted_bbox.append([x1, y1, x2, y2, bbox_cls])
404 | 
405 |         return extracted_bbox
406 | 
407 |     def __getitem__(self, index):
408 |         img = cv2.imread(self.image_path_list[index])
409 | 
410 |         idx = self.index_list[index]
411 |         meta_data = self.get_anno(self.data[idx], self.instance_info_list[index])
412 | 
413 |         augmentations = [
414 |             partial(aug_meth, params_transform=params_transform)
415 |             for aug_meth in [
416 |                 aug_scale_bbox,
417 |                 aug_rotate_bbox,
418 |                 aug_croppad_bbox,
419 |                 aug_flip_bbox
420 |             ]
421 |         ]
422 | 
423 |         meta_data, img = reduce(
424 |             lambda md_i_mm_ma, f: f(*md_i_mm_ma),
425 |             augmentations,
426 |             (meta_data, img)
427 |         )
428 | 
429 |         extracted_bbox = self.get_ground_truth(meta_data, self.instance_info_list[index])
430 | 
431 |         # image preprocessing, which comply the model
432 |         # trianed on Imagenet dataset
433 |         if self.preprocess == 'resnet':
434 |             img = resnet_preprocess(img)
435 | 
436 |         img = torch.from_numpy(img)
437 |         bbox = torch.from_numpy(np.array(extracted_bbox).astype(np.float32))
438 | 
439 |         return img, bbox
440 | 
441 |     def __len__(self):
442 |         return self.numSample
443 | 
444 | def bbox_collater(data):
445 |     imgs = torch.stack([s[0] for s in data], 0)
446 |     bbox = [s[1] for s in data]
447 | 
448 |     max_num_annots = max(bb.shape[0] for bb in bbox)
449 | 
450 |     bbox_padded = torch.ones((len(bbox), max_num_annots, 5)) * -1
451 |     #print(annot_padded.shape)
452 |     if max_num_annots > 0:
453 |         for idx, bb in enumerate(bbox):
454 |             #print(annot.shape)
455 |             if bb.shape[0] > 0:
456 |                 bbox_padded[idx, :bb.shape[0], :] = bb
457 | 
458 |     return imgs, bbox_padded


--------------------------------------------------------------------------------
/evaluate/tester.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | import cv2
  5 | import math
  6 | import datetime
  7 | import numpy as np
  8 | import json
  9 | from collections import OrderedDict
 10 | from network.joint_utils import get_joint_list, plot_result
 11 | from tqdm import tqdm
 12 | 
 13 | import torch
 14 | import torch.nn as nn
 15 | from lib.utils.log import logger
 16 | import lib.utils.meter as meter_utils
 17 | import network.net_utils as net_utils
 18 | from lib.utils.timer import Timer
 19 | from datasets.coco_data.preprocessing import resnet_preprocess
 20 | from datasets.coco_data.prn_gaussian import gaussian, crop
 21 | 
 22 | from pycocotools.coco import COCO
 23 | from pycocotools.cocoeval import COCOeval
 24 | 
 25 | 
 26 | def _factor_closest(num, factor, is_ceil=True):
 27 |     """Returns the closest integer to `num` that is divisible by `factor`
 28 | 
 29 |     Actually, that's a lie. By default, we return the closest factor _greater_
 30 |     than the input. If, however, you set `it_ceil` to `False`, we return the
 31 |     closest factor _less_ than the input.
 32 |     """
 33 |     num = float(num) / factor
 34 |     num = np.ceil(num) if is_ceil else np.floor(num)
 35 |     return int(num) * factor
 36 | 
 37 | 
 38 | def crop_with_factor(im, dest_size, factor=32, pad_val=0, basedon='min'):
 39 |     """Scale and pad an image to the desired size and divisibility
 40 | 
 41 |     Scale the specified dimension of the input image to `dest_size` then pad
 42 |     the result until it is cleanly divisible by `factor`.
 43 | 
 44 |     Args:
 45 |         im (Image): The input image.
 46 |         dest_size (int): The desired size of the unpadded, scaled image's
 47 |             dimension specified by `basedon`.
 48 |         factor (int): Pad the scaled image until it is factorable
 49 |         pad_val (number): Value to pad with.
 50 |         basedon (string): Specifies which dimension to base the scaling on.
 51 |             Valid inputs are 'min', 'max', 'w', and 'h'. Defaults to 'min'.
 52 | 
 53 |     Returns:
 54 |         A tuple of three elements:
 55 |             - The scaled and padded image.
 56 |             - The scaling factor.
 57 |             - The size of the non-padded part of the resulting image.
 58 |     """
 59 |     # Compute the scaling factor.
 60 |     im_size_min, im_size_max = np.min(im.shape[0:2]), np.max(im.shape[0:2])
 61 |     im_base = {'min': im_size_min,
 62 |                'max': im_size_max,
 63 |                'w': im.shape[1],
 64 |                'h': im.shape[0]}
 65 |     im_scale = float(dest_size) / im_base.get(basedon, im_size_min)
 66 | 
 67 |     # Scale the image.
 68 |     im = cv2.resize(im, None, fx=im_scale, fy=im_scale)
 69 | 
 70 |     # Compute the padded image shape. Ensure it's divisible by factor.
 71 |     h, w = im.shape[:2]
 72 |     new_h, new_w = _factor_closest(h, factor), _factor_closest(w, factor)
 73 |     # new_ = max(new_h, new_w)
 74 |     new_shape = [new_h, new_w] if im.ndim < 3 else [new_h, new_w, im.shape[-1]]
 75 |     # new_shape = [new_, new_] if im.ndim < 3 else [new_, new_, im.shape[-1]]
 76 | 
 77 |     # Pad the image.
 78 |     im_padded = np.full(new_shape, fill_value=pad_val, dtype=im.dtype)
 79 |     im_padded[0:h, 0:w] = im
 80 | 
 81 |     return im_padded, im_scale, im.shape
 82 | 
 83 | 
 84 | class TestParams(object):
 85 | 
 86 |     trunk = 'resnet101'  # select the model
 87 |     coeff = 2
 88 |     in_thres = 0.21
 89 | 
 90 |     testdata_dir = './demo/test_images/'
 91 |     testresult_dir = './demo/output/'
 92 |     testresult_write_image = False  # write image results or not
 93 |     testresult_write_json = False  # write json results or not
 94 |     gpus = [0]
 95 |     ckpt = './demo/models/ckpt_baseline_resnet101.h5'  # checkpoint file to load, no need to change this
 96 |     coco_root = 'coco_root/'
 97 |     coco_result_filename = './extra/multipose_coco2017_results.json'
 98 | 
 99 |     # # required params
100 |     inp_size = 480  # input size 480*480
101 |     exp_name = 'multipose101'
102 |     subnet_name = 'keypoint_subnet'
103 |     batch_size = 32
104 |     print_freq = 20
105 | 
106 | class Tester(object):
107 | 
108 |     TestParams = TestParams
109 | 
110 |     def __init__(self, model, train_params, batch_processor=None, val_data=None):
111 |         assert isinstance(train_params, TestParams)
112 |         self.params = train_params
113 |         self.batch_timer = Timer()
114 |         self.data_timer = Timer()
115 |         self.val_data = val_data if val_data else None
116 |         self.batch_processor = batch_processor if batch_processor else None
117 | 
118 |         # load model
119 |         self.model = model
120 |         ckpt = self.params.ckpt
121 | 
122 |         if ckpt is not None:
123 |             self._load_ckpt(ckpt)
124 |             logger.info('Load ckpt from {}'.format(ckpt))
125 | 
126 |         self.model = nn.DataParallel(self.model, device_ids=self.params.gpus)
127 |         self.model = self.model.cuda(device=self.params.gpus[0])
128 |         self.model.eval()
129 |         self.model.module.freeze_bn()
130 | 
131 |     def coco_eval(self):
132 | 
133 |         coco_val = os.path.join(self.params.coco_root, 'annotations/person_keypoints_val2017.json')
134 |         coco = COCO(coco_val)
135 |         img_ids = coco.getImgIds(catIds=[1])
136 | 
137 |         multipose_results = []
138 |         coco_order = [0, 14, 13, 16, 15, 4, 1, 5, 2, 6, 3, 10, 7, 11, 8, 12, 9]
139 | 
140 |         for img_id in tqdm(img_ids):
141 | 
142 |             img_name = coco.loadImgs(img_id)[0]['file_name']
143 | 
144 |             oriImg = cv2.imread(os.path.join(self.params.coco_root, 'images/val2017/', img_name)).astype(np.float32)
145 |             multiplier = self._get_multiplier(oriImg)
146 | 
147 |             # Get results of original image
148 |             orig_heat, orig_bbox_all = self._get_outputs(multiplier, oriImg)
149 | 
150 |             # Get results of flipped image
151 |             swapped_img = oriImg[:, ::-1, :]
152 |             flipped_heat, flipped_bbox_all = self._get_outputs(multiplier, swapped_img)
153 | 
154 |             # compute averaged heatmap
155 |             heatmaps = self._handle_heat(orig_heat, flipped_heat)
156 | 
157 |             # segment_map = heatmaps[:, :, 17]
158 |             param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5}
159 |             joint_list = get_joint_list(oriImg, param, heatmaps[:, :, :18], 1)
160 |             joint_list = joint_list.tolist()
161 | 
162 |             joints = []
163 |             for joint in joint_list:
164 |                 if int(joint[-1]) != 1:
165 |                     joint[-1] = max(0, int(joint[-1]) - 1)
166 |                     joints.append(joint)
167 |             joint_list = joints
168 | 
169 |             prn_result = self.prn_process(joint_list, orig_bbox_all[1], img_name, img_id)
170 |             for result in prn_result:
171 |                 keypoints = result['keypoints']
172 |                 coco_keypoint = []
173 |                 for i in range(17):
174 |                     coco_keypoint.append(keypoints[coco_order[i] * 3])
175 |                     coco_keypoint.append(keypoints[coco_order[i] * 3 + 1])
176 |                     coco_keypoint.append(keypoints[coco_order[i] * 3 + 2])
177 |                 result['keypoints'] = coco_keypoint
178 |                 multipose_results.append(result)
179 | 
180 |         ann_filename = self.params.coco_result_filename
181 |         with open(ann_filename, "w") as f:
182 |             json.dump(multipose_results, f, indent=4)
183 |         # load results in COCO evaluation tool
184 |         coco_pred = coco.loadRes(ann_filename)
185 |         # run COCO evaluation
186 |         coco_eval = COCOeval(coco, coco_pred, 'keypoints')
187 |         coco_eval.params.imgIds = img_ids
188 |         coco_eval.evaluate()
189 |         coco_eval.accumulate()
190 |         coco_eval.summarize()
191 | 
192 |         if not self.params.testresult_write_json:
193 |             os.remove(ann_filename)
194 | 
195 |     def test(self):
196 | 
197 |         img_list = os.listdir(self.params.testdata_dir)
198 |         multipose_results = []
199 | 
200 |         for img_name in tqdm(img_list):
201 | 
202 |             img = cv2.imread(os.path.join(self.params.testdata_dir, img_name)).astype(np.float32)
203 |             shape_dst = np.max(img.shape)
204 |             scale = float(shape_dst) / self.params.inp_size
205 |             pad_size = np.abs(img.shape[1] - img.shape[0])
206 |             img_resized = np.pad(img, ([0, pad_size], [0, pad_size], [0, 0]), 'constant')[:shape_dst, :shape_dst]
207 |             img_resized = cv2.resize(img_resized, (self.params.inp_size, self.params.inp_size))
208 |             img_input = resnet_preprocess(img_resized)
209 |             img_input = torch.from_numpy(np.expand_dims(img_input, 0))
210 | 
211 |             with torch.no_grad():
212 |                 img_input = img_input.cuda(device=self.params.gpus[0])
213 | 
214 |             heatmaps, [scores, classification, transformed_anchors] = self.model([img_input, self.params.subnet_name])
215 |             heatmaps = heatmaps.cpu().detach().numpy()
216 |             heatmaps = np.squeeze(heatmaps, 0)
217 |             heatmaps = np.transpose(heatmaps, (1, 2, 0))
218 |             heatmap_max = np.max(heatmaps[:, :, :18], 2)
219 |             # segment_map = heatmaps[:, :, 17]
220 |             param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5}
221 |             joint_list = get_joint_list(img_resized, param, heatmaps[:, :, :18], scale)
222 |             joint_list = joint_list.tolist()
223 |             del img_resized
224 | 
225 |             joints = []
226 |             for joint in joint_list:
227 |                 if int(joint[-1]) != 1:
228 |                     joint[-1] = max(0, int(joint[-1]) - 1)
229 |                     joints.append(joint)
230 |             joint_list = joints
231 | 
232 |             # bounding box from retinanet
233 |             scores = scores.cpu().detach().numpy()
234 |             classification = classification.cpu().detach().numpy()
235 |             transformed_anchors = transformed_anchors.cpu().detach().numpy()
236 |             idxs = np.where(scores > 0.5)
237 |             bboxs=[]
238 |             for j in range(idxs[0].shape[0]):
239 |                 bbox = transformed_anchors[idxs[0][j], :]*scale
240 |                 if int(classification[idxs[0][j]]) == 0:  # class0=people
241 |                     bboxs.append(bbox.tolist())
242 | 
243 |             prn_result = self.prn_process(joint_list, bboxs, img_name)
244 |             for result in prn_result:
245 |                 multipose_results.append(result)
246 | 
247 |             if self.params.testresult_write_image:
248 |                 canvas = plot_result(img, prn_result)
249 |                 cv2.imwrite(os.path.join(self.params.testresult_dir, img_name.split('.', 1)[0] + '_1heatmap.png'), heatmap_max * 256)
250 |                 cv2.imwrite(os.path.join(self.params.testresult_dir, img_name.split('.', 1)[0] + '_2canvas.png'), canvas)
251 | 
252 |         if self.params.testresult_write_json:
253 |             with open(self.params.testresult_dir+'multipose_results.json', "w") as f:
254 |                 json.dump(multipose_results, f)
255 | 
256 |     def _get_multiplier(self, img):
257 |         """Computes the sizes of image at different scales
258 |         :param img: numpy array, the current image
259 |         :returns : list of float. The computed scales
260 |         """
261 |         scale_search = [0.5, 1., 1.5, 2, 2.5]
262 |         return [x * self.params.inp_size / float(img.shape[0]) for x in scale_search]
263 | 
264 |     def _get_outputs(self, multiplier, img):
265 |         """Computes the averaged heatmap and paf for the given image
266 |         :param multiplier:
267 |         :param origImg: numpy array, the image being processed
268 |         :param model: pytorch model
269 |         :returns: numpy arrays, the averaged paf and heatmap
270 |         """
271 | 
272 |         heatmap_avg = np.zeros((img.shape[0], img.shape[1], 18))
273 |         bbox_all = []
274 |         # max_scale = multiplier[-1]
275 |         # max_size = max_scale * img.shape[0]
276 |         # # padding
277 |         # max_cropped, _, _ = crop_with_factor(
278 |         #     img, max_size, factor=32)
279 | 
280 |         for m in range(len(multiplier)):
281 |             scale = multiplier[m]
282 |             inp_size = scale * img.shape[0]
283 | 
284 |             # padding
285 |             im_cropped, im_scale, real_shape = crop_with_factor(
286 |                 img, inp_size, factor=32, pad_val=128)
287 |             im_data = resnet_preprocess(im_cropped)
288 | 
289 |             im_data = np.expand_dims(im_data, 0)
290 |             with torch.no_grad():
291 |                 im_data = torch.from_numpy(im_data).type(torch.FloatTensor).cuda(device=self.params.gpus[0])
292 | 
293 |             heatmaps, [scores, classification, transformed_anchors] = self.model([im_data, self.params.subnet_name])
294 |             heatmaps = heatmaps.cpu().detach().numpy().transpose(0, 2, 3, 1)
295 |             scores = scores.cpu().detach().numpy()
296 |             classification = classification.cpu().detach().numpy()
297 |             transformed_anchors = transformed_anchors.cpu().detach().numpy()
298 | 
299 |             heatmap = heatmaps[0, :int(im_cropped.shape[0] / 4), :int(im_cropped.shape[1] / 4), :]
300 |             heatmap = cv2.resize(heatmap, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
301 |             heatmap = heatmap[0:real_shape[0], 0:real_shape[1], :]
302 |             heatmap = cv2.resize(
303 |                 heatmap, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
304 | 
305 |             heatmap_avg = heatmap_avg + heatmap / len(multiplier)
306 | 
307 |             # bboxs
308 |             idxs = np.where(scores > 0.5)
309 |             bboxs=[]
310 |             for j in range(idxs[0].shape[0]):
311 |                 bbox = transformed_anchors[idxs[0][j], :]/im_scale
312 |                 if int(classification[idxs[0][j]]) == 0:  # class0=people
313 |                     bboxs.append(bbox.tolist())
314 |             bbox_all.append(bboxs)
315 | 
316 |         return heatmap_avg, bbox_all
317 | 
318 |     def _handle_heat(self, normal_heat, flipped_heat):
319 |         """Compute the average of normal and flipped heatmap
320 |         :param normal_heat: numpy array, the normal heatmap
321 |         :param flipped_heat: numpy array, the flipped heatmap
322 |         :returns: numpy arrays, the averaged heatmap
323 |         """
324 | 
325 |         # The order to swap left and right of heatmap
326 |         swap_heat = np.array((0, 1, 5, 6, 7, 2, 3, 4, 11, 12,
327 |                               13, 8, 9, 10, 15, 14, 17, 16))#, 18
328 | 
329 |         averaged_heatmap = (normal_heat + flipped_heat[:, ::-1, :][:, :, swap_heat]) / 2.
330 | 
331 |         return averaged_heatmap
332 | 
333 |     def prn_process(self, kps, bbox_list, file_name, image_id=0):
334 | 
335 |         prn_result = []
336 | 
337 |         idx = 0
338 |         ks = []
339 |         for j in range(17):  # joint type
340 |             t = []
341 |             for k in kps:
342 |                 if k[-1] == j:  # joint type
343 |                     x = k[0]
344 |                     y = k[1]
345 |                     v = 1  # k[2]
346 |                     if v > 0:
347 |                         t.append([x, y, 1, idx])
348 |                         idx += 1
349 |             ks.append(t)
350 |         peaks = ks
351 | 
352 |         w = int(18 * self.params.coeff)
353 |         h = int(28 * self.params.coeff)
354 | 
355 |         bboxes = []
356 |         for bbox_item in bbox_list:
357 |             bboxes.append([bbox_item[0], bbox_item[1], bbox_item[2]-bbox_item[0], bbox_item[3]-bbox_item[1]])
358 | 
359 |         if len(bboxes) == 0 or len(peaks) == 0:
360 |             return prn_result
361 | 
362 |         weights_bbox = np.zeros((len(bboxes), h, w, 4, 17))
363 | 
364 |         for joint_id, peak in enumerate(peaks):  # joint_id: which joint
365 |             for instance_id, instance in enumerate(peak):  # instance_id: which people
366 |                 p_x = instance[0]
367 |                 p_y = instance[1]
368 |                 for bbox_id, b in enumerate(bboxes):
369 |                     is_inside = p_x > b[0] - b[2] * self.params.in_thres and \
370 |                                 p_y > b[1] - b[3] * self.params.in_thres and \
371 |                                 p_x < b[0] + b[2] * (1.0 + self.params.in_thres) and \
372 |                                 p_y < b[1] + b[3] * (1.0 + self.params.in_thres)
373 |                     if is_inside:
374 |                         x_scale = float(w) / math.ceil(b[2])
375 |                         y_scale = float(h) / math.ceil(b[3])
376 |                         x0 = int((p_x - b[0]) * x_scale)
377 |                         y0 = int((p_y - b[1]) * y_scale)
378 |                         if x0 >= w and y0 >= h:
379 |                             x0 = w - 1
380 |                             y0 = h - 1
381 |                         elif x0 >= w:
382 |                             x0 = w - 1
383 |                         elif y0 >= h:
384 |                             y0 = h - 1
385 |                         elif x0 < 0 and y0 < 0:
386 |                             x0 = 0
387 |                             y0 = 0
388 |                         elif x0 < 0:
389 |                             x0 = 0
390 |                         elif y0 < 0:
391 |                             y0 = 0
392 |                         p = 1e-9
393 |                         weights_bbox[bbox_id, y0, x0, :, joint_id] = [1, instance[2], instance[3], p]
394 |         old_weights_bbox = np.copy(weights_bbox)
395 | 
396 |         for j in range(weights_bbox.shape[0]):
397 |             for t in range(17):
398 |                 weights_bbox[j, :, :, 0, t] = gaussian(weights_bbox[j, :, :, 0, t])
399 | 
400 |         output_bbox = []
401 |         for j in range(weights_bbox.shape[0]):
402 |             inp = weights_bbox[j, :, :, 0, :]
403 |             input = torch.from_numpy(np.expand_dims(inp, axis=0)).cuda().float()
404 |             output, _ = self.model([input, 'prn_subnet'])
405 |             temp = np.reshape(output.data.cpu().numpy(), (56, 36, 17))
406 |             output_bbox.append(temp)
407 | 
408 |         output_bbox = np.array(output_bbox)
409 | 
410 |         keypoints_score = []
411 | 
412 |         for t in range(17):
413 |             indexes = np.argwhere(old_weights_bbox[:, :, :, 0, t] == 1)
414 |             keypoint = []
415 |             for i in indexes:
416 |                 cr = crop(output_bbox[i[0], :, :, t], (i[1], i[2]), N=15)
417 |                 score = np.sum(cr)
418 | 
419 |                 kp_id = old_weights_bbox[i[0], i[1], i[2], 2, t]
420 |                 kp_score = old_weights_bbox[i[0], i[1], i[2], 1, t]
421 |                 p_score = old_weights_bbox[i[0], i[1], i[2], 3, t]  ## ??
422 |                 bbox_id = i[0]
423 | 
424 |                 score = kp_score * score
425 | 
426 |                 s = [kp_id, bbox_id, kp_score, score]
427 | 
428 |                 keypoint.append(s)
429 |             keypoints_score.append(keypoint)
430 | 
431 |         bbox_keypoints = np.zeros((weights_bbox.shape[0], 17, 3))
432 |         bbox_ids = np.arange(len(bboxes)).tolist()
433 | 
434 |         # kp_id, bbox_id, kp_score, my_score
435 |         for i in range(17):
436 |             joint_keypoints = keypoints_score[i]
437 |             if len(joint_keypoints) > 0:  # if have output result in one type keypoint
438 | 
439 |                 kp_ids = list(set([x[0] for x in joint_keypoints]))
440 | 
441 |                 table = np.zeros((len(bbox_ids), len(kp_ids), 4))
442 | 
443 |                 for b_id, bbox in enumerate(bbox_ids):
444 |                     for k_id, kp in enumerate(kp_ids):
445 |                         own = [x for x in joint_keypoints if x[0] == kp and x[1] == bbox]
446 | 
447 |                         if len(own) > 0:
448 |                             table[bbox, k_id] = own[0]
449 |                         else:
450 |                             table[bbox, k_id] = [0] * 4
451 | 
452 |                 for b_id, bbox in enumerate(bbox_ids):  # all bbx, from 0 to ...
453 | 
454 |                     row = np.argsort(-table[bbox, :, 3])  # in bbx(bbox), sort from big to small, keypoint score
455 | 
456 |                     if table[bbox, row[0], 3] > 0:  # score
457 |                         for r in row:  # all keypoints
458 |                             if table[bbox, r, 3] > 0:
459 |                                 column = np.argsort(
460 |                                     -table[:, r, 3])  # sort all keypoints r, from big to small, bbx score
461 | 
462 |                                 if bbox == column[0]:  # best bbx. best keypoint
463 |                                     bbox_keypoints[bbox, i, :] = [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][
464 |                                         0]
465 |                                     break
466 |                                 else:  # for bbx column[0], the worst keypoint is row2[0],
467 |                                     row2 = np.argsort(table[column[0], :, 3])
468 |                                     if row2[0] == r:
469 |                                         bbox_keypoints[bbox, i, :] = \
470 |                                             [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][0]
471 |                                         break
472 |             else:  # len(joint_keypoints) == 0:
473 |                 for j in range(weights_bbox.shape[0]):
474 |                     b = bboxes[j]
475 |                     x_scale = float(w) / math.ceil(b[2])
476 |                     y_scale = float(h) / math.ceil(b[3])
477 | 
478 |                     for t in range(17):
479 |                         indexes = np.argwhere(old_weights_bbox[j, :, :, 0, t] == 1)
480 |                         if len(indexes) == 0:
481 |                             max_index = np.argwhere(output_bbox[j, :, :, t] == np.max(output_bbox[j, :, :, t]))
482 |                             bbox_keypoints[j, t, :] = [max_index[0][1] / x_scale + b[0],
483 |                                                        max_index[0][0] / y_scale + b[1], 0]
484 | 
485 |         my_keypoints = []
486 | 
487 |         for i in range(bbox_keypoints.shape[0]):
488 |             k = np.zeros(51)
489 |             k[0::3] = bbox_keypoints[i, :, 0]
490 |             k[1::3] = bbox_keypoints[i, :, 1]
491 |             k[2::3] = bbox_keypoints[i, :, 2]
492 | 
493 |             pose_score = 0
494 |             count = 0
495 |             for f in range(17):
496 |                 if bbox_keypoints[i, f, 0] != 0 and bbox_keypoints[i, f, 1] != 0:
497 |                     count += 1
498 |                 pose_score += bbox_keypoints[i, f, 2]
499 |             pose_score /= 17.0
500 | 
501 |             my_keypoints.append(k)
502 | 
503 |             image_data = {
504 |                 'image_id': image_id,
505 |                 'file_name': file_name,
506 |                 'category_id': 1,
507 |                 'bbox': bboxes[i],
508 |                 'score': pose_score,
509 |                 'keypoints': k.tolist()
510 |             }
511 |             prn_result.append(image_data)
512 | 
513 |         return prn_result
514 | 
515 |     def val(self):
516 |         self.model.eval()
517 |         logs = OrderedDict()
518 |         sum_loss = meter_utils.AverageValueMeter()
519 |         logger.info('Val on validation set...')
520 | 
521 |         self.batch_timer.clear()
522 |         self.data_timer.clear()
523 |         self.batch_timer.tic()
524 |         self.data_timer.tic()
525 |         for step, batch in enumerate(self.val_data):
526 |             self.data_timer.toc()
527 | 
528 |             inputs, gts, _ = self.batch_processor(self, batch)
529 |             _, saved_for_loss = self.model(*inputs)
530 |             self.batch_timer.toc()
531 | 
532 |             loss, saved_for_log = self.model.module.build_loss(saved_for_loss, *gts)
533 |             sum_loss.add(loss.item())
534 |             self._process_log(saved_for_log, logs)
535 | 
536 |             if step % self.params.print_freq == 0:
537 |                 self._print_log(step, logs, 'Validation', max_n_batch=len(self.val_data))
538 | 
539 |             self.data_timer.tic()
540 |             self.batch_timer.tic()
541 | 
542 |         mean, std = sum_loss.value()
543 |         logger.info('\n\nValidation loss: mean: {}, std: {}'.format(mean, std))
544 | 
545 |     def _load_ckpt(self, ckpt):
546 |         _, _ = net_utils.load_net(ckpt, self.model, load_state_dict=True)
547 | 
548 |     def _process_log(self, src_dict, dest_dict):
549 |         for k, v in src_dict.items():
550 |             if isinstance(v, (int, float)):
551 |                 dest_dict.setdefault(k, meter_utils.AverageValueMeter())
552 |                 dest_dict[k].add(float(v))
553 |             else:
554 |                 dest_dict[k] = v
555 | 
556 |     def _print_log(self, step, log_values, title='', max_n_batch=None):
557 |         log_str = '{}\n'.format(self.params.exp_name)
558 |         log_str += '{}: epoch {}'.format(title, 0)
559 | 
560 |         log_str += '[{}/{}]'.format(step, max_n_batch)
561 | 
562 |         i = 0
563 |         for k, v in log_values.items():
564 |             if isinstance(v, meter_utils.AverageValueMeter):
565 |                 mean, std = v.value()
566 |                 log_str += '\n\t{}: {:.10f}'.format(k, mean)
567 |                 i += 1
568 | 
569 |         if max_n_batch:
570 |             # print time
571 |             data_time = self.data_timer.duration + 1e-6
572 |             batch_time = self.batch_timer.duration + 1e-6
573 |             rest_seconds = int((max_n_batch - step) * batch_time)
574 |             log_str += '\n\t({:.2f}/{:.2f}s,' \
575 |                        ' fps:{:.1f}, rest: {})'.format(data_time, batch_time,
576 |                                                        self.params.batch_size / batch_time,
577 |                                                        str(datetime.timedelta(seconds=rest_seconds)))
578 |             self.batch_timer.clear()
579 |             self.data_timer.clear()
580 | 
581 |         logger.info(log_str)
582 | 


--------------------------------------------------------------------------------