├── lib
    ├── __init__.py
    ├── core
    │   ├── __init__.py
    │   └── config.py
    ├── nms
    │   ├── __init__.py
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── nms
    │   │   │   ├── _nms.so
    │   │   │   └── __init__.py
    │   ├── src
    │   │   ├── cuda
    │   │   │   ├── nms_kernel.cu.o
    │   │   │   ├── nms_kernel.h
    │   │   │   └── nms_kernel.cu
    │   │   ├── nms_cuda.h
    │   │   ├── nms.h
    │   │   ├── nms_cuda.c
    │   │   └── nms.c
    │   ├── build.py
    │   └── pth_nms.py
    ├── utils
    │   ├── __init__.py
    │   ├── path.py
    │   ├── log.py
    │   ├── meter.py
    │   └── timer.py
    └── build.sh
├── datasets
    ├── __init__.py
    ├── coco_data
    │   ├── __init__.py
    │   ├── preprocessing.py
    │   ├── heatmap.py
    │   ├── prn_gaussian.py
    │   ├── prn_data_pipeline.py
    │   ├── ImageAugmentation.py
    │   └── COCO_data_pipeline.py
    ├── dataloader.py
    ├── coco.py
    └── data_parallel.py
├── evaluate
    ├── __init__.py
    ├── multipose_coco_eval.py
    ├── multipose_test.py
    ├── multipose_keypoint_val.py
    ├── multipose_detection_val.py
    ├── multipose_prn_val.py
    └── tester.py
├── network
    ├── __init__.py
    ├── utils.py
    ├── net_utils.py
    ├── anchors.py
    ├── fpn.py
    ├── losses.py
    ├── joint_utils.py
    └── posenet.py
├── training
    ├── __init__.py
    ├── batch_processor.py
    ├── multipose_prn_train.py
    ├── multipose_detection_train.py
    ├── multipose_keypoint_train.py
    └── trainer.py
├── .gitignore
├── demo
    ├── test_images
    │   ├── pic1.jpg
    │   └── pic2.jpg
    ├── output
    │   ├── pic1_canvas.png
    │   └── pic2_canvas.png
    └── models
    │   └── README.md
├── configs
    └── coco
    │   └── first_experiment.yaml
├── multipose_environment.yaml
└── README.md
/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/evaluate/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/core/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/network/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/training/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.pyc
3 | *~
4 | 
--------------------------------------------------------------------------------
/datasets/coco_data/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/lib/nms/_ext/nms/_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/lib/nms/_ext/nms/_nms.so
--------------------------------------------------------------------------------
/demo/test_images/pic1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/test_images/pic1.jpg
--------------------------------------------------------------------------------
/demo/test_images/pic2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/test_images/pic2.jpg
--------------------------------------------------------------------------------
/demo/output/pic1_canvas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/output/pic1_canvas.png
--------------------------------------------------------------------------------
/demo/output/pic2_canvas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/demo/output/pic2_canvas.png
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LiMeng95/MultiPoseNet.pytorch/HEAD/lib/nms/src/cuda/nms_kernel.cu.o
--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/lib/nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/demo/models/README.md:
--------------------------------------------------------------------------------
1 | Our baseline model:([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z), [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/7328ce2cb7bd4f558a78/), backbone: resnet101)
--------------------------------------------------------------------------------
/lib/utils/path.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | 
4 | 
5 | def mkdir(path, rm_exist=False):
6 |     if os.path.isdir(path):
7 |         if not rm_exist:
8 |             return
9 |         shutil.rmtree(path)
10 | 
11 |     os.makedirs(path)
12 | 
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _NMS_KERNEL
2 | #define _NMS_KERNEL
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
9 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
10 | 
11 | void _nms(int boxes_num, float * boxes_dev,
12 |           unsigned long long * 
mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/build.sh: -------------------------------------------------------------------------------- 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 2 | -gencode arch=compute_35,code=sm_35 \ 3 | -gencode arch=compute_50,code=sm_50 \ 4 | -gencode arch=compute_52,code=sm_52 \ 5 | -gencode arch=compute_60,code=sm_60 \ 6 | -gencode arch=compute_61,code=sm_61" 7 | 8 | 9 | # Build NMS 10 | cd nms/src/cuda 11 | echo "Compiling nms kernels by nvcc..." 12 | /usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 13 | cd ../../ 14 | python build.py 15 | cd ../ 16 | -------------------------------------------------------------------------------- /lib/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /datasets/coco_data/preprocessing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides different utilities to preprocess images. 3 | Args: 4 | image: A np.array representing an image of (h,w,3). 5 | 6 | Returns: 7 | A preprocessed image. which dtype is np.float32 8 | and transposed to (3,h,w). 9 | 10 | """ 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | def resnet_preprocess(image): 16 | image = image.astype(np.float32) / 255. 
17 | means = [0.485, 0.456, 0.406] 18 | stds = [0.229, 0.224, 0.225] 19 | 20 | preprocessed_img = image.copy()[:, :, ::-1] 21 | for i in range(3): 22 | preprocessed_img[:, :, i] = preprocessed_img[:, :, i] - means[i] 23 | preprocessed_img[:, :, i] = preprocessed_img[:, :, i] / stds[i] 24 | 25 | preprocessed_img = preprocessed_img.transpose((2, 0, 1)).astype(np.float32) 26 | return preprocessed_img 27 | -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects, 31 | extra_compile_args=['-std=c99'] 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /lib/utils/meter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | 5 | class Meter(object): 6 | def reset(self): 7 | pass 8 | 9 | def add(self): 10 | pass 11 | 12 | def value(self): 13 | pass 14 | 15 | 16 | class AverageValueMeter(Meter): 17 | def __init__(self): 18 | super(AverageValueMeter, self).__init__() 19 | self.reset() 20 | 21 | def add(self, value, n=1): 22 | self.sum += value 23 | self.var += value * value 24 | self.n += n 25 | 26 | def value(self): 27 | n = self.n 28 | if n == 0: 29 | mean, std = np.nan, np.nan 30 | elif n == 1: 31 | return self.sum, np.inf 32 | else: 33 | mean = self.sum / n 34 | std = math.sqrt((self.var - n * mean * mean) / (n - 1.0)) 35 | return mean, std 36 | 37 | def reset(self): 38 | self.sum = 0.0 39 | self.n = 0 40 | self.var = 0.0 41 | 42 | def __float__(self): 43 | return self.value()[0] -------------------------------------------------------------------------------- /evaluate/multipose_coco_eval.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_coco_eval.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | from network.posenet import poseNet 7 | from evaluate.tester import Tester 8 | 9 | backbone = 'resnet101' 10 | 11 | # Set Training parameters 12 | params = Tester.TestParams() 13 | params.subnet_name = 'both' 14 | params.inp_size = 480 # input picture size = (inp_size, inp_size) 15 | params.coeff = 2 16 | params.in_thres = 0.21 17 | params.coco_root = '/data/COCO/' 18 | params.testresult_write_json = False # Whether to write json result 19 | params.coco_result_filename = './demo/multipose_coco2017_results.json' 20 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 21 | 22 | # model 23 | if backbone == 'resnet101': 24 | model = poseNet(101) 25 | elif backbone == 
'resnet50': 26 | model = poseNet(50) 27 | 28 | for name, module in model.named_children(): 29 | for para in module.parameters(): 30 | para.requires_grad = False 31 | 32 | tester = Tester(model, params) 33 | tester.coco_eval() # pic_test 34 | -------------------------------------------------------------------------------- /evaluate/multipose_test.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_test.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | from network.posenet import poseNet 7 | from evaluate.tester import Tester 8 | 9 | backbone = 'resnet101' 10 | 11 | # Set Training parameters 12 | params = Tester.TestParams() 13 | params.subnet_name = 'both' 14 | params.inp_size = 480 # input picture size = (inp_size, inp_size) 15 | params.coeff = 2 16 | params.in_thres = 0.21 17 | params.testdata_dir = './demo/test_images/' 18 | params.testresult_dir = './demo/output/' 19 | params.testresult_write_image = True # Whether to write result pictures 20 | params.testresult_write_json = False # Whether to write json result 21 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 22 | 23 | # model 24 | if backbone == 'resnet101': 25 | model = poseNet(101) 26 | elif backbone == 'resnet50': 27 | model = poseNet(50) 28 | 29 | for name, module in model.named_children(): 30 | for para in module.parameters(): 31 | para.requires_grad = False 32 | 33 | tester = Tester(model, params) 34 | tester.test() # pic_test 35 | -------------------------------------------------------------------------------- /datasets/dataloader.py: -------------------------------------------------------------------------------- 1 | from typing import Generator 2 | from torch.utils.data.dataloader import DataLoader, _DataLoaderIter 3 | from lib.utils.log import logger 4 | 5 | 6 | class sDataLoader(DataLoader): 7 | def get_stream(self): 8 | """ 9 | Return a generate that can yield endless data. 10 | :Example: 11 | stream = get_stream() 12 | for i in range(100): 13 | batch = next(stream) 14 | 15 | :return: stream 16 | :rtype: Generator 17 | """ 18 | while True: 19 | for data in _DataLoaderIter(self): 20 | yield data 21 | 22 | @staticmethod 23 | def copy(loader): 24 | """ 25 | Init a sDataloader from an existing Dataloader 26 | :param loader: an instance of Dataloader 27 | :type loader: DataLoader 28 | :return: a new instance of sDataloader 29 | :rtype: sDataLoader 30 | """ 31 | if not isinstance(loader, DataLoader): 32 | logger('loader should be an instance of Dataloader, but got {}'.format(type(loader))) 33 | return loader 34 | 35 | new_loader = sDataLoader(loader.dataset) 36 | for k, v in loader.__dict__.items(): 37 | setattr(new_loader, k, v) 38 | return new_loader 39 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 
21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 45 | 46 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /datasets/coco_data/heatmap.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import random 4 | import sys 5 | 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from scipy import misc, ndimage 10 | 11 | 12 | """Implement the generate of every channel of ground truth heatmap. 13 | :param centerA: int with shape (2,), every coordinate of person's keypoint. 14 | :param accumulate_confid_map: one channel of heatmap, which is accumulated, 15 | np.log(100) is the max value of heatmap. 
16 | :param params_transform: store the value of stride and crop_szie_y, crop_size_x 17 | """ 18 | 19 | 20 | def putGaussianMaps(center, accumulate_confid_map, params_transform): 21 | crop_size_y = params_transform['crop_size_y'] 22 | crop_size_x = params_transform['crop_size_x'] 23 | stride = params_transform['stride'] 24 | sigma = params_transform['sigma'] 25 | 26 | grid_y = int(crop_size_y / stride) 27 | grid_x = int(crop_size_x / stride) 28 | start = stride / 2.0 - 0.5 29 | y_range = [i for i in range(grid_y)] 30 | x_range = [i for i in range(grid_x)] 31 | xx, yy = np.meshgrid(x_range, y_range) 32 | xx = xx * stride + start 33 | yy = yy * stride + start 34 | d2 = (xx - center[0]) ** 2 + (yy - center[1]) ** 2 35 | exponent = d2 / 2.0 / sigma / sigma 36 | mask = exponent <= 4.6052 37 | cofid_map = np.exp(-exponent) 38 | cofid_map = np.multiply(mask, cofid_map) 39 | accumulate_confid_map += cofid_map 40 | accumulate_confid_map[accumulate_confid_map > 1.0] = 1.0 41 | return accumulate_confid_map 42 | -------------------------------------------------------------------------------- /evaluate/multipose_keypoint_val.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_keypoint_val.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | from training.batch_processor import batch_processor 7 | from network.posenet import poseNet 8 | from datasets.coco import get_loader 9 | from evaluate.tester import Tester 10 | 11 | # Hyper-params 12 | coco_root = '/data/COCO/' 13 | backbone = 'resnet101' # 'resnet50' 14 | data_dir = coco_root+'images/' 15 | mask_dir = coco_root 16 | json_path = coco_root+'COCO.json' 17 | inp_size = 480 # input size 480*480 18 | feat_stride = 4 19 | 20 | # Set Training parameters 21 | params = Tester.TestParams() 22 | params.subnet_name = 'keypoint_subnet' 23 | params.gpus = [0] 24 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 25 | params.batch_size = 6 * len(params.gpus) 26 | params.print_freq = 50 27 | 28 | # validation data 29 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, 30 | preprocess='resnet', batch_size=params.batch_size-2*len(params.gpus), training=False, 31 | shuffle=False, num_workers=4, subnet=params.subnet_name) 32 | print('val dataset len: {}'.format(len(valid_data.dataset))) 33 | 34 | # model 35 | if backbone == 'resnet101': 36 | model = poseNet(101) 37 | elif backbone == 'resnet50': 38 | model = poseNet(50) 39 | 40 | for name, module in model.named_children(): 41 | for para in module.parameters(): 42 | para.requires_grad = False 43 | 44 | tester = Tester(model, params, batch_processor, valid_data) 45 | tester.val() 46 | -------------------------------------------------------------------------------- /evaluate/multipose_detection_val.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_detection_val.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | from training.batch_processor import batch_processor 7 | from network.posenet import poseNet 8 | from datasets.coco import get_loader 9 | from evaluate.tester import Tester 10 | 11 | # Hyper-params 12 | coco_root = '/data/COCO/' 13 | backbone = 'resnet101' # 'resnet50' 14 | data_dir = coco_root+'images/' 15 | mask_dir = coco_root 16 | json_path = coco_root+'COCO.json' 17 | inp_size = 608 # input size 
608*608 18 | feat_stride = 4 19 | 20 | # Set Training parameters 21 | params = Tester.TestParams() 22 | params.subnet_name = 'detection_subnet' 23 | params.gpus = [0] 24 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 25 | params.batch_size = 25 * len(params.gpus) 26 | params.print_freq = 100 27 | 28 | # validation data 29 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, 30 | preprocess='resnet', batch_size=params.batch_size-10*len(params.gpus), training=False, 31 | shuffle=False, num_workers=8, subnet=params.subnet_name) 32 | print('val dataset len: {}'.format(len(valid_data.dataset))) 33 | 34 | # model 35 | if backbone == 'resnet101': 36 | model = poseNet(101) 37 | elif backbone == 'resnet50': 38 | model = poseNet(50) 39 | 40 | for name, module in model.named_children(): 41 | for para in module.parameters(): 42 | para.requires_grad = False 43 | 44 | tester = Tester(model, params, batch_processor, valid_data) 45 | tester.val() 46 | -------------------------------------------------------------------------------- /evaluate/multipose_prn_val.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/evaluate/multipose_prn_val.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | from network.posenet import poseNet 7 | from pycocotools.coco import COCO 8 | from datasets.coco_data.prn_data_pipeline import PRN_CocoDataset 9 | from torch.utils.data import DataLoader 10 | from training.batch_processor import batch_processor 11 | from evaluate.tester import Tester 12 | 13 | 14 | # Hyper-params 15 | coco_root = '/data/COCO/' 16 | backbone='resnet101' # 'resnet50' 17 | inp_size = 480 # input size 480*480 18 | feat_stride = 4 19 | node_count = 1024 # Hidden Layer Node Count 20 | coeff = 2 # Coefficient of bbox size 21 | threshold = 0.21 # BBOX threshold 22 | num_of_keypoints = 3 # Minimum number of keypoints for each bbox in training 23 | 24 | # Set Training parameters 25 | params = Tester.TestParams() 26 | params.subnet_name = 'prn_subnet' 27 | params.gpus = [0] 28 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 29 | params.batch_size = 8 * len(params.gpus) 30 | params.print_freq = 500 31 | 32 | # validation data 33 | coco_val = COCO(os.path.join(coco_root, 'annotations/person_keypoints_val2017.json')) 34 | valid_data = DataLoader(dataset=PRN_CocoDataset( 35 | coco_val, num_of_keypoints=num_of_keypoints, coeff=coeff, threshold=threshold, 36 | inp_size=inp_size, feat_stride=feat_stride), batch_size=params.batch_size, num_workers=4, shuffle=False) 37 | print('val dataset len: {}'.format(len(valid_data.dataset))) 38 | 39 | # model 40 | if backbone == 'resnet101': 41 | model = poseNet(101, prn_node_count=node_count, prn_coeff=coeff) 42 | elif backbone == 'resnet50': 43 | model = poseNet(50, prn_node_count=node_count, prn_coeff=coeff) 44 | 45 | for name, module in model.named_children(): 46 | for para in module.parameters(): 47 | para.requires_grad = False 48 | 49 | tester = Tester(model, params, batch_processor, valid_data) 50 | tester.val() 51 | -------------------------------------------------------------------------------- /configs/coco/first_experiment.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | DATASET: 6 | DATASET: mpii 7 | DATA_FORMAT: jpg 8 | FLIP: true 9 | HYBRID_JOINTS_TYPE: '' 10 | ROOT: '' 11 | ROT_FACTOR: 30 
12 | SCALE_FACTOR: 0.25 13 | SELECT_DATA: false 14 | TEST_SET: valid 15 | TRAIN_SET: train 16 | DATA_DIR: '' 17 | DEBUG: 18 | DEBUG: false 19 | SAVE_BATCH_IMAGES_GT: false 20 | SAVE_BATCH_IMAGES_PRED: false 21 | SAVE_HEATMAPS_GT: false 22 | SAVE_HEATMAPS_PRED: false 23 | GPUS: '0' 24 | LOG_DIR: '' 25 | LOSS: 26 | USE_TARGET_WEIGHT: true 27 | MODEL: 28 | EXTRA: !!python/object/new:easydict.EasyDict 29 | dictitems: 30 | DECONV_WITH_BIAS: false 31 | FINAL_CONV_KERNEL: 1 32 | HEATMAP_SIZE: &id001 33 | - 64 34 | - 64 35 | NUM_DECONV_FILTERS: &id002 36 | - 256 37 | - 256 38 | - 256 39 | NUM_DECONV_KERNELS: &id003 40 | - 4 41 | - 4 42 | - 4 43 | NUM_DECONV_LAYERS: 3 44 | NUM_LAYERS: 50 45 | SIGMA: 2 46 | TARGET_TYPE: gaussian 47 | state: 48 | DECONV_WITH_BIAS: false 49 | FINAL_CONV_KERNEL: 1 50 | HEATMAP_SIZE: *id001 51 | NUM_DECONV_FILTERS: *id002 52 | NUM_DECONV_KERNELS: *id003 53 | NUM_DECONV_LAYERS: 3 54 | NUM_LAYERS: 50 55 | SIGMA: 2 56 | TARGET_TYPE: gaussian 57 | IMAGE_SIZE: 58 | - 256 59 | - 256 60 | INIT_WEIGHTS: true 61 | NAME: pose_resnet 62 | NUM_JOINTS: 16 63 | PRETRAINED: '' 64 | OUTPUT_DIR: '' 65 | PRINT_FREQ: 20 66 | TEST: 67 | BATCH_SIZE: 32 68 | BBOX_THRE: 1.0 69 | COCO_BBOX_FILE: '' 70 | FLIP_TEST: false 71 | IMAGE_THRE: 0.0 72 | IN_VIS_THRE: 0.0 73 | MODEL_FILE: '' 74 | NMS_THRE: 1.0 75 | OKS_THRE: 0.5 76 | POST_PROCESS: true 77 | SHIFT_HEATMAP: true 78 | USE_GT_BBOX: false 79 | TRAIN: 80 | BATCH_SIZE: 32 81 | BEGIN_EPOCH: 0 82 | CHECKPOINT: '' 83 | END_EPOCH: 140 84 | GAMMA1: 0.99 85 | GAMMA2: 0.0 86 | LR: 0.001 87 | LR_FACTOR: 0.1 88 | LR_STEP: 89 | - 90 90 | - 110 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | OPTIMIZER: adam 94 | RESUME: false 95 | SHUFFLE: true 96 | WD: 0.0001 97 | WORKERS: 4 98 | -------------------------------------------------------------------------------- /network/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | 6 | class BBoxTransform(nn.Module): 7 | 8 | def __init__(self, mean=None, std=None): 9 | super(BBoxTransform, self).__init__() 10 | if mean is None: 11 | self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)).cuda() 12 | else: 13 | self.mean = mean 14 | if std is None: 15 | self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)).cuda() 16 | else: 17 | self.std = std 18 | 19 | def forward(self, boxes, deltas): 20 | 21 | widths = boxes[:, :, 2] - boxes[:, :, 0] 22 | heights = boxes[:, :, 3] - boxes[:, :, 1] 23 | ctr_x = boxes[:, :, 0] + 0.5 * widths 24 | ctr_y = boxes[:, :, 1] + 0.5 * heights 25 | 26 | dx = deltas[:, :, 0] * self.std[0] + self.mean[0] 27 | dy = deltas[:, :, 1] * self.std[1] + self.mean[1] 28 | dw = deltas[:, :, 2] * self.std[2] + self.mean[2] 29 | dh = deltas[:, :, 3] * self.std[3] + self.mean[3] 30 | 31 | pred_ctr_x = ctr_x + dx * widths 32 | pred_ctr_y = ctr_y + dy * heights 33 | pred_w = torch.exp(dw) * widths 34 | pred_h = torch.exp(dh) * heights 35 | 36 | pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w 37 | pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h 38 | pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w 39 | pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h 40 | 41 | pred_boxes = torch.stack([pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2], dim=2) 42 | 43 | return pred_boxes 44 | 45 | 46 | class ClipBoxes(nn.Module): 47 | 48 | def __init__(self, width=None, height=None): 49 | super(ClipBoxes, self).__init__() 50 | 51 | def forward(self, boxes, img): 52 | 53 | batch_size, 
num_channels, height, width = img.shape 54 | 55 | boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) 56 | boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) 57 | 58 | boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) 59 | boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) 60 | 61 | return boxes 62 | -------------------------------------------------------------------------------- /training/batch_processor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utility functions for rtpose project 3 | -------------------------------------------- 4 | Change to pytorch=0.4.0 by @LiMeng95 5 | Utility functions for Multipose project 6 | ''' 7 | 8 | import torch 9 | 10 | def batch_processor(state, batch): 11 | gpus = state.params.gpus 12 | subnet_name = state.params.subnet_name # 'detection_subnet'/'keypoint_subnet'/'prn_subnet' 13 | 14 | if subnet_name == 'keypoint_subnet': 15 | inp, heat_temp, heat_weight = batch 16 | 17 | if not state.model.training: # used for inference 18 | with torch.no_grad(): 19 | input_var = inp.cuda(device=gpus[0]) 20 | heat_weight_var = heat_weight.cuda(device=gpus[0], async=False) 21 | heat_temp_var = heat_temp.cuda(device=gpus[0], async=False) 22 | else: 23 | input_var = inp.cuda(device=gpus[0]) 24 | heat_weight_var = heat_weight.cuda(device=gpus[0], async=False) 25 | heat_temp_var = heat_temp.cuda(device=gpus[0], async=False) 26 | 27 | inputs = [[input_var, subnet_name]] 28 | gts = [subnet_name, heat_temp_var, heat_weight_var] 29 | saved_for_eval = [] 30 | elif subnet_name == 'detection_subnet': #'detection_subnet' 31 | inp, anno = batch # anno: [x1, y1, x2, y2, category_id] 32 | 33 | if not state.model.training: # used for inference 34 | with torch.no_grad(): 35 | input_var = inp.cuda(device=gpus[0]) 36 | anno_var = anno.cuda(device=gpus[0]) 37 | else: 38 | input_var = inp.cuda(device=gpus[0]) 39 | anno_var = anno.cuda(device=gpus[0]) 40 | 41 | inputs = [[input_var, subnet_name]] 42 | gts = [subnet_name, anno_var] 43 | saved_for_eval = [] 44 | else: #'prn_subnet' 45 | inp, label = batch # input, label 46 | 47 | if not state.model.training: # used for inference 48 | with torch.no_grad(): 49 | input_var = inp.cuda(device=gpus[0]).float() 50 | anno_var = label.cuda(device=gpus[0]).float() 51 | else: 52 | input_var = inp.cuda(device=gpus[0]).float() 53 | anno_var = label.cuda(device=gpus[0]).float() 54 | 55 | inputs = [[input_var, subnet_name]] 56 | gts = [subnet_name, anno_var] 57 | saved_for_eval = [] 58 | 59 | return inputs, gts, saved_for_eval 60 | 61 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int 
boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /datasets/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from torchvision.transforms import ToTensor 4 | from datasets.coco_data.COCO_data_pipeline import Cocokeypoints, Cocobbox, bbox_collater 5 | from datasets.dataloader import sDataLoader 6 | from pycocotools.coco import COCO 7 | 8 | 9 | def get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, preprocess, 10 | batch_size, training=True, shuffle=True, num_workers=3, subnet='keypoint_subnet'): 11 | """ Build a COCO dataloader 12 | :param json_path: string, path to jso file 13 | :param datadir: string, path to coco data 14 | :returns : the data_loader 15 | """ 16 | with open(json_path) as data_file: 17 | data_this = json.load(data_file) 18 | data = data_this['root'] 19 | 20 | num_samples = len(data) 21 | train_indexes = [] 22 | val_indexes = [] 23 | 24 | if subnet == 'keypoint_subnet': 25 | for count in range(num_samples): 26 | if data[count]['isValidation'] != 0.: 27 | val_indexes.append(count) 28 | else: 29 | train_indexes.append(count) 30 | 31 | coco_data = Cocokeypoints(root=data_dir, mask_dir=mask_dir, 32 | index_list=train_indexes if training else val_indexes, 33 | data=data, inp_size=inp_size, feat_stride=feat_stride, 34 | preprocess=preprocess, transform=ToTensor()) 35 | data_loader = sDataLoader(coco_data, batch_size=batch_size, 36 | shuffle=shuffle, num_workers=num_workers) 37 | 38 | elif subnet == 'detection_subnet': 39 | if training: 40 | anno_path = os.path.join(mask_dir, 'annotations', 'person_keypoints_train2017.json') 41 | else: 42 | anno_path = os.path.join(mask_dir, 'annotations', 'person_keypoints_val2017.json') 43 | coco = COCO(anno_path) 44 | images_ids = coco.getImgIds() 45 | 46 | data_indexes = [] 47 | for count in range(num_samples): 48 | if int(data[count]['image_id']) in images_ids: 49 | 
data_indexes.append(count) 50 | 51 | coco_data = Cocobbox(root=data_dir, mask_dir=mask_dir, index_list=data_indexes, 52 | data=data, inp_size=inp_size, feat_stride=feat_stride, coco=coco, 53 | preprocess=preprocess, training=True if training else False) 54 | 55 | data_loader = sDataLoader(coco_data, batch_size=batch_size, shuffle=shuffle, 56 | num_workers=num_workers, collate_fn=bbox_collater) 57 | 58 | return data_loader 59 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /datasets/data_parallel.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import torch 3 | from torch.nn import DataParallel 4 | from torch.autograd import Variable 5 | from torch.nn.parallel._functions import Scatter, Gather 6 | 7 | 8 | class ScatterList(list): 9 | 
pass 10 | 11 | 12 | class ConstList(list): 13 | pass 14 | 15 | 16 | class ListDataParallel(DataParallel): 17 | def scatter(self, inputs, kwargs, device_ids): 18 | return pose_scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) 19 | 20 | def gather(self, outputs, output_device): 21 | return pose_gather(outputs, output_device, dim=self.dim) 22 | 23 | 24 | def scatter(inputs, target_gpus, dim=0): 25 | r""" 26 | Slices variables into approximately equal chunks and 27 | distributes them across given GPUs. Duplicates 28 | references to objects that are not variables. Does not 29 | support Tensors. 30 | """ 31 | def scatter_map(obj): 32 | if isinstance(obj, Variable): 33 | return Scatter.apply(target_gpus, None, dim, obj) 34 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 35 | if isinstance(obj, ScatterList): 36 | assert len(obj) == len(target_gpus) 37 | return [obj[i] for i in range(len(target_gpus))] 38 | if isinstance(obj, tuple) and len(obj) > 0: 39 | return list(zip(*map(scatter_map, obj))) 40 | if isinstance(obj, list) and len(obj) > 0: 41 | return list(map(list, zip(*map(scatter_map, obj)))) 42 | if isinstance(obj, dict) and len(obj) > 0: 43 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 44 | return [obj for targets in target_gpus] 45 | 46 | return scatter_map(inputs) 47 | 48 | 49 | def pose_scatter_kwargs(inputs, kwargs, target_gpus, dim=0): 50 | r"""Scatter with support for kwargs dictionary""" 51 | inputs = scatter(inputs, target_gpus, dim) if inputs else [] 52 | kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] 53 | if len(inputs) < len(kwargs): 54 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 55 | elif len(kwargs) < len(inputs): 56 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 57 | inputs = tuple(inputs) 58 | kwargs = tuple(kwargs) 59 | return inputs, kwargs 60 | 61 | 62 | def pose_gather(outputs, target_device, dim=0): 63 | r""" 64 | Gathers variables from different GPUs on a specified device 65 | (-1 means the CPU). 
66 | """ 67 | def gather_map(outputs): 68 | if isinstance(outputs, Variable): 69 | if target_device == -1: 70 | return outputs.cpu() 71 | return outputs.cuda(target_device) 72 | 73 | out = outputs[0] 74 | if isinstance(out, Variable): 75 | return Gather.apply(target_device, dim, *outputs) 76 | if out is None: 77 | return None 78 | 79 | if isinstance(out, str): 80 | return out 81 | if isinstance(out, ConstList): 82 | return out 83 | if isinstance(out, ScatterList): 84 | return tuple(map(gather_map, itertools.chain(*outputs))) 85 | 86 | return type(out)(map(gather_map, zip(*outputs))) 87 | return gather_map(outputs) 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned 
long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /training/multipose_prn_train.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/training/multipose_prn_train.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | from pycocotools.coco import COCO 9 | from torch.utils.data import DataLoader 10 | from training.trainer import Trainer 11 | from datasets.coco_data.prn_data_pipeline import PRN_CocoDataset 12 | from network.posenet import poseNet 13 | from training.batch_processor import batch_processor 14 | from torch.optim.lr_scheduler import ReduceLROnPlateau 15 | 16 | # Hyper-params 17 | coco_root = '/data/COCO/' 18 | backbone='resnet101' # 'resnet50' 19 | opt = 'adam' 20 | inp_size = 480 # input size 480*480 21 | feat_stride = 4 22 | node_count = 1024 # Hidden Layer Node Count 23 | coeff = 2 # Coefficient of bbox size 24 | threshold = 0.21 # BBOX threshold 25 | num_of_keypoints = 3 # Minimum number of keypoints for each bbox in training 26 | 27 | # model parameters in MultiPoseNet 28 | prn_para = ['prn'] 29 | 30 | ##################################################################### 31 | # Set Training parameters 32 | params = Trainer.TrainParams() 33 | params.exp_name = 'prn_subnet/' 34 | params.subnet_name = 'prn_subnet' 35 | params.save_dir = './extra/models/{}'.format(params.exp_name) 36 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 37 | params.ignore_opt_state = True 38 | 39 | params.max_epoch = 40 40 | params.init_lr = 1.0e-3 41 | params.lr_decay = 0.9 42 | 43 | params.gpus = [0] 44 | params.batch_size = 8 * len(params.gpus) 45 | params.val_nbatch_end_epoch = 2000 46 | 47 | params.print_freq = 1000 48 | 49 | # model 50 | if backbone == 'resnet101': 51 | model = poseNet(101, prn_node_count=node_count, prn_coeff=coeff) 52 | elif backbone == 'resnet50': 53 | model = poseNet(50, prn_node_count=node_count, prn_coeff=coeff) 54 | 55 | # Train Key-point Subnet, Fix the weights in detection subnet (RetinaNet) 56 | for name, module in model.named_children(): 57 | if name not in prn_para: 58 | for para in module.parameters(): 59 | para.requires_grad = False 60 | 61 | print("Loading dataset...") 62 | # load training data 63 | coco_train = COCO(os.path.join(coco_root, 'annotations/person_keypoints_train2017.json')) 64 | train_data = DataLoader(dataset=PRN_CocoDataset( 65 | coco_train, num_of_keypoints=num_of_keypoints, coeff=coeff, threshold=threshold, 66 | inp_size=inp_size, feat_stride=feat_stride),batch_size=params.batch_size, num_workers=4, shuffle=True) 67 | print('train dataset len: {}'.format(len(train_data.dataset))) 68 | 69 | # load validation data 70 | valid_data = None 71 | if params.val_nbatch > 0: 72 | coco_val = COCO(os.path.join(coco_root, 'annotations/person_keypoints_val2017.json')) 73 | valid_data = DataLoader(dataset=PRN_CocoDataset( 74 | coco_val, num_of_keypoints=num_of_keypoints, coeff=coeff, threshold=threshold, 75 | inp_size=inp_size, feat_stride=feat_stride), batch_size=params.batch_size, num_workers=4, shuffle=True) 76 | 
print('val dataset len: {}'.format(len(valid_data.dataset))) 77 | 78 | trainable_vars = [param for param in model.parameters() if param.requires_grad] 79 | if opt == 'adam': 80 | print("Training with adam") 81 | params.optimizer = torch.optim.Adam( 82 | trainable_vars, lr=params.init_lr) 83 | 84 | cudnn.benchmark = True 85 | params.lr_scheduler = ReduceLROnPlateau(params.optimizer, 'min', factor=params.lr_decay, patience=2, verbose=True) 86 | trainer = Trainer(model, params, batch_processor, train_data, valid_data) 87 | trainer.train() 88 | -------------------------------------------------------------------------------- /datasets/coco_data/prn_gaussian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from skimage.filters import gaussian 3 | 4 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89] * 100) 5 | 6 | 7 | def multivariate_gaussian(N, sigma=2): 8 | t = 4 9 | X = np.linspace(-t, t, N) 10 | Y = np.linspace(-t, t, N) 11 | X, Y = np.meshgrid(X, Y) 12 | pos = np.empty(X.shape + (2,)) 13 | pos[:, :, 0] = X 14 | pos[:, :, 1] = Y 15 | mu = np.array([0., 0.]) 16 | sigma = np.array([[sigma, 0], [0, sigma]]) 17 | n = mu.shape[0] 18 | Sigma_det = np.linalg.det(sigma) 19 | Sigma_inv = np.linalg.inv(sigma) 20 | N = np.sqrt((2 * np.pi) ** n * Sigma_det) 21 | fac = np.einsum('...k,kl,...l->...', pos - mu, Sigma_inv, pos - mu) 22 | return np.exp(-fac / 2) / N 23 | 24 | 25 | def crop_paste(img, c, N=13, sigma=2): 26 | Z = multivariate_gaussian(N, sigma) 27 | 28 | H = img.shape[1] 29 | W = img.shape[0] 30 | 31 | h = (Z.shape[0] - 1) / 2 32 | 33 | N = Z.shape[0] 34 | x1 = (c[0] - h) 35 | y1 = (c[1] - h) 36 | 37 | x2 = (c[0] + h) + 1 38 | y2 = (c[1] + h) + 1 39 | 40 | zx1 = 0 41 | zy1 = 0 42 | zx2 = N + 1 43 | zy2 = N + 1 44 | 45 | if x1 < 0: 46 | x1 = 0 47 | zx1 = 0 - (c[0] - h) 48 | 49 | if y1 < 0: 50 | y1 = 0 51 | zy1 = 0 - (c[1] - h) 52 | 53 | if x2 > W - 1: 54 | x2 = W - 1 55 | zx2 = x2 - x1 + 1 56 | x2 = W 57 | 58 | if y2 > H - 1: 59 | y2 = H - 1 60 | zy2 = y2 - y1 + 1 61 | y2 = H 62 | 63 | img[x1:x2, y1:y2] = np.maximum(Z[zx1:zx2, zy1:zy2], img[x1:x2, y1:y2]) 64 | 65 | 66 | ''' 67 | def gaussian(img, N = 13, sigma=2): 68 | cs = np.where(img==1) 69 | img = np.zeros_like(img) 70 | for c in zip(cs[0], cs[1]): 71 | crop_paste(img, c, N, sigma) 72 | return img 73 | ''' 74 | 75 | 76 | def gaussian_multi_input_mp(inp): 77 | ''' 78 | :param inp: Multi person ground truth heatmap input (17 ch) Each channel contains multiple joints. 79 | :return: out: Gaussian augmented output. Values are between 0. and 1. 80 | ''' 81 | 82 | h, w, ch = inp.shape 83 | out = np.zeros_like(inp) 84 | for i in range(ch): 85 | layer = inp[:, :, i] 86 | ind = np.argwhere(layer == 1) 87 | b = [] 88 | if len(ind) > 0: 89 | for j in ind: 90 | t = np.zeros((h, w)) 91 | t[j[0], j[1]] = 1 92 | t = gaussian(t, sigma=2, mode='constant') 93 | t = t * (1 / t.max()) 94 | b.append(t) 95 | 96 | out[:, :, i] = np.maximum.reduce(b) 97 | else: 98 | out[:, :, i] = np.zeros((h, w)) 99 | return out 100 | 101 | 102 | def gaussian_multi_output(inp): 103 | ''' 104 | :param inp: Single person ground truth heatmap input (17 ch) Each channel contains one joint. 105 | :return: out: Gaussian augmented output. Values are between 0. and 1. 
106 | ''' 107 | h, w, ch = inp.shape 108 | out = np.zeros_like(inp) 109 | for i in range(ch): 110 | j = np.argwhere(inp[:, :, i] == 1) 111 | if len(j) == 0: 112 | out[:, :, i] = np.zeros((h, w)) 113 | continue 114 | j = j[0] 115 | t = np.zeros((h, w)) 116 | t[j[0], j[1]] = 1 117 | t = gaussian(t, sigma=5, mode='constant') 118 | out[:, :, i] = t * (1 / t.max()) 119 | return out 120 | 121 | 122 | def crop(img, c, N=13): 123 | H = img.shape[1] 124 | W = img.shape[0] 125 | 126 | h = (N - 1) / 2 127 | 128 | x1 = int(c[0] - h) 129 | y1 = int(c[1] - h) 130 | 131 | x2 = int(c[0] + h) + 1 132 | y2 = int(c[1] + h) + 1 133 | 134 | if x1 < 0: 135 | x1 = 0 136 | 137 | if y1 < 0: 138 | y1 = 0 139 | 140 | if x2 > W - 1: 141 | x2 = W 142 | 143 | if y2 > H - 1: 144 | y2 = H 145 | 146 | return img[x1:x2, y1:y2] 147 | 148 | -------------------------------------------------------------------------------- /training/multipose_detection_train.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/training/multipose_detection_train.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | import torch 7 | from torch.optim.lr_scheduler import ReduceLROnPlateau 8 | 9 | from training.batch_processor import batch_processor 10 | from network.posenet import poseNet 11 | from datasets.coco import get_loader 12 | from training.trainer import Trainer 13 | 14 | # Hyper-params 15 | coco_root = '/data/COCO/' 16 | backbone = 'resnet101' # 'resnet50' 17 | opt = 'adam' 18 | weight_decay = 0.000 19 | inp_size = 608 # input size 608*608 20 | feat_stride = 4 21 | 22 | # model parameters in MultiPoseNet 23 | fpn_resnet_para = ['conv1', 'bn1', 'layer1', 'layer2', 'layer3', 'layer4'] 24 | fpn_retinanet_para = ['conv6', 'conv7', 'latlayer1', 'latlayer2', 25 | 'latlayer3', 'toplayer0', 'toplayer1', 'toplayer2'] 26 | fpn_keypoint_para = ['toplayer', 'flatlayer1', 'flatlayer2', 27 | 'flatlayer3', 'smooth1', 'smooth2', 'smooth3'] 28 | retinanet_para = ['regressionModel', 'classificationModel'] 29 | keypoint_para = ['convt1', 'convt2', 'convt3', 'convt4', 'convs1', 'convs2', 'convs3', 'convs4', 'upsample1', 30 | 'upsample2', 'upsample3', 'conv2', 'convfin', 'convfin_k2', 'convfin_k3', 'convfin_k4', 'convfin_k5'] 31 | prn_para = ['prn'] 32 | 33 | ##################################################################### 34 | # train detection subnet 35 | data_dir = coco_root+'images/' 36 | mask_dir = coco_root 37 | json_path = coco_root+'COCO.json' 38 | 39 | # Set Training parameters 40 | params = Trainer.TrainParams() 41 | params.exp_name = 'res101_detection_subnet/' 42 | params.subnet_name = 'detection_subnet' 43 | params.save_dir = './extra/models/{}'.format(params.exp_name) 44 | params.ckpt = './demo/models/ckpt_baseline_resnet101.h5' 45 | params.ignore_opt_state = True 46 | 47 | params.max_epoch = 50 48 | params.init_lr = 1.e-5 49 | params.lr_decay = 0.1 50 | 51 | params.gpus = [0] 52 | params.batch_size = 25 * len(params.gpus) 53 | params.val_nbatch_end_epoch = 2000 54 | 55 | params.print_freq = 50 56 | 57 | # model 58 | if backbone == 'resnet101': 59 | model = poseNet(101) 60 | elif backbone == 'resnet50': 61 | model = poseNet(50) 62 | 63 | # Train detection subnet (RetinaNet), Fix the weights in backbone (ResNet) ans Key-point Subnet 64 | for name, module in model.fpn.named_children(): 65 | if name in fpn_resnet_para: 66 | for para in module.parameters(): 67 | para.requires_grad = False 68 | for name, module in 
model.fpn.named_children(): 69 | if name in fpn_keypoint_para: 70 | for para in module.parameters(): 71 | para.requires_grad = False 72 | for name, module in model.named_children(): 73 | if name in keypoint_para: 74 | for para in module.parameters(): 75 | para.requires_grad = False 76 | for name, module in model.named_children(): 77 | if name in prn_para: 78 | for para in module.parameters(): 79 | para.requires_grad = False 80 | 81 | print("Loading dataset...") 82 | # load training data 83 | train_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, 84 | preprocess='resnet', batch_size=params.batch_size, training=True, 85 | shuffle=True, num_workers=8, subnet=params.subnet_name) 86 | print('train dataset len: {}'.format(len(train_data.dataset))) 87 | 88 | # load validation data 89 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, 90 | preprocess='resnet', batch_size=params.batch_size-10*len(params.gpus), training=False, 91 | shuffle=False, num_workers=8, subnet=params.subnet_name) 92 | print('val dataset len: {}'.format(len(valid_data.dataset))) 93 | 94 | trainable_vars = [param for param in model.parameters() if param.requires_grad] 95 | if opt == 'adam': 96 | print("Training with adam") 97 | params.optimizer = torch.optim.Adam( 98 | trainable_vars, lr=params.init_lr, weight_decay=weight_decay) 99 | 100 | params.lr_scheduler = ReduceLROnPlateau( 101 | params.optimizer, 'min', factor=params.lr_decay, patience=3, verbose=True) 102 | trainer = Trainer(model, params, batch_processor, train_data, valid_data) 103 | trainer.train() 104 | -------------------------------------------------------------------------------- /network/net_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | from copy import deepcopy 6 | try: 7 | import cPickle as pickle 8 | except ImportError: 9 | import pickle 10 | from lib.utils.log import logger 11 | 12 | def set_optimizer_state_devices(state, device_id=None): 13 | """ 14 | set state in optimizer to a device. 
move to cpu if device_id==None 15 | :param state: optimizer.state 16 | :param device_id: None or a number 17 | :return: 18 | """ 19 | for k, v in state.items(): 20 | for k2 in v.keys(): 21 | if hasattr(v[k2], 'cuda'): 22 | if device_id is None: 23 | v[k2] = v[k2].cpu() 24 | else: 25 | v[k2] = v[k2].cuda(device_id) 26 | 27 | return state 28 | 29 | 30 | def save_net(fname, net, epoch=-1, optimizers=None, rm_prev_opt=False, max_n_ckpts=-1): 31 | import h5py 32 | with h5py.File(fname, mode='w') as h5f: 33 | for k, v in net.state_dict().items(): 34 | h5f.create_dataset(k, data=v.cpu().numpy()) 35 | h5f.attrs['epoch'] = epoch 36 | 37 | if optimizers is not None: 38 | state_dicts = [] 39 | for optimizer in optimizers: 40 | state_dict = deepcopy(optimizer.state_dict()) 41 | state_dict['state'] = set_optimizer_state_devices(state_dict['state'], device_id=None) 42 | state_dicts.append(state_dict) 43 | 44 | state_file = fname + '.optimizer_state.pk' 45 | with open(state_file, 'wb') as f: 46 | pickle.dump(state_dicts, f) 47 | 48 | # remove 49 | if rm_prev_opt: 50 | root = os.path.split(fname)[0] 51 | for filename in os.listdir(root): 52 | filename = os.path.join(root, filename) 53 | if filename.endswith('.optimizer_state.pk') and filename != state_file: 54 | logger.info(('Remove {}'.format(filename))) 55 | os.remove(filename) 56 | 57 | # remove ckpt 58 | if max_n_ckpts > 0: 59 | root = os.path.split(fname)[0] 60 | ckpts = [fname for fname in os.listdir(root) if os.path.splitext(fname)[-1] == '.h5'] 61 | ckpts = sorted(ckpts, key=lambda name: int(os.path.splitext(name)[0].split('_')[-1])) 62 | if len(ckpts) > max_n_ckpts: 63 | for ckpt in ckpts[0:-max_n_ckpts]: 64 | filename = os.path.join(root, ckpt) 65 | logger.info('Remove {}'.format(filename)) 66 | os.remove(filename) 67 | 68 | 69 | def load_net(fname, net, prefix='', load_state_dict=False): 70 | import h5py 71 | with h5py.File(fname, mode='r') as h5f: 72 | h5f_is_module = True 73 | for k in h5f.keys(): 74 | if not str(k).startswith('module.'): 75 | h5f_is_module = False 76 | break 77 | if prefix == '' and not isinstance(net, nn.DataParallel) and h5f_is_module: 78 | prefix = 'module.' 
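        # The loop below copies every parameter from the HDF5 checkpoint into the
        # in-memory state dict, matching entries by (optionally 'module.'-prefixed)
        # name. Shape mismatches and missing keys are only logged as warnings, so a
        # partially matching checkpoint still loads the layers it shares with the net.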
79 | 80 | for k, v in net.state_dict().items(): 81 | k = prefix + k 82 | if k in h5f: 83 | param = torch.from_numpy(np.asarray(h5f[k])) 84 | if v.size() != param.size(): 85 | logger.warning('Inconsistent shape: {}, {}'.format(v.size(), param.size())) 86 | else: 87 | v.copy_(param) 88 | else: 89 | logger.warning('No layer: {}'.format(k)) 90 | 91 | epoch = h5f.attrs['epoch'] if 'epoch' in h5f.attrs else -1 92 | 93 | if not load_state_dict: 94 | if 'learning_rates' in h5f.attrs: 95 | lr = h5f.attrs['learning_rates'] 96 | else: 97 | lr = h5f.attrs.get('lr', -1) 98 | lr = np.asarray([lr] if lr > 0 else [], dtype=np.float) 99 | 100 | return epoch, lr 101 | 102 | state_file = fname + '.optimizer_state.pk' 103 | if os.path.isfile(state_file): 104 | with open(state_file, 'rb') as f: 105 | state_dicts = pickle.load(f) 106 | if not isinstance(state_dicts, list): 107 | state_dicts = [state_dicts] 108 | else: 109 | state_dicts = None 110 | return epoch, state_dicts 111 | 112 | 113 | -------------------------------------------------------------------------------- /network/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Anchors(nn.Module): 7 | def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): 8 | super(Anchors, self).__init__() 9 | 10 | if pyramid_levels is None: 11 | self.pyramid_levels = [3, 4, 5, 6, 7] 12 | if strides is None: 13 | self.strides = [2 ** x for x in self.pyramid_levels] 14 | if sizes is None: 15 | self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] 16 | if ratios is None: 17 | self.ratios = np.array([0.5, 1, 2]) 18 | if scales is None: 19 | self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 20 | 21 | def forward(self, image): 22 | 23 | image_shape = image.shape[2:] 24 | image_shape = np.array(image_shape) 25 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] 26 | 27 | # compute anchors over all pyramid levels 28 | all_anchors = np.zeros((0, 4)).astype(np.float32) 29 | 30 | for idx, p in enumerate(self.pyramid_levels): 31 | anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) 32 | shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) 33 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 34 | 35 | all_anchors = np.expand_dims(all_anchors, axis=0) 36 | 37 | return torch.from_numpy(all_anchors.astype(np.float32)).cuda() 38 | 39 | def generate_anchors(base_size=16, ratios=None, scales=None): 40 | """ 41 | Generate anchor (reference) windows by enumerating aspect ratios X 42 | scales w.r.t. a reference window. 
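    Example (illustrative): with base_size=32, ratios=[0.5, 1, 2] and
    scales=[2**0, 2**(1/3), 2**(2/3)], this returns a (9, 4) array of
    zero-centred boxes in (x1, y1, x2, y2) form, one row per ratio/scale pair.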
43 | """ 44 | 45 | if ratios is None: 46 | ratios = np.array([0.5, 1, 2]) 47 | 48 | if scales is None: 49 | scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) 50 | 51 | num_anchors = len(ratios) * len(scales) 52 | 53 | # initialize output anchors 54 | anchors = np.zeros((num_anchors, 4)) 55 | 56 | # scale base_size 57 | anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T 58 | 59 | # compute areas of anchors 60 | areas = anchors[:, 2] * anchors[:, 3] 61 | 62 | # correct for ratios 63 | anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) 64 | anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) 65 | 66 | # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) 67 | anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T 68 | anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T 69 | 70 | return anchors 71 | 72 | def compute_shape(image_shape, pyramid_levels): 73 | """Compute shapes based on pyramid levels. 74 | 75 | :param image_shape: 76 | :param pyramid_levels: 77 | :return: 78 | """ 79 | image_shape = np.array(image_shape[:2]) 80 | image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] 81 | return image_shapes 82 | 83 | 84 | def anchors_for_shape( 85 | image_shape, 86 | pyramid_levels=None, 87 | ratios=None, 88 | scales=None, 89 | strides=None, 90 | sizes=None, 91 | shapes_callback=None, 92 | ): 93 | 94 | image_shapes = compute_shape(image_shape, pyramid_levels) 95 | 96 | # compute anchors over all pyramid levels 97 | all_anchors = np.zeros((0, 4)) 98 | for idx, p in enumerate(pyramid_levels): 99 | anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales) 100 | shifted_anchors = shift(image_shapes[idx], strides[idx], anchors) 101 | all_anchors = np.append(all_anchors, shifted_anchors, axis=0) 102 | 103 | return all_anchors 104 | 105 | 106 | def shift(shape, stride, anchors): 107 | shift_x = (np.arange(0, shape[1]) + 0.5) * stride 108 | shift_y = (np.arange(0, shape[0]) + 0.5) * stride 109 | 110 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 111 | 112 | shifts = np.vstack(( 113 | shift_x.ravel(), shift_y.ravel(), 114 | shift_x.ravel(), shift_y.ravel() 115 | )).transpose() 116 | 117 | # add A anchors (1, A, 4) to 118 | # cell K shifts (K, 1, 4) to get 119 | # shift anchors (K, A, 4) 120 | # reshape to (K*A, 4) shifted anchors 121 | A = anchors.shape[0] 122 | K = shifts.shape[0] 123 | all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 124 | all_anchors = all_anchors.reshape((K * A, 4)) 125 | 126 | return all_anchors 127 | 128 | -------------------------------------------------------------------------------- /training/multipose_keypoint_train.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | root_path = os.path.realpath(__file__).split('/training/multipose_keypoint_train.py')[0] 3 | os.chdir(root_path) 4 | sys.path.append(root_path) 5 | 6 | import torch 7 | import torch.utils.model_zoo as model_zoo 8 | from torch.optim.lr_scheduler import ReduceLROnPlateau 9 | 10 | from training.batch_processor import batch_processor 11 | from network.posenet import poseNet 12 | from datasets.coco import get_loader 13 | from training.trainer import Trainer 14 | 15 | # Hyper-params 16 | coco_root = '/data/COCO/' 17 | backbone = 'resnet101' # 'resnet50' 18 | opt = 'adam' 19 | weight_decay = 0.000 20 | inp_size = 480 # input size 480*480 21 | feat_stride = 4 22 | 23 | model_urls = { 24 | 'resnet18': 
'https://download.pytorch.org/models/resnet18-5c106cde.pth', 25 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 26 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 27 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 28 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 29 | } 30 | 31 | # model parameters in MultiPoseNet 32 | fpn_resnet_para = ['conv1', 'bn1', 'layer1', 'layer2', 'layer3', 'layer4'] 33 | fpn_retinanet_para = ['conv6', 'conv7', 'latlayer1', 'latlayer2', 34 | 'latlayer3', 'toplayer0', 'toplayer1', 'toplayer2'] 35 | fpn_keypoint_para = ['toplayer', 'flatlayer1', 'flatlayer2', 36 | 'flatlayer3', 'smooth1', 'smooth2', 'smooth3'] 37 | retinanet_para = ['regressionModel', 'classificationModel'] 38 | keypoint_para = ['convt1', 'convt2', 'convt3', 'convt4', 'convs1', 'convs2', 'convs3', 'convs4', 'upsample1', 39 | 'upsample2', 'upsample3', 'conv2', 'convfin', 'convfin_k2', 'convfin_k3', 'convfin_k4', 'convfin_k5'] 40 | prn_para = ['prn'] 41 | 42 | ##################################################################### 43 | # train keypoint subnet 44 | data_dir = coco_root+'images/' 45 | mask_dir = coco_root 46 | json_path = coco_root+'COCO.json' 47 | 48 | # Set Training parameters 49 | params = Trainer.TrainParams() 50 | params.exp_name = 'res101_keypoint_subnet/' 51 | params.subnet_name = 'keypoint_subnet' 52 | params.save_dir = './extra/models/{}'.format(params.exp_name) 53 | params.ckpt = None # None checkpoint file to load 54 | params.ignore_opt_state = False 55 | 56 | params.max_epoch = 80 57 | params.init_lr = 1.e-4 58 | params.lr_decay = 0.1 59 | 60 | params.gpus = [0] 61 | params.batch_size = 6 * len(params.gpus) 62 | params.val_nbatch_end_epoch = 2000 63 | 64 | params.print_freq = 50 65 | 66 | # model 67 | if backbone == 'resnet101': 68 | model = poseNet(101) 69 | elif backbone == 'resnet50': 70 | model = poseNet(50) 71 | 72 | # load pretrained 73 | if params.ckpt is None: 74 | model.fpn.load_state_dict(model_zoo.load_url( 75 | model_urls[backbone]), strict=False) 76 | 77 | # Train Key-point Subnet, Fix the weights in detection subnet (RetinaNet) 78 | for name, module in model.fpn.named_children(): 79 | if name in fpn_retinanet_para: 80 | for para in module.parameters(): 81 | para.requires_grad = False 82 | for name, module in model.named_children(): 83 | if name in retinanet_para: 84 | for para in module.parameters(): 85 | para.requires_grad = False 86 | for name, module in model.named_children(): 87 | if name in prn_para: 88 | for para in module.parameters(): 89 | para.requires_grad = False 90 | 91 | print("Loading dataset...") 92 | # load training data 93 | train_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, 94 | preprocess='resnet', batch_size=params.batch_size, training=True, 95 | shuffle=True, num_workers=8, subnet=params.subnet_name) 96 | print('train dataset len: {}'.format(len(train_data.dataset))) 97 | 98 | # load validation data 99 | valid_data = None 100 | if params.val_nbatch > 0: 101 | valid_data = get_loader(json_path, data_dir, mask_dir, inp_size, feat_stride, 102 | preprocess='resnet', batch_size=params.batch_size-3*len(params.gpus), training=False, 103 | shuffle=False, num_workers=8, subnet=params.subnet_name) 104 | print('val dataset len: {}'.format(len(valid_data.dataset))) 105 | 106 | trainable_vars = [param for param in model.parameters() if param.requires_grad] 107 | if opt == 'adam': 108 | print("Training with adam") 109 
| params.optimizer = torch.optim.Adam( 110 | trainable_vars, lr=params.init_lr, weight_decay=weight_decay) 111 | 112 | params.lr_scheduler = ReduceLROnPlateau( 113 | params.optimizer, 'min', factor=params.lr_decay, patience=3, verbose=True) 114 | trainer = Trainer(model, params, batch_processor, train_data, valid_data) 115 | trainer.train() 116 | -------------------------------------------------------------------------------- /multipose_environment.yaml: -------------------------------------------------------------------------------- 1 | name: MultiPose 2 | channels: 3 | - defaults 4 | dependencies: 5 | - cuda90=1.0=h6433d27_0 6 | - pytorch=0.4.0=py36_cuda9.0.176_cudnn7.1.2_1 7 | - torchvision=0.2.1=py36_1 8 | - backcall=0.1.0=py36_0 9 | - blas=1.0=mkl 10 | - bleach=2.1.4=py36_0 11 | - bokeh=0.12.16=py36_0 12 | - bzip2=1.0.6=h14c3975_5 13 | - ca-certificates=2018.03.07=0 14 | - cairo=1.14.12=h8948797_3 15 | - certifi=2018.8.24=py36_1 16 | - cffi=1.11.5=py36h9745a5d_0 17 | - click=6.7=py36h5253387_0 18 | - cloudpickle=0.5.3=py36_0 19 | - cudatoolkit=9.0=h13b8566_0 20 | - cycler=0.10.0=py36h93f1223_0 21 | - cython=0.28.5=py36hf484d3e_0 22 | - cytoolz=0.9.0.1=py36h14c3975_0 23 | - dask=0.17.5=py36_0 24 | - dask-core=0.17.5=py36_0 25 | - dbus=1.13.2=h714fa37_1 26 | - decorator=4.3.0=py36_0 27 | - distributed=1.21.8=py36_0 28 | - entrypoints=0.2.3=py36_2 29 | - expat=2.2.5=he0dffb1_0 30 | - ffmpeg=4.0=hcdf2ecd_0 31 | - fontconfig=2.13.0=h9420a91_0 32 | - freeglut=3.0.0=hf484d3e_5 33 | - freetype=2.9.1=h8a8886c_0 34 | - glib=2.56.1=h000015b_0 35 | - gmp=6.1.2=h6c8ec71_1 36 | - graphite2=1.3.11=h16798f4_2 37 | - gst-plugins-base=1.14.0=hbbd80ab_1 38 | - gstreamer=1.14.0=hb453b48_1 39 | - h5py=2.8.0=py36h989c5e5_3 40 | - harfbuzz=1.8.4=hec2c2bc_0 41 | - hdf5=1.10.2=hba1933b_1 42 | - heapdict=1.0.0=py36_2 43 | - html5lib=1.0.1=py36_0 44 | - icu=58.2=h9c2bf20_1 45 | - imageio=2.3.0=py36_0 46 | - intel-openmp=2018.0.0=8 47 | - ipykernel=4.9.0=py36_0 48 | - ipython=6.5.0=py36_0 49 | - ipython_genutils=0.2.0=py36_0 50 | - ipywidgets=7.4.1=py36_0 51 | - jasper=2.0.14=h07fcdf6_1 52 | - jedi=0.12.1=py36_0 53 | - jinja2=2.10=py36ha16c418_0 54 | - jpeg=9b=h024ee3a_2 55 | - jsonschema=2.6.0=py36_0 56 | - jupyter=1.0.0=py36_6 57 | - jupyter_client=5.2.3=py36_0 58 | - jupyter_console=5.2.0=py36_1 59 | - jupyter_core=4.4.0=py36_0 60 | - kiwisolver=1.0.1=py36h764f252_0 61 | - libedit=3.1.20170329=h6b74fdf_2 62 | - libffi=3.2.1=hd88cf55_4 63 | - libgcc-ng=8.2.0=hdf63c60_1 64 | - libgfortran-ng=7.2.0=hdf63c60_3 65 | - libglu=9.0.0=hf484d3e_1 66 | - libopencv=3.4.2=h8fa1ad8_0 67 | - libopus=1.2.1=hb9ed12e_0 68 | - libpng=1.6.34=hb9fc6fc_0 69 | - libprotobuf=3.5.2=h6f1eeef_0 70 | - libsodium=1.0.16=h1bed415_0 71 | - libstdcxx-ng=7.2.0=hdf63c60_3 72 | - libtiff=4.0.9=he85c1e1_1 73 | - libuuid=1.0.3=h1bed415_2 74 | - libvpx=1.7.0=h439df22_0 75 | - libxcb=1.13=h1bed415_1 76 | - libxml2=2.9.8=h26e45fe_1 77 | - locket=0.2.0=py36h787c0ad_1 78 | - markupsafe=1.0=py36hd9260cd_1 79 | - matplotlib=2.2.3=py36hb69df0a_0 80 | - mistune=0.8.3=py36h14c3975_1 81 | - mkl=2018.0.2=1 82 | - mkl_fft=1.0.1=py36h3010b51_0 83 | - mkl_random=1.0.1=py36h629b387_0 84 | - msgpack-python=0.5.6=py36h6bb024c_0 85 | - nbconvert=5.3.1=py36_0 86 | - nbformat=4.4.0=py36_0 87 | - ncurses=6.1=hf484d3e_0 88 | - networkx=2.1=py36_0 89 | - ninja=1.8.2=py36h6bb024c_1 90 | - notebook=5.5.0=py36_0 91 | - numpy=1.14.3=py36hcd700cb_1 92 | - numpy-base=1.14.3=py36h9be14a7_1 93 | - olefile=0.45.1=py36_0 94 | - opencv=3.4.2=py36h6fd60c2_0 95 | - 
openssl=1.0.2p=h14c3975_0 96 | - packaging=17.1=py36_0 97 | - pandas=0.23.0=py36h637b7d7_0 98 | - pandoc=2.2.3.2=0 99 | - pandocfilters=1.4.2=py36_1 100 | - parso=0.3.1=py36_0 101 | - partd=0.3.8=py36h36fd896_0 102 | - pcre=8.42=h439df22_0 103 | - pexpect=4.6.0=py36_0 104 | - pickleshare=0.7.4=py36_0 105 | - pillow=5.2.0=py36heded4f4_0 106 | - pip=10.0.1=py36_0 107 | - pixman=0.34.0=hceecf20_3 108 | - progress=1.4=py36_0 109 | - prompt_toolkit=1.0.15=py36_0 110 | - psutil=5.4.5=py36h14c3975_0 111 | - ptyprocess=0.6.0=py36_0 112 | - py-opencv=3.4.2=py36h8fa1ad8_0 113 | - pycparser=2.18=py36hf9f622e_1 114 | - pygments=2.2.0=py36_0 115 | - pyparsing=2.2.0=py36hee85983_1 116 | - pyqt=5.9.2=py36h751905a_0 117 | - python=3.6.5=hc3d631a_2 118 | - python-dateutil=2.7.3=py36_0 119 | - pytz=2018.4=py36_0 120 | - pywavelets=0.5.2=py36he602eb0_0 121 | - pyyaml=3.12=py36hafb9ca4_1 122 | - pyzmq=17.1.2=py36h14c3975_0 123 | - qt=5.9.6=h52aff34_0 124 | - qtconsole=4.4.1=py36_0 125 | - readline=7.0=ha6073c6_4 126 | - scikit-image=0.13.1=py36h14c3975_1 127 | - scipy=1.1.0=py36hfc37229_0 128 | - send2trash=1.5.0=py36_0 129 | - setuptools=39.1.0=py36_0 130 | - simplegeneric=0.8.1=py36_2 131 | - sip=4.19.8=py36hf484d3e_0 132 | - six=1.11.0=py36h372c433_1 133 | - sortedcontainers=1.5.10=py36_0 134 | - sqlite=3.23.1=he433501_0 135 | - tblib=1.3.2=py36h34cf8b6_0 136 | - terminado=0.8.1=py36_1 137 | - testpath=0.3.1=py36_0 138 | - tk=8.6.7=hc745277_3 139 | - toolz=0.9.0=py36_0 140 | - tornado=5.0.2=py36_0 141 | - tqdm=4.23.3=py36_0 142 | - traitlets=4.3.2=py36_0 143 | - wcwidth=0.1.7=py36_0 144 | - webencodings=0.5.1=py36_1 145 | - wheel=0.31.1=py36_0 146 | - widgetsnbextension=3.4.1=py36_0 147 | - xz=5.2.4=h14c3975_4 148 | - yaml=0.1.7=had09818_2 149 | - zeromq=4.2.5=hf484d3e_1 150 | - zict=0.1.3=py36h3a3bf81_0 151 | - zlib=1.2.11=ha838bed_2 152 | prefix: [path/to/anaconda]/envs/MultiPose 153 | 154 | -------------------------------------------------------------------------------- /datasets/coco_data/prn_data_pipeline.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from skimage.filters import gaussian 4 | from torch.utils.data import Dataset 5 | from datasets.coco_data.heatmap import putGaussianMaps 6 | 7 | params_transform = dict() 8 | params_transform['sigma'] = 7.0 9 | 10 | class PRN_CocoDataset(Dataset): 11 | def __init__(self, coco_train, num_of_keypoints, coeff, threshold, inp_size, feat_stride): 12 | self.coco_train = coco_train 13 | self.num_of_keypoints = num_of_keypoints 14 | self.anns = self.get_anns(self.coco_train) 15 | self.bbox_height = coeff * 28 16 | self.bbox_width = coeff * 18 17 | self.threshold = threshold 18 | 19 | params_transform['crop_size_x'] = inp_size/feat_stride 20 | params_transform['crop_size_y'] = inp_size/feat_stride 21 | params_transform['stride'] = 1 22 | 23 | def __len__(self): 24 | return len(self.anns) 25 | 26 | def __getitem__(self, item): 27 | ann_data = self.anns[item] 28 | 29 | input, label = self.get_data(ann_data, self.coco_train) 30 | 31 | return input, label 32 | 33 | def get_data(self, ann_data, coco): 34 | weights = np.zeros((self.bbox_height, self.bbox_width, 17)) 35 | output = np.zeros((self.bbox_height, self.bbox_width, 17)) 36 | 37 | bbox = ann_data['bbox'] 38 | x = int(bbox[0]) 39 | y = int(bbox[1]) 40 | w = float(bbox[2]) 41 | h = float(bbox[3]) 42 | 43 | x_scale = float(self.bbox_width) / math.ceil(w) 44 | y_scale = float(self.bbox_height) / math.ceil(h) 45 | 46 | kpx = 
ann_data['keypoints'][0::3] 47 | kpy = ann_data['keypoints'][1::3] 48 | kpv = ann_data['keypoints'][2::3] 49 | 50 | 51 | for j in range(17): 52 | if kpv[j] > 0: 53 | x0 = int((kpx[j] - x) * x_scale) 54 | y0 = int((kpy[j] - y) * y_scale) 55 | 56 | if x0 >= self.bbox_width and y0 >= self.bbox_height: 57 | output[self.bbox_height - 1, self.bbox_width - 1, j] = 1 58 | elif x0 >= self.bbox_width: 59 | output[y0, self.bbox_width - 1, j] = 1 60 | elif y0 >= self.bbox_height: 61 | try: 62 | output[self.bbox_height - 1, x0, j] = 1 63 | except: 64 | output[self.bbox_height - 1, 0, j] = 1 65 | elif x0 < 0 and y0 < 0: 66 | output[0, 0, j] = 1 67 | elif x0 < 0: 68 | output[y0, 0, j] = 1 69 | elif y0 < 0: 70 | output[0, x0, j] = 1 71 | else: 72 | output[y0, x0, j] = 1 73 | 74 | img_id = ann_data['image_id'] 75 | img_data = coco.loadImgs(img_id)[0] 76 | ann_data = coco.loadAnns(coco.getAnnIds(img_data['id'])) 77 | 78 | for ann in ann_data: 79 | kpx = ann['keypoints'][0::3] 80 | kpy = ann['keypoints'][1::3] 81 | kpv = ann['keypoints'][2::3] 82 | 83 | for j in range(17): 84 | if kpv[j] > 0: 85 | if (kpx[j] > bbox[0] - bbox[2] * self.threshold and kpx[j] < bbox[0] + bbox[2] * (1 + self.threshold)): 86 | if (kpy[j] > bbox[1] - bbox[3] * self.threshold and kpy[j] < bbox[1] + bbox[3] * (1 + self.threshold)): 87 | x0 = int((kpx[j] - x) * x_scale) 88 | y0 = int((kpy[j] - y) * y_scale) 89 | 90 | if x0 >= self.bbox_width and y0 >= self.bbox_height: 91 | weights[self.bbox_height - 1, self.bbox_width - 1, j] = 1 92 | elif x0 >= self.bbox_width: 93 | weights[y0, self.bbox_width - 1, j] = 1 94 | elif y0 >= self.bbox_height: 95 | weights[self.bbox_height - 1, x0, j] = 1 96 | elif x0 < 0 and y0 < 0: 97 | weights[0, 0, j] = 1 98 | elif x0 < 0: 99 | weights[y0, 0, j] = 1 100 | elif y0 < 0: 101 | weights[0, x0, j] = 1 102 | else: 103 | weights[y0, x0, j] = 1 104 | 105 | for t in range(17): 106 | weights[:, :, t] = gaussian(weights[:, :, t]) 107 | output = gaussian(output, sigma=2, mode='constant', multichannel=True) 108 | our_order = [0, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] 109 | weights = weights[:, :, our_order] 110 | output = output[:, :, our_order] 111 | return weights, output 112 | 113 | def get_anns(self, coco): 114 | #:param coco: COCO instance 115 | #:return: anns: List of annotations that contain person with at least 6 keypoints 116 | ann_ids = coco.getAnnIds() 117 | anns = [] 118 | for i in ann_ids: 119 | ann = coco.loadAnns(i)[0] 120 | if ann['iscrowd'] == 0 and ann['num_keypoints'] > self.num_of_keypoints: 121 | anns.append(ann) # ann 122 | sorted_list = sorted(anns, key=lambda k: k['num_keypoints'], reverse=True) 123 | return sorted_list 124 | -------------------------------------------------------------------------------- /network/fpn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | '''RetinaFPN in PyTorch.''' 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from torch.autograd import Variable 8 | 9 | class Bottleneck(nn.Module): 10 | expansion = 4 11 | 12 | def __init__(self, in_planes, planes, stride=1): 13 | super(Bottleneck, self).__init__() 14 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 15 | self.bn1 = nn.BatchNorm2d(planes) 16 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes) 18 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 19 | self.bn3 
= nn.BatchNorm2d(self.expansion*planes) 20 | 21 | self.downsample = nn.Sequential() 22 | if stride != 1 or in_planes != self.expansion*planes: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 25 | nn.BatchNorm2d(self.expansion*planes) 26 | ) 27 | 28 | def forward(self, x): 29 | out = F.relu(self.bn1(self.conv1(x))) 30 | out = F.relu(self.bn2(self.conv2(out))) 31 | out = self.bn3(self.conv3(out)) 32 | out += self.downsample(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | class FPN(nn.Module): 38 | def __init__(self, block, num_blocks): 39 | super(FPN, self).__init__() 40 | self.in_planes = 64 41 | 42 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 43 | self.bn1 = nn.BatchNorm2d(64) 44 | 45 | # Bottom-up layers 46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 50 | 51 | # fpn for detection subnet (RetinaNet) P6,P7 52 | self.conv6 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1) # p6 53 | self.conv7 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1) # p7 54 | 55 | # pure fpn layers for detection subnet (RetinaNet) 56 | # Lateral layers 57 | self.latlayer1 = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # c5 -> p5 58 | self.latlayer2 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) # c4 -> p4 59 | self.latlayer3 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) # c3 -> p3 60 | # smooth 61 | self.toplayer0 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # smooth p5 62 | self.toplayer1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # smooth p4 63 | self.toplayer2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # smooth p3 64 | 65 | # pure fpn layers for keypoint subnet 66 | # Lateral layers 67 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # c5 -> p5 68 | self.flatlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) # c4 -> p4 69 | self.flatlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0) # c3 -> p3 70 | self.flatlayer3 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) # c2 -> p2 71 | # smooth 72 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # smooth p4 73 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # smooth p3 74 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) # smooth p2 75 | 76 | def _make_layer(self, block, planes, num_blocks, stride): 77 | strides = [stride] + [1]*(num_blocks-1) 78 | layers = [] 79 | for stride in strides: 80 | layers.append(block(self.in_planes, planes, stride)) 81 | self.in_planes = planes * block.expansion 82 | return nn.Sequential(*layers) 83 | 84 | def _upsample_add(self, x, y): 85 | '''Upsample and add two feature maps. 86 | 87 | Args: 88 | x: top feature map to be upsampled. 89 | y: lateral feature map. 90 | 91 | Returns: 92 | added feature map. 
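        Example: if x is (N, 256, 20, 20) and y is (N, 256, 40, 40), x is first
        resized to 40x40 with nearest-neighbour interpolation and then added to y
        element-wise.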
93 | ''' 94 | _,_,H,W = y.size() 95 | return F.upsample(x, size=(H,W), mode='nearest', align_corners=None) + y # bilinear, False 96 | 97 | def forward(self, x): 98 | # Bottom-up 99 | c1 = F.relu(self.bn1(self.conv1(x))) 100 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1) 101 | c2 = self.layer1(c1) 102 | c3 = self.layer2(c2) 103 | c4 = self.layer3(c3) 104 | c5 = self.layer4(c4) 105 | 106 | # pure fpn for detection subnet, RetinaNet 107 | p6 = self.conv6(c5) 108 | p7 = self.conv7(F.relu(p6)) 109 | p5 = self.latlayer1(c5) 110 | p4 = self._upsample_add(p5, self.latlayer2(c4)) 111 | p3 = self._upsample_add(p4, self.latlayer3(c3)) 112 | p5 = self.toplayer0(p5) 113 | p4 = self.toplayer1(p4) 114 | p3 = self.toplayer2(p3) 115 | 116 | # pure fpn for keypoints estimation 117 | fp5 = self.toplayer(c5) 118 | fp4 = self._upsample_add(fp5,self.flatlayer1(c4)) 119 | fp3 = self._upsample_add(fp4,self.flatlayer2(c3)) 120 | fp2 = self._upsample_add(fp3,self.flatlayer3(c2)) 121 | # Smooth 122 | fp4 = self.smooth1(fp4) 123 | fp3 = self.smooth2(fp3) 124 | fp2 = self.smooth3(fp2) 125 | 126 | return [[fp2,fp3,fp4,fp5],[p3, p4, p5, p6, p7]] 127 | 128 | def FPN50(): 129 | # [3,4,6,3] -> resnet50 130 | return FPN(Bottleneck, [3,4,6,3]) 131 | 132 | def FPN101(): 133 | # [3,4,23,3] -> resnet101 134 | return FPN(Bottleneck, [3,4,23,3]) 135 | -------------------------------------------------------------------------------- /network/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | def calc_iou(a, b): 6 | area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 7 | 8 | iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0]) 9 | ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1]) 10 | 11 | iw = torch.clamp(iw, min=0) 12 | ih = torch.clamp(ih, min=0) 13 | 14 | ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih 15 | 16 | ua = torch.clamp(ua, min=1e-8) 17 | 18 | intersection = iw * ih 19 | 20 | IoU = intersection / ua 21 | 22 | return IoU 23 | 24 | class FocalLoss(nn.Module): 25 | #def __init__(self): 26 | 27 | def forward(self, classifications, regressions, anchors, annotations): 28 | alpha = 0.25 29 | gamma = 2.0 30 | batch_size = classifications.shape[0] 31 | classification_losses = [] 32 | regression_losses = [] 33 | 34 | anchor = anchors[0, :, :] 35 | 36 | anchor_widths = anchor[:, 2] - anchor[:, 0] 37 | anchor_heights = anchor[:, 3] - anchor[:, 1] 38 | anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths 39 | anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights 40 | 41 | for j in range(batch_size): 42 | 43 | classification = classifications[j, :, :] 44 | regression = regressions[j, :, :] 45 | 46 | bbox_annotation = annotations[j, :, :] 47 | bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] 48 | 49 | if bbox_annotation.shape[0] == 0: 50 | regression_losses.append(torch.tensor(0, requires_grad=True).float().cuda()) 51 | classification_losses.append(torch.tensor(0, requires_grad=True).float().cuda()) 52 | 53 | continue 54 | 55 | classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) 56 | 57 | IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations 58 | 59 | IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 60 | 61 | #import pdb 62 | #pdb.set_trace() 63 | 64 | # compute the loss for classification 65 | 
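            # RetinaNet-style anchor assignment: anchors whose best IoU is below 0.4
            # become negatives (targets = 0), anchors at or above 0.5 become positives
            # with a one-hot class target, and anchors in between keep the -1 fill
            # value and are ignored when the loss is summed. The focal weight applied
            # to the BCE term below is alpha_t * (1 - p_t)**gamma with alpha = 0.25
            # and gamma = 2.0.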
targets = torch.ones(classification.shape) * -1 66 | targets = targets.cuda() 67 | 68 | targets[torch.lt(IoU_max, 0.4), :] = 0 69 | 70 | positive_indices = torch.ge(IoU_max, 0.5) 71 | 72 | num_positive_anchors = positive_indices.sum() 73 | 74 | assigned_annotations = bbox_annotation[IoU_argmax, :] 75 | 76 | targets[positive_indices, :] = 0 77 | targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 78 | 79 | alpha_factor = torch.ones(targets.shape).cuda() * alpha 80 | 81 | alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) 82 | focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification) 83 | focal_weight = alpha_factor * torch.pow(focal_weight, gamma) 84 | 85 | bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) 86 | 87 | # cls_loss = focal_weight * torch.pow(bce, gamma) 88 | cls_loss = focal_weight * bce 89 | 90 | cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda()) 91 | 92 | classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0)) 93 | 94 | # compute the loss for regression 95 | 96 | if positive_indices.sum() > 0: 97 | assigned_annotations = assigned_annotations[positive_indices, :] 98 | 99 | anchor_widths_pi = anchor_widths[positive_indices] 100 | anchor_heights_pi = anchor_heights[positive_indices] 101 | anchor_ctr_x_pi = anchor_ctr_x[positive_indices] 102 | anchor_ctr_y_pi = anchor_ctr_y[positive_indices] 103 | 104 | gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] 105 | gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] 106 | gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths 107 | gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights 108 | 109 | # clip widths to 1 110 | gt_widths = torch.clamp(gt_widths, min=1) 111 | gt_heights = torch.clamp(gt_heights, min=1) 112 | 113 | targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi 114 | targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi 115 | targets_dw = torch.log(gt_widths / anchor_widths_pi) 116 | targets_dh = torch.log(gt_heights / anchor_heights_pi) 117 | 118 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) 119 | targets = targets.t() 120 | 121 | targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda() 122 | 123 | 124 | negative_indices = 1 - positive_indices 125 | 126 | regression_diff = torch.abs(targets - regression[positive_indices, :]) 127 | 128 | regression_loss = torch.where( 129 | torch.le(regression_diff, 1.0 / 9.0), 130 | 0.5 * 9.0 * torch.pow(regression_diff, 2), 131 | regression_diff - 0.5 / 9.0 132 | ) 133 | regression_losses.append(regression_loss.mean()) 134 | else: 135 | regression_losses.append(torch.tensor(0).float().cuda()) 136 | 137 | return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) 138 | 139 | 140 | -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import yaml 7 | 8 | import numpy as np 9 | from easydict import EasyDict as edict 10 | 11 | 12 | config = edict() 13 | 14 | config.OUTPUT_DIR = '' 15 | config.LOG_DIR = '' 16 | config.DATA_DIR = '' 17 | config.GPUS = '0' 18 | 
config.WORKERS = 4 19 | config.PRINT_FREQ = 20 20 | 21 | # Cudnn related params 22 | config.CUDNN = edict() 23 | config.CUDNN.BENCHMARK = True 24 | config.CUDNN.DETERMINISTIC = False 25 | config.CUDNN.ENABLED = True 26 | 27 | # pose_resnet related params 28 | POSE_RESNET = edict() 29 | POSE_RESNET.NUM_LAYERS = 50 30 | POSE_RESNET.DECONV_WITH_BIAS = False 31 | POSE_RESNET.NUM_DECONV_LAYERS = 3 32 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 33 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 34 | POSE_RESNET.FINAL_CONV_KERNEL = 1 35 | POSE_RESNET.TARGET_TYPE = 'gaussian' 36 | POSE_RESNET.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 37 | POSE_RESNET.SIGMA = 2 38 | 39 | MODEL_EXTRAS = { 40 | 'pose_resnet': POSE_RESNET, 41 | } 42 | 43 | # common params for NETWORK 44 | config.MODEL = edict() 45 | config.MODEL.NAME = 'pose_resnet' 46 | config.MODEL.INIT_WEIGHTS = True 47 | config.MODEL.PRETRAINED = '' 48 | config.MODEL.NUM_JOINTS = 16 49 | config.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 50 | config.MODEL.EXTRA = MODEL_EXTRAS[config.MODEL.NAME] 51 | 52 | config.LOSS = edict() 53 | config.LOSS.USE_TARGET_WEIGHT = True 54 | 55 | # DATASET related params 56 | config.DATASET = edict() 57 | config.DATASET.ROOT = '' 58 | config.DATASET.DATASET = 'mpii' 59 | config.DATASET.TRAIN_SET = 'train' 60 | config.DATASET.TEST_SET = 'valid' 61 | config.DATASET.DATA_FORMAT = 'jpg' 62 | config.DATASET.HYBRID_JOINTS_TYPE = '' 63 | config.DATASET.SELECT_DATA = False 64 | 65 | # training data augmentation 66 | config.DATASET.FLIP = True 67 | config.DATASET.SCALE_FACTOR = 0.25 68 | config.DATASET.ROT_FACTOR = 30 69 | 70 | # train 71 | config.TRAIN = edict() 72 | 73 | config.TRAIN.LR_FACTOR = 0.1 74 | config.TRAIN.LR_STEP = [90, 110] 75 | config.TRAIN.LR = 0.001 76 | 77 | config.TRAIN.OPTIMIZER = 'adam' 78 | config.TRAIN.MOMENTUM = 0.9 79 | config.TRAIN.WD = 0.0001 80 | config.TRAIN.NESTEROV = False 81 | config.TRAIN.GAMMA1 = 0.99 82 | config.TRAIN.GAMMA2 = 0.0 83 | 84 | config.TRAIN.BEGIN_EPOCH = 0 85 | config.TRAIN.END_EPOCH = 140 86 | 87 | config.TRAIN.RESUME = False 88 | config.TRAIN.CHECKPOINT = '' 89 | 90 | config.TRAIN.BATCH_SIZE = 32 91 | config.TRAIN.SHUFFLE = True 92 | 93 | # testing 94 | config.TEST = edict() 95 | 96 | # size of images for each device 97 | config.TEST.BATCH_SIZE = 32 98 | # Test Model Epoch 99 | config.TEST.FLIP_TEST = False 100 | config.TEST.POST_PROCESS = True 101 | config.TEST.SHIFT_HEATMAP = True 102 | 103 | config.TEST.USE_GT_BBOX = False 104 | # nms 105 | config.TEST.OKS_THRE = 0.5 106 | config.TEST.IN_VIS_THRE = 0.0 107 | config.TEST.COCO_BBOX_FILE = '' 108 | config.TEST.BBOX_THRE = 1.0 109 | config.TEST.MODEL_FILE = '' 110 | config.TEST.IMAGE_THRE = 0.0 111 | config.TEST.NMS_THRE = 1.0 112 | 113 | # debug 114 | config.DEBUG = edict() 115 | config.DEBUG.DEBUG = False 116 | config.DEBUG.SAVE_BATCH_IMAGES_GT = False 117 | config.DEBUG.SAVE_BATCH_IMAGES_PRED = False 118 | config.DEBUG.SAVE_HEATMAPS_GT = False 119 | config.DEBUG.SAVE_HEATMAPS_PRED = False 120 | 121 | 122 | def _update_dict(k, v): 123 | if k == 'DATASET': 124 | if 'MEAN' in v and v['MEAN']: 125 | v['MEAN'] = np.array([eval(x) if isinstance(x, str) else x 126 | for x in v['MEAN']]) 127 | if 'STD' in v and v['STD']: 128 | v['STD'] = np.array([eval(x) if isinstance(x, str) else x 129 | for x in v['STD']]) 130 | if k == 'MODEL': 131 | if 'EXTRA' in v and 'HEATMAP_SIZE' in v['EXTRA']: 132 | if isinstance(v['EXTRA']['HEATMAP_SIZE'], int): 133 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 134 | 
[v['EXTRA']['HEATMAP_SIZE'], v['EXTRA']['HEATMAP_SIZE']]) 135 | else: 136 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 137 | v['EXTRA']['HEATMAP_SIZE']) 138 | if 'IMAGE_SIZE' in v: 139 | if isinstance(v['IMAGE_SIZE'], int): 140 | v['IMAGE_SIZE'] = np.array([v['IMAGE_SIZE'], v['IMAGE_SIZE']]) 141 | else: 142 | v['IMAGE_SIZE'] = np.array(v['IMAGE_SIZE']) 143 | for vk, vv in v.items(): 144 | if vk in config[k]: 145 | config[k][vk] = vv 146 | else: 147 | raise ValueError("{}.{} not exist in config.py".format(k, vk)) 148 | 149 | 150 | def update_config(config_file): 151 | exp_config = None 152 | with open(config_file) as f: 153 | exp_config = edict(yaml.load(f)) 154 | for k, v in exp_config.items(): 155 | if k in config: 156 | if isinstance(v, dict): 157 | _update_dict(k, v) 158 | else: 159 | if k == 'SCALES': 160 | config[k][0] = (tuple(v)) 161 | else: 162 | config[k] = v 163 | else: 164 | raise ValueError("{} not exist in config.py".format(k)) 165 | 166 | 167 | def gen_config(config_file): 168 | cfg = dict(config) 169 | for k, v in cfg.items(): 170 | if isinstance(v, edict): 171 | cfg[k] = dict(v) 172 | 173 | with open(config_file, 'w') as f: 174 | yaml.dump(dict(cfg), f, default_flow_style=False) 175 | 176 | 177 | def update_dir(model_dir, log_dir, data_dir): 178 | if model_dir: 179 | config.OUTPUT_DIR = model_dir 180 | 181 | if log_dir: 182 | config.LOG_DIR = log_dir 183 | 184 | if data_dir: 185 | config.DATA_DIR = data_dir 186 | 187 | config.DATASET.ROOT = os.path.join( 188 | config.DATA_DIR, config.DATASET.ROOT) 189 | 190 | config.TEST.COCO_BBOX_FILE = os.path.join( 191 | config.DATA_DIR, config.TEST.COCO_BBOX_FILE) 192 | 193 | config.MODEL.PRETRAINED = os.path.join( 194 | config.DATA_DIR, config.MODEL.PRETRAINED) 195 | 196 | 197 | def get_model_name(cfg): 198 | name = cfg.MODEL.NAME 199 | full_name = cfg.MODEL.NAME 200 | extra = cfg.MODEL.EXTRA 201 | if name in ['pose_resnet']: 202 | name = '{model}_{num_layers}'.format( 203 | model=name, 204 | num_layers=extra.NUM_LAYERS) 205 | deconv_suffix = ''.join( 206 | 'd{}'.format(num_filters) 207 | for num_filters in extra.NUM_DECONV_FILTERS) 208 | full_name = '{height}x{width}_{name}_{deconv_suffix}'.format( 209 | height=cfg.MODEL.IMAGE_SIZE[1], 210 | width=cfg.MODEL.IMAGE_SIZE[0], 211 | name=name, 212 | deconv_suffix=deconv_suffix) 213 | else: 214 | raise ValueError('Unkown model: {}'.format(cfg.MODEL)) 215 | 216 | return name, full_name 217 | 218 | 219 | if __name__ == '__main__': 220 | import sys 221 | name, full_name = gen_config(sys.argv[1]) 222 | print(name) 223 | print(full_name) 224 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Introduction 2 | 3 | This is a pytorch implementation of [MultiPoseNet](https://arxiv.org/abs/1807.04067) ( ECCV 2018, Muhammed Kocabas et al.) 4 | 5 | ![baseline checkpoint result](./demo/output/pic1_canvas.png) 6 | 7 | [![License](https://img.shields.io/github/license/mashape/apistatus.svg)](https://opensource.org/licenses/MIT) 8 | 9 | ### Contents 10 | 11 | 0. [**Update**](#update) 12 | 13 | 1. [Requirements](#requirements) 14 | 2. [Training](#training) 15 | 3. [Validation](#validation) 16 | 4. [Demo](#demo) 17 | 5. [Result](#result) 18 | 6. [Acknowledgements](#acknowledgements) 19 | 7. [Citation](#citation) 20 | 21 | ### Demo 22 | 23 | Run inference on your own pictures. 
24 | 25 | - Prepare checkpoint: 26 | - Download our baseline model ([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z), [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/8b7f780fe1df46febe73/), backbone: resnet101) or use your own model. 27 | - Specify the checkpoints file path `params.ckpt` in file `multipose_test.py`. 28 | - Specify the pictures file path `testdata_dir` and results file path `testresult_dir` in file `multipose_test.py`. 29 | 30 | - Run: 31 | ```python 32 | python ./evaluate/multipose_test.py # inference on your own pictures 33 | python ./evaluate/multipose_coco_eval.py # COCO evaluation 34 | ``` 35 | 36 | ### Result 37 | 38 | - mAP (baseline checkpoint, temporarily) 39 | 40 | ``` 41 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.590 42 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.791 43 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.644 44 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.565 45 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.636 46 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.644 47 | Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.810 48 | Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.689 49 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.601 50 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.709 51 | ``` 52 | 53 | ### Requirements 54 | 55 | #### Prerequisites 56 | - **Disable cudnn for batch_norm**: (See: [@Microsoft / human-pose-estimation.pytorch#installation](https://github.com/Microsoft/human-pose-estimation.pytorch#installation)) 57 | 58 | ```bash 59 | # PYTORCH=/path/to/pytorch 60 | # for pytorch v0.4.0 61 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 62 | # for pytorch v0.4.1 63 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 64 | 65 | # Note that instructions like # PYTORCH=/path/to/pytorch indicate that you should pick 66 | # a path where you'd like to have pytorch installed and then set an environment 67 | # variable (PYTORCH in this case) accordingly. 68 | ``` 69 | 70 | - If you are using Anaconda, we suggest you create a new conda environment :`conda env create -f multipose_environment.yaml`. Maybe you should change the `channels:` and `prefix:` setting in `multipose_environment.yaml` to fit your own Anaconda environment. 71 | - `source activate Multipose` 72 | - `pip install pycocotools` 73 | 74 | - You can also follow `dependencies` setting in `multipose_environment.yaml` to build your own Python environment. 75 | - Pytorch = 0.4.0, Python = 3.6 76 | - pycocotools=2.0.0, numpy=1.14.3, scikit-image=0.13.1, opencv=3.4.2 77 | - ...... 78 | 79 | - Build the NMS extension 80 | ```bash 81 | cd ./lib 82 | bash build.sh 83 | cd .. 84 | ``` 85 | 86 | #### Data preparation 87 | 88 | **You can skip this step if you just want to run inference on your own pictures using our baseline checkpoint** 89 | 90 | - For Training Keypoint Estimation Subnet, we followed [ZheC/Realtime_Multi-Person_Pose_Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation)'s first 4 Training Steps prepared our COCO2014 dataset (train2014, val2014 and mask2014). 91 | - We also use COCO2017 dataset to train Person Detection Subnet. 
92 | 93 | Make them look like this: 94 | 95 | ```bash 96 | ${COCO_ROOT} 97 | --annotations 98 | --instances_train2017.json 99 | --instances_val2017.json 100 | --person_keypoints_train2017.json 101 | --person_keypoints_val2017.json 102 | --images 103 | --train2014 104 | --val2014 105 | --train2017 106 | --val2017 107 | --mask2014 108 | --COCO.json 109 | ``` 110 | 111 | ### Training 112 | 113 | - Prepare 114 | - Change the hyper-parameter `coco_root` to your own COCO path. 115 | - You can change the parameter `params.gpus` to define which GPU device you want to use, such as `params.gpus = [0,1,2,3]`. 116 | - The trained model will be saved in `params.save_dir` folder every epoch. 117 | - Run: 118 | ```python 119 | python ./training/multipose_keypoint_train.py # train keypoint subnet 120 | python ./training/multipose_detection_train.py # train detection subnet 121 | python ./training/multipose_prn_train.py # train PRN subnet 122 | ``` 123 | 124 | ### Validation 125 | 126 | - Prepare checkpoint: 127 | - Download our baseline model ([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z), [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/8b7f780fe1df46febe73/), backbone: resnet101) or use your own model. 128 | - Specify the checkpoints file path `params.ckpt` in file `multipose_*_val.py`. 129 | 130 | - Run: 131 | ```python 132 | python ./evaluate/multipose_keypoint_val.py # validate keypoint subnet on first 2644 of val2014 marked by 'isValidation = 1', as our minval dataset. 133 | python ./evaluate/multipose_detection_val.py # validate detection subnet on val2017 134 | python ./evaluate/multipose_prn_val.py # validate PRN subnet on val2017 135 | ``` 136 | 137 | ### To Do 138 | 139 | - [x] Keypoint Estimation Subnet for 17 human keypoints annotated in [COCO dataset](http://cocodataset.org/) 140 | - [x] Keypoint Estimation Subnet with intermediate supervision 141 | - [x] Combine Keypoint Estimation Subnet with Person Detection Subnet(RetinaNet) 142 | - [x] Combine Keypoint Estimation Subnet with [Pose Residual Network](https://github.com/salihkaragoz/pose-residual-network-pytorch/tree/master) 143 | - [ ] Keypoint Estimation Subnet with person segmentation mask 144 | 145 | ### Update 146 | 147 | - 180925: 148 | - Add Person Detection Subnet (RetinaNet) in `posenet.py`. 149 | - Add NMS extension in `./lib`. 150 | 151 | - 180930: 152 | - Add the training code `multipose_detection_train.py` for RetinaNet. 153 | - Add `multipose_keypoint_*.py` and `multipose_detection_*.py` for Keypoint Estimation Subnet and Person Detection Subnet respectively. Remove `multipose_resnet_*.py`. 154 | 155 | - 1801003: 156 | - Add the training code `multipose_prn_train.py` for PRN. 157 | - Add `multipose_coco_eval.py` for COCO evaluation. 158 | 159 | - 181115: 160 | - New dataloader for detection subnet, remove `RetinaNet_data_pipeline.py` 161 | - Add intermediate supervision in Keypoint Estimation Subnet 162 | - Enable batch_norm for Keypoint Estimation Subnet. 163 | - New prerequisites: [Disable cudnn for batch_norm](https://github.com/LiMeng95/MultiPoseNet.pytorch#prerequisites) 164 | - New checkpoint ([Google Drive](https://drive.google.com/open?id=1Y38q5mIY2XL7mmdaBrF06beYcZZO6v2Z), [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/8b7f780fe1df46febe73/), backbone: resnet101) 165 | 166 | ### Acknowledgements 167 | 168 | - [@ZheC Realtime_Multi-Person_Pose_Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation) : The first 4 Training Steps to generate our own COCO dataset. 
169 | - Thanks [@IcewineChen](https://github.com/IcewineChen/pytorch-MultiPoseNet) for the implement of `posenet`. 170 | - Thanks [@yhenon](https://github.com/yhenon/pytorch-retinanet) for the implement of RetinaNet in PyTorch. 171 | - [@Microsoft / human-pose-estimation.pytorch#installation](https://github.com/Microsoft/human-pose-estimation.pytorch#installation) :Disable cudnn for batch_norm 172 | -------------------------------------------------------------------------------- /network/joint_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy as np 4 | from scipy.ndimage.filters import gaussian_filter, maximum_filter 5 | from scipy.ndimage.morphology import generate_binary_structure 6 | 7 | # Color code used to plot different joints and limbs (eg: joint_type=3 and 8 | # limb_type=3 will use colors[3]) 9 | colors = [ 10 | [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], 11 | [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], 12 | [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], 13 | [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 0, 0]] 14 | limbSeq = [[0,1], [1,2], [2,3], [0,4], [4,5], [5,6], [0,7], [7,8], [8,9], [0,10], [10,11], [11,12], \ 15 | [0,13], [13,15], [0,14],[14,16]] 16 | NUM_JOINTS = 18 17 | 18 | 19 | def find_peaks(param, img): 20 | """ 21 | Given a (grayscale) image, find local maxima whose value is above a given 22 | threshold (param['thre1']) 23 | :param img: Input image (2d array) where we want to find peaks 24 | :return: 2d np.array containing the [x,y] coordinates of each peak found 25 | in the image 26 | """ 27 | 28 | peaks_binary = (maximum_filter(img, footprint=generate_binary_structure( 29 | 2, 1)) == img) * (img > param['thre1']) 30 | # Note reverse ([::-1]): we return [[x y], [x y]...] instead of [[y x], [y 31 | # x]...] 32 | return np.array(np.nonzero(peaks_binary)[::-1]).T 33 | 34 | 35 | def compute_resized_coords(coords, resizeFactor): 36 | """ 37 | Given the index/coordinates of a cell in some input array (e.g. image), 38 | provides the new coordinates if that array was resized by making it 39 | resizeFactor times bigger. 40 | E.g.: image of size 3x3 is resized to 6x6 (resizeFactor=2), we'd like to 41 | know the new coordinates of cell [1,2] -> Function would return [2.5,4.5] 42 | :param coords: Coordinates (indices) of a cell in some input array 43 | :param resizeFactor: Resize coefficient = shape_dest/shape_source. E.g.: 44 | resizeFactor=2 means the destination array is twice as big as the 45 | original one 46 | :return: Coordinates in an array of size 47 | shape_dest=resizeFactor*shape_source, expressing the array indices of the 48 | closest point to 'coords' if an image of size shape_source was resized to 49 | shape_dest 50 | """ 51 | 52 | # 1) Add 0.5 to coords to get coordinates of center of the pixel (e.g. 
53 | # index [0,0] represents the pixel at location [0.5,0.5]) 54 | # 2) Transform those coordinates to shape_dest, by multiplying by resizeFactor 55 | # 3) That number represents the location of the pixel center in the new array, 56 | # so subtract 0.5 to get coordinates of the array index/indices (revert 57 | # step 1) 58 | return (np.array(coords, dtype=float) + 0.5) * resizeFactor - 0.5 59 | 60 | 61 | def NMS(param, heatmaps, upsampFactor=1., bool_refine_center=True, bool_gaussian_filt=False): 62 | """ 63 | NonMaximaSuppression: find peaks (local maxima) in a set of grayscale images 64 | :param heatmaps: set of grayscale images on which to find local maxima (3d np.array, 65 | with dimensions image_height x image_width x num_heatmaps) 66 | :param upsampFactor: Size ratio between CPM heatmap output and the input image size. 67 | Eg: upsampFactor=16 if original image was 480x640 and heatmaps are 30x40xN 68 | :param bool_refine_center: Flag indicating whether: 69 | - False: Simply return the low-res peak found upscaled by upsampFactor (subject to grid-snap) 70 | - True: (Recommended, very accurate) Upsample a small patch around each low-res peak and 71 | fine-tune the location of the peak at the resolution of the original input image 72 | :param bool_gaussian_filt: Flag indicating whether to apply a 1d-GaussianFilter (smoothing) 73 | to each upsampled patch before fine-tuning the location of each peak. 74 | :return: a NUM_JOINTS x 4 np.array where each row represents a joint type (0=nose, 1=neck...) 75 | and the columns indicate the {x,y} position, the score (probability) and a unique id (counter) 76 | """ 77 | # MODIFIED BY CARLOS: Instead of upsampling the heatmaps to heatmap_avg and 78 | # then performing NMS to find peaks, this step can be sped up by ~25-50x by: 79 | # (9-10ms [with GaussFilt] or 5-6ms [without GaussFilt] vs 250-280ms on RoG 80 | # 1. Perform NMS at (low-res) CPM's output resolution 81 | # 1.1. Find peaks using scipy.ndimage.filters.maximum_filter 82 | # 2. Once a peak is found, take a patch of 5x5 centered around the peak, upsample it, and 83 | # fine-tune the position of the actual maximum. 84 | # '-> That's equivalent to having found the peak on heatmap_avg, but much faster because we only 85 | # upsample and scan the 5x5 patch instead of the full (e.g.) 480x640 86 | 87 | joint_list_per_joint_type = [] 88 | cnt_total_joints = 0 89 | 90 | # For every peak found, win_size specifies how many pixels in each 91 | # direction from the peak we take to obtain the patch that will be 92 | # upsampled. Eg: win_size=1 -> patch is 3x3; win_size=2 -> 5x5 93 | # (for BICUBIC interpolation to be accurate, win_size needs to be >=2!) 94 | win_size = 2 95 | 96 | for joint in range(NUM_JOINTS): 97 | map_orig = heatmaps[:, :, joint] 98 | peak_coords = find_peaks(param, map_orig) 99 | peaks = np.zeros((len(peak_coords), 4)) 100 | for i, peak in enumerate(peak_coords): 101 | if bool_refine_center: 102 | x_min, y_min = np.maximum(0, peak - win_size) 103 | x_max, y_max = np.minimum( 104 | np.array(map_orig.T.shape) - 1, peak + win_size) 105 | 106 | # Take a small patch around each peak and only upsample that 107 | # tiny region 108 | patch = map_orig[y_min:y_max + 1, x_min:x_max + 1] 109 | map_upsamp = cv2.resize( 110 | patch, None, fx=upsampFactor, fy=upsampFactor, interpolation=cv2.INTER_CUBIC) 111 | 112 | # Gaussian filtering takes an average of 0.8ms/peak (and there might be 113 | # more than one peak per joint!) 
-> For now, skip it (it's 114 | # accurate enough) 115 | map_upsamp = gaussian_filter( 116 | map_upsamp, sigma=3) if bool_gaussian_filt else map_upsamp 117 | 118 | # Obtain the coordinates of the maximum value in the patch 119 | location_of_max = np.unravel_index( 120 | map_upsamp.argmax(), map_upsamp.shape) 121 | # Remember that peaks indicates [x,y] -> need to reverse it for 122 | # [y,x] 123 | location_of_patch_center = compute_resized_coords( 124 | peak[::-1] - [y_min, x_min], upsampFactor) 125 | # Calculate the offset wrt to the patch center where the actual 126 | # maximum is 127 | refined_center = (location_of_max - location_of_patch_center) 128 | peak_score = map_upsamp[location_of_max] 129 | else: 130 | refined_center = [0, 0] 131 | # Flip peak coordinates since they are [x,y] instead of [y,x] 132 | peak_score = map_orig[tuple(peak[::-1])] 133 | peaks[i, :] = tuple([int(round(x)) for x in compute_resized_coords( 134 | peak_coords[i], upsampFactor) + refined_center[::-1]]) + (peak_score, cnt_total_joints) 135 | cnt_total_joints += 1 136 | joint_list_per_joint_type.append(peaks) 137 | 138 | return joint_list_per_joint_type 139 | 140 | 141 | def get_joint_list(img_orig, param, heatmaps, scale): 142 | 143 | joint_list_per_joint_type = NMS(param, 144 | heatmaps, img_orig.shape[0] / float(heatmaps.shape[0])) 145 | 146 | for peaks in joint_list_per_joint_type: 147 | peaks[:, :2] = peaks[:, :2]*scale 148 | 149 | joint_list = np.array([tuple(peak) + (joint_type,) for joint_type, joint_peaks 150 | in enumerate(joint_list_per_joint_type) for peak in joint_peaks]) 151 | 152 | return joint_list 153 | 154 | 155 | def draw(canvas, joints, bbox): 156 | 157 | x1 = int(bbox[0]) 158 | y1 = int(bbox[1]) 159 | x2 = int(bbox[0]+bbox[2]) 160 | y2 = int(bbox[1]+bbox[3]) 161 | cv2.rectangle(canvas, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2) 162 | 163 | for i in range(17): 164 | if int(joints[i][2]) == 0: 165 | continue 166 | x = int(joints[i][0]) 167 | y = int(joints[i][1]) 168 | cv2.circle(canvas, (x, y), 4, colors[i], thickness=-1) 169 | 170 | # cur_canvas = canvas.copy() 171 | stickwidth = 2 172 | for i in range(16): 173 | if joints[limbSeq[i][0]][2] == 0 or joints[limbSeq[i][1]][2] == 0: 174 | continue 175 | X = (int(joints[limbSeq[i][0]][0]), int(joints[limbSeq[i][1]][0])) 176 | Y = (int(joints[limbSeq[i][0]][1]), int(joints[limbSeq[i][1]][1])) 177 | mX = np.mean(X) 178 | mY = np.mean(Y) 179 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 180 | angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1])) 181 | polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) 182 | cv2.fillConvexPoly(canvas, polygon, colors[i]) 183 | 184 | return canvas 185 | 186 | def plot_result(img_orig, result): 187 | 188 | for idx, person_data in enumerate(result): 189 | 190 | bbox = person_data['bbox'] 191 | keypoints = person_data['keypoints'] 192 | 193 | x = keypoints[0::3] 194 | y = keypoints[1::3] 195 | v = keypoints[2::3] 196 | 197 | joints = [] 198 | for i in range(len(x)): 199 | joints.append([x[i], y[i], v[i]]) 200 | 201 | img_orig = draw(img_orig, joints, bbox) 202 | return img_orig -------------------------------------------------------------------------------- /datasets/coco_data/ImageAugmentation.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import random 4 | import sys 5 | 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from scipy import misc, ndimage 
10 | 11 | 12 | """The purpose of Augmentor is to automate image augmentation 13 | in order to expand datasets as input for our algorithms. 14 | :aut_scale : Scales them by dice2 (<1, so it is zoom out). 15 | :aug_croppad centerB: int with shape (2,), centerB will point to centerA. 16 | :aug_flip: Mirrors the image around a vertical line running through its center. 17 | :aug_rotate: Rotates the image. The angle of rotation, in degrees, 18 | is specified by a random integer value that is included 19 | in the transform argument. 20 | 21 | :param params_transform: store the value of stride and crop_szie_y, crop_size_x 22 | """ 23 | 24 | 25 | def aug_scale(meta, img, mask_miss, params_transform): 26 | dice = random.random() # (0,1) 27 | if (dice > params_transform['scale_prob']): 28 | 29 | scale_multiplier = 1 30 | else: 31 | dice2 = random.random() 32 | # linear shear into [scale_min, scale_max] 33 | scale_multiplier = ( 34 | params_transform['scale_max'] - params_transform['scale_min']) * dice2 + \ 35 | params_transform['scale_min'] 36 | scale_abs = params_transform['target_dist'] / meta['scale_provided'] 37 | scale = scale_abs * scale_multiplier 38 | img = cv2.resize(img, (0, 0), fx=scale, fy=scale, 39 | interpolation=cv2.INTER_CUBIC) 40 | 41 | mask_miss = cv2.resize(mask_miss, (0, 0), fx=scale, 42 | fy=scale, interpolation=cv2.INTER_CUBIC) 43 | #mask_all = cv2.resize(mask_all, (0, 0), fx=scale, 44 | # fy=scale, interpolation=cv2.INTER_CUBIC) 45 | 46 | # modify meta data 47 | meta['objpos'] *= scale 48 | meta['joint_self'][:, :2] *= scale 49 | if (meta['numOtherPeople'] != 0): 50 | meta['objpos_other'] *= scale 51 | meta['joint_others'][:, :, :2] *= scale 52 | return meta, img, mask_miss#, mask_all 53 | 54 | 55 | def aug_croppad(meta, img, mask_miss, params_transform): 56 | dice_x = random.random() 57 | dice_y = random.random() 58 | crop_x = int(params_transform['crop_size_x']) 59 | crop_y = int(params_transform['crop_size_y']) 60 | x_offset = int((dice_x - 0.5) * 2 * 61 | params_transform['center_perterb_max']) 62 | y_offset = int((dice_y - 0.5) * 2 * 63 | params_transform['center_perterb_max']) 64 | 65 | center = meta['objpos'] + np.array([x_offset, y_offset]) 66 | center = center.astype(int) 67 | 68 | # pad up and down 69 | pad_v = np.ones((crop_y, img.shape[1], 3), dtype=np.uint8) * 128 70 | pad_v_mask_miss = np.ones( 71 | (crop_y, mask_miss.shape[1]), dtype=np.uint8) * 255 72 | 73 | img = np.concatenate((pad_v, img, pad_v), axis=0) 74 | mask_miss = np.concatenate( 75 | (pad_v_mask_miss, mask_miss, pad_v_mask_miss), axis=0) 76 | #mask_all = np.concatenate( 77 | # (pad_v_mask_miss, mask_all, pad_v_mask_miss), axis=0) 78 | 79 | # pad right and left 80 | pad_h = np.ones((img.shape[0], crop_x, 3), dtype=np.uint8) * 128 81 | pad_h_mask_miss = np.ones( 82 | (mask_miss.shape[0], crop_x), dtype=np.uint8) * 255 83 | 84 | img = np.concatenate((pad_h, img, pad_h), axis=1) 85 | mask_miss = np.concatenate( 86 | (pad_h_mask_miss, mask_miss, pad_h_mask_miss), axis=1) 87 | #mask_all = np.concatenate( 88 | # (pad_h_mask_miss, mask_all, pad_h_mask_miss), axis=1) 89 | 90 | img = img[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y, 91 | center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x, :] 92 | 93 | mask_miss = mask_miss[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y + 1, center[0] +int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x + 1] 94 | #mask_all = mask_all[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y + 1, center[0] + int(crop_x / 
2):center[0] + int(crop_x / 2) + crop_x + 1] 95 | 96 | offset_left = crop_x / 2 - center[0] 97 | offset_up = crop_y / 2 - center[1] 98 | 99 | offset = np.array([offset_left, offset_up]) 100 | meta['objpos'] += offset 101 | meta['joint_self'][:, :2] += offset 102 | mask = np.logical_or.reduce((meta['joint_self'][:, 0] >= crop_x, 103 | meta['joint_self'][:, 0] < 0, 104 | meta['joint_self'][:, 1] >= crop_y, 105 | meta['joint_self'][:, 1] < 0)) 106 | 107 | meta['joint_self'][mask == True, 2] = 2 108 | if (meta['numOtherPeople'] != 0): 109 | meta['objpos_other'] += offset 110 | meta['joint_others'][:, :, :2] += offset 111 | mask = np.logical_or.reduce((meta['joint_others'][:, :, 0] >= crop_x, 112 | meta['joint_others'][:, :, 0] < 0, 113 | meta['joint_others'][:, :, 1] >= crop_y, 114 | meta['joint_others'][:, :, 1] < 0)) 115 | 116 | meta['joint_others'][mask == True, 2] = 2 117 | 118 | return meta, img, mask_miss#, mask_all 119 | 120 | 121 | def aug_flip(meta, img, mask_miss, params_transform): 122 | mode = params_transform['mode'] 123 | num_other_people = meta['numOtherPeople'] 124 | dice = random.random() 125 | doflip = dice <= params_transform['flip_prob'] 126 | 127 | if doflip: 128 | img = img.copy() 129 | cv2.flip(src=img, flipCode=1, dst=img) 130 | w = img.shape[1] 131 | 132 | mask_miss = mask_miss.copy() 133 | #mask_all = mask_all.copy() 134 | cv2.flip(src=mask_miss, flipCode=1, dst=mask_miss) 135 | #cv2.flip(src=mask_all, flipCode=1, dst=mask_all) 136 | 137 | ''' 138 | The order in this work: 139 | (0-'nose' 1-'neck' 2-'right_shoulder' 3-'right_elbow' 4-'right_wrist' 140 | 5-'left_shoulder' 6-'left_elbow' 7-'left_wrist' 8-'right_hip' 141 | 9-'right_knee' 10-'right_ankle' 11-'left_hip' 12-'left_knee' 142 | 13-'left_ankle' 14-'right_eye' 15-'left_eye' 16-'right_ear' 143 | 17-'left_ear' ) 144 | ''' 145 | meta['objpos'][0] = w - 1 - meta['objpos'][0] 146 | meta['joint_self'][:, 0] = w - 1 - meta['joint_self'][:, 0] 147 | # print meta['joint_self'] 148 | meta['joint_self'] = meta['joint_self'][[0, 1, 5, 6, 149 | 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16]] 150 | if (num_other_people != 0): 151 | meta['objpos_other'][:, 0] = w - 1 - meta['objpos_other'][:, 0] 152 | meta['joint_others'][:, :, 0] = w - \ 153 | 1 - meta['joint_others'][:, :, 0] 154 | for i in range(num_other_people): 155 | meta['joint_others'][i] = meta['joint_others'][i][[ 156 | 0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16]] 157 | 158 | return meta, img, mask_miss#, mask_all 159 | 160 | 161 | def rotatepoint(p, R): 162 | point = np.zeros((3, 1)) 163 | point[0] = p[0] 164 | point[1] = p[1] 165 | point[2] = 1 166 | 167 | new_point = R.dot(point) 168 | 169 | p[0] = new_point[0] 170 | 171 | p[1] = new_point[1] 172 | return p 173 | 174 | 175 | # The correct way to rotation an image 176 | # http://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/ 177 | 178 | 179 | def rotate_bound(image, angle, bordervalue): 180 | # grab the dimensions of the image and then determine the 181 | # center 182 | (h, w) = image.shape[:2] 183 | (cX, cY) = (w // 2, h // 2) 184 | 185 | # grab the rotation matrix (applying the negative of the 186 | # angle to rotate clockwise), then grab the sine and cosine 187 | # (i.e., the rotation components of the matrix) 188 | M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0) 189 | cos = np.abs(M[0, 0]) 190 | sin = np.abs(M[0, 1]) 191 | 192 | # compute the new bounding dimensions of the image 193 | nW = int((h * sin) + (w * cos)) 194 | nH = int((h * cos) + (w * sin)) 
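    # (added comment) nW x nH is the axis-aligned bounding box of the rotated
    # frame: |cos| and |sin| are read from the rotation matrix M above, so the
    # enlarged canvas holds every corner of the original image without clipping.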
195 | 196 | # adjust the rotation matrix to take into account translation 197 | M[0, 2] += (nW / 2) - cX 198 | M[1, 2] += (nH / 2) - cY 199 | 200 | # perform the actual rotation and return the image 201 | return cv2.warpAffine(image, M, (nW, nH), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, 202 | borderValue=bordervalue), M 203 | 204 | 205 | def aug_rotate(meta, img, mask_miss, params_transform, type="random", input=0, fillType="nearest", constant=0): 206 | dice = random.random() 207 | degree = (dice - 0.5) * 2 * \ 208 | params_transform['max_rotate_degree'] # degree [-40,40] 209 | 210 | img_rot, R = rotate_bound(img, np.copy(degree), (128, 128, 128)) 211 | 212 | # Not sure it will cause mask_miss to rotate rightly, just avoid it fails 213 | # by np.copy(). 214 | mask_miss_rot, _ = rotate_bound(mask_miss, np.copy(degree), (255)) 215 | #mask_all_rot, _ = rotate_bound(mask_all, np.copy(degree), (255)) 216 | 217 | # modify meta data 218 | meta['objpos'] = rotatepoint(meta['objpos'], R) 219 | 220 | for i in range(18): 221 | meta['joint_self'][i, :] = rotatepoint(meta['joint_self'][i, :], R) 222 | 223 | for j in range(meta['numOtherPeople']): 224 | 225 | meta['objpos_other'][j, :] = rotatepoint(meta['objpos_other'][j, :], R) 226 | 227 | for i in range(18): 228 | meta['joint_others'][j, i, :] = rotatepoint( 229 | meta['joint_others'][j, i, :], R) 230 | 231 | return meta, img_rot, mask_miss_rot#, mask_all_rot 232 | 233 | 234 | def aug_scale_bbox(meta, img, params_transform): 235 | dice = random.random() # (0,1) 236 | if (dice > params_transform['scale_prob']): 237 | 238 | scale_multiplier = 1 239 | else: 240 | dice2 = random.random() 241 | # linear shear into [scale_min, scale_max] 242 | scale_multiplier = ( 243 | params_transform['scale_max'] - params_transform['scale_min']) * dice2 + \ 244 | params_transform['scale_min'] 245 | scale_abs = params_transform['target_dist'] / meta['scale_provided'] 246 | scale = scale_abs * scale_multiplier 247 | img = cv2.resize(img, (0, 0), fx=scale, fy=scale, 248 | interpolation=cv2.INTER_CUBIC) 249 | 250 | meta['objpos'] *= scale 251 | adjust_instance_list = [] 252 | for m in meta['instance_mask_list']: 253 | m = cv2.resize(m, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_AREA) 254 | adjust_instance_list.append(m) 255 | meta['instance_mask_list'] = adjust_instance_list 256 | 257 | return meta, img 258 | 259 | 260 | def aug_croppad_bbox(meta, img, params_transform): 261 | dice_x = random.random() 262 | dice_y = random.random() 263 | crop_x = int(params_transform['crop_size_x']) 264 | crop_y = int(params_transform['crop_size_y']) 265 | x_offset = int((dice_x - 0.5) * 2 * 266 | params_transform['center_perterb_max']) 267 | y_offset = int((dice_y - 0.5) * 2 * 268 | params_transform['center_perterb_max']) 269 | 270 | center = meta['objpos'] + np.array([x_offset, y_offset]) 271 | center = center.astype(int) 272 | 273 | # pad up and down 274 | pad_v = np.ones((crop_y, img.shape[1], 3), dtype=np.uint8) * 128 275 | mask = meta['instance_mask_list'][0] 276 | pad_v_instance_mask = np.zeros( 277 | (crop_y, mask.shape[1]), dtype=np.uint8) 278 | 279 | img = np.concatenate((pad_v, img, pad_v), axis=0) 280 | adjust_instance_list = [] 281 | for m in meta['instance_mask_list']: 282 | m = np.concatenate((pad_v_instance_mask, m, pad_v_instance_mask), axis=0) 283 | adjust_instance_list.append(m) 284 | meta['instance_mask_list'] = adjust_instance_list 285 | 286 | # pad right and left 287 | pad_h = np.ones((img.shape[0], crop_x, 3), dtype=np.uint8) * 128 288 | mask = 
meta['instance_mask_list'][0] 289 | pad_h_instance_mask = np.zeros( 290 | (mask.shape[0], crop_x), dtype=np.uint8) 291 | 292 | img = np.concatenate((pad_h, img, pad_h), axis=1) 293 | adjust_instance_list = [] 294 | for m in meta['instance_mask_list']: 295 | m = np.concatenate((pad_h_instance_mask, m, pad_h_instance_mask), axis=1) 296 | m = m[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y + 1, 297 | center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x + 1] 298 | adjust_instance_list.append(m) 299 | meta['instance_mask_list'] = adjust_instance_list 300 | 301 | img = img[center[1] + int(crop_y / 2):center[1] + int(crop_y / 2) + crop_y, 302 | center[0] + int(crop_x / 2):center[0] + int(crop_x / 2) + crop_x, :] 303 | 304 | return meta, img 305 | 306 | 307 | def aug_flip_bbox(meta, img, params_transform): 308 | dice = random.random() 309 | doflip = dice <= params_transform['flip_prob'] 310 | 311 | if doflip: 312 | img = img.copy() 313 | cv2.flip(src=img, flipCode=1, dst=img) 314 | 315 | adjust_instance_list = [] 316 | for m in meta['instance_mask_list']: 317 | m = m.copy() 318 | cv2.flip(src=m, flipCode=1, dst=m) 319 | adjust_instance_list.append(m) 320 | meta['instance_mask_list'] = adjust_instance_list 321 | 322 | return meta, img 323 | 324 | 325 | def aug_rotate_bbox(meta, img, params_transform, type="random", input=0, fillType="nearest", constant=0): 326 | dice = random.random() 327 | degree = (dice - 0.5) * 2 * \ 328 | params_transform['max_rotate_degree'] # degree [-40,40] 329 | 330 | img_rot, _ = rotate_bound(img, np.copy(degree), (128, 128, 128)) 331 | 332 | # Not sure it will cause mask_miss to rotate rightly, just avoid it fails 333 | # by np.copy(). 334 | adjust_instance_list = [] 335 | for m in meta['instance_mask_list']: 336 | m, _ = rotate_bound(m, np.copy(degree), (0)) 337 | adjust_instance_list.append(m) 338 | meta['instance_mask_list'] = adjust_instance_list 339 | 340 | return meta, img_rot -------------------------------------------------------------------------------- /training/trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | import datetime 6 | import numpy as np 7 | from collections import OrderedDict 8 | import shutil 9 | 10 | #import encoding 11 | import torch.nn as nn 12 | from torch.optim.lr_scheduler import ReduceLROnPlateau, _LRScheduler 13 | from torch.optim.optimizer import Optimizer 14 | 15 | from lib.utils.log import logger 16 | from lib.utils.timer import Timer 17 | from lib.utils.path import mkdir 18 | import lib.utils.meter as meter_utils 19 | import network.net_utils as net_utils 20 | from datasets.data_parallel import ListDataParallel 21 | 22 | 23 | def get_learning_rates(optimizer): 24 | lrs = [pg['lr'] for pg in optimizer.param_groups] 25 | lrs = np.asarray(lrs, dtype=np.float) 26 | return lrs 27 | 28 | 29 | def default_visualization_fn(writer, step, log_dict): 30 | """ 31 | Visualization with tensorboard 32 | :type writer: SummaryWriter 33 | :type step: int 34 | :type log_dict: dict 35 | :return: 36 | """ 37 | for k, v in log_dict.items(): 38 | if isinstance(v, (float, int)): 39 | writer.add_scalar(k, v, step) 40 | elif isinstance(v, np.ndarray): 41 | writer.add_image(k, v, step) 42 | 43 | 44 | class TrainParams(object): 45 | # required params 46 | exp_name = 'experiment_name' 47 | subnet_name = 'keypoint_subnet' 48 | batch_size = 32 49 | max_epoch = 30 50 | optimizer = None 51 | 52 | # learning rate 
scheduler 53 | lr_scheduler = None # should be an instance of ReduceLROnPlateau or _LRScheduler 54 | max_grad_norm = np.inf 55 | 56 | # params based on your local env 57 | gpus = [0] 58 | save_dir = None # default `save_dir` is `outputs/{exp_name}` 59 | 60 | # loading existing checkpoint 61 | ckpt = None # path to the ckpt file, will load the last ckpt in the `save_dir` if `None` 62 | re_init = False # ignore ckpt if `True` 63 | zero_epoch = False # force `last_epoch` to zero 64 | ignore_opt_state = False # ignore the saved optimizer states 65 | 66 | # saving checkpoints 67 | save_freq_epoch = 1 # save one ckpt per `save_freq_epoch` epochs 68 | save_freq_step = sys.maxsize # save one ckpt per `save_freq_setp` steps, default value is inf 69 | save_nckpt_max = sys.maxsize # max number of saved ckpts 70 | 71 | # validation during training 72 | val_freq = 500 # run validation per `val_freq` steps 73 | val_nbatch = 10 # number of batches to be validated 74 | val_nbatch_end_epoch = 200 # max number of batches to be validated after each epoch 75 | 76 | # visualization 77 | print_freq = 20 # print log per `print_freq` steps 78 | use_tensorboard = False # use tensorboardX if True 79 | visualization_fn = None # custom function to handle `log_dict`, default value is `default_visualization_fn` 80 | 81 | def update(self, params_dict): 82 | state_dict = self.state_dict() 83 | for k, v in params_dict.items(): 84 | if k in state_dict or hasattr(self, k): 85 | setattr(self, k, v) 86 | else: 87 | logger.warning('Unknown option: {}: {}'.format(k, v)) 88 | 89 | def state_dict(self): 90 | state_dict = OrderedDict() 91 | for k in TrainParams.__dict__.keys(): 92 | if not k.startswith('_'): 93 | state_dict[k] = getattr(self, k) 94 | del state_dict['update'] 95 | del state_dict['state_dict'] 96 | 97 | return state_dict 98 | 99 | def __str__(self): 100 | state_dict = self.state_dict() 101 | text = 'TrainParams {\n' 102 | for k, v in state_dict.items(): 103 | text += '\t{}: {}\n'.format(k, v) 104 | text += '}\n' 105 | return text 106 | 107 | 108 | class Trainer(object): 109 | 110 | TrainParams = TrainParams 111 | 112 | # hooks 113 | on_start_epoch_hooks = [] 114 | on_end_epoch_hooks = [] 115 | 116 | def __init__(self, model, train_params, batch_processor, train_data, val_data=None): 117 | assert isinstance(train_params, TrainParams) 118 | self.params = train_params 119 | 120 | # Data loaders 121 | self.train_data = train_data 122 | self.val_data = val_data # sDataLoader.copy(val_data) if isinstance(val_data, DataLoader) else val_data 123 | # self.val_stream = self.val_data.get_stream() if self.val_data else None 124 | 125 | self.batch_processor = batch_processor 126 | self.batch_per_epoch = len(self.train_data) 127 | 128 | # set CUDA_VISIBLE_DEVICES=gpus 129 | gpus = ','.join([str(x) for x in self.params.gpus]) 130 | os.environ['CUDA_VISIBLE_DEVICES'] = gpus 131 | self.params.gpus = tuple(range(len(self.params.gpus))) 132 | logger.info('Set CUDA_VISIBLE_DEVICES to {}...'.format(gpus)) 133 | 134 | # Optimizer and learning rate 135 | self.last_epoch = 0 136 | self.optimizer = self.params.optimizer # type: Optimizer 137 | if not isinstance(self.optimizer, Optimizer): 138 | logger.error('optimizer should be an instance of Optimizer, ' 139 | 'but got {}'.format(type(self.optimizer))) 140 | raise ValueError 141 | self.lr_scheduler = self.params.lr_scheduler # type: ReduceLROnPlateau or _LRScheduler 142 | if self.lr_scheduler and not isinstance(self.lr_scheduler, (ReduceLROnPlateau, _LRScheduler)): 143 | 
logger.error('lr_scheduler should be an instance of _LRScheduler or ReduceLROnPlateau, ' 144 | 'but got {}'.format(type(self.lr_scheduler))) 145 | raise ValueError 146 | logger.info('Set lr_scheduler to {}'.format(type(self.lr_scheduler))) 147 | 148 | self.log_values = OrderedDict() 149 | self.batch_timer = Timer() 150 | self.data_timer = Timer() 151 | 152 | # load model 153 | self.model = model 154 | ckpt = self.params.ckpt 155 | if not self.params.save_dir: 156 | self.params.save_dir = os.path.join('outputs', self.params.exp_name) 157 | mkdir(self.params.save_dir) 158 | logger.info('Set output dir to {}'.format(self.params.save_dir)) 159 | if ckpt is None: 160 | # find the last ckpt 161 | ckpts = [fname for fname in os.listdir(self.params.save_dir) if os.path.splitext(fname)[-1] == '.h5'] 162 | ckpt = os.path.join( 163 | self.params.save_dir, sorted(ckpts, key=lambda name: int(os.path.splitext(name)[0].split('_')[-1]))[-1] 164 | ) if len(ckpts) > 0 else None 165 | 166 | if ckpt is not None and not self.params.re_init: 167 | self._load_ckpt(ckpt) 168 | logger.info('Load ckpt from {}'.format(ckpt)) 169 | 170 | self.model = ListDataParallel(self.model, device_ids=self.params.gpus) 171 | self.model = self.model.cuda(self.params.gpus[0]) 172 | self.model.train() 173 | if self.params.subnet_name != 'keypoint_subnet': 174 | self.model.module.freeze_bn() # nn.BatchNorm2d.eval() if not 'keypoint_subnet' 175 | 176 | def train(self): 177 | best_loss = np.inf 178 | for epoch in range(self.last_epoch, self.params.max_epoch): 179 | self.last_epoch += 1 180 | logger.info('Start training epoch {}'.format(self.last_epoch)) 181 | 182 | for fun in self.on_start_epoch_hooks: 183 | fun(self) 184 | 185 | # adjust learning rate 186 | if isinstance(self.lr_scheduler, _LRScheduler): 187 | cur_lrs = get_learning_rates(self.optimizer) 188 | self.lr_scheduler.step(self.last_epoch) 189 | logger.info('Set learning rates from {} to {}'.format(cur_lrs, get_learning_rates(self.optimizer))) 190 | 191 | train_loss = self._train_one_epoch() 192 | 193 | for fun in self.on_end_epoch_hooks: 194 | fun(self) 195 | 196 | # save model 197 | if (self.last_epoch % self.params.save_freq_epoch == 0) or (self.last_epoch == self.params.max_epoch - 1): 198 | save_name = 'ckpt_{}.h5'.format(self.last_epoch) 199 | save_to = os.path.join(self.params.save_dir, save_name) 200 | self._save_ckpt(save_to) 201 | 202 | # find best model 203 | if self.params.val_nbatch_end_epoch > 0: 204 | val_loss = self._val_one_epoch(self.params.val_nbatch_end_epoch) 205 | if val_loss < best_loss: 206 | best_file = os.path.join(self.params.save_dir, 207 | 'ckpt_{}_{:.5f}.h5.best'.format(self.last_epoch, val_loss)) 208 | shutil.copyfile(save_to, best_file) 209 | logger.info('Found a better ckpt ({:.5f} -> {:.5f}), ' 210 | 'saved to {}'.format(best_loss, val_loss, best_file)) 211 | best_loss = val_loss 212 | 213 | if isinstance(self.lr_scheduler, ReduceLROnPlateau): 214 | self.lr_scheduler.step(val_loss, self.last_epoch) 215 | 216 | def _save_ckpt(self, save_to): 217 | model = self.model.module if isinstance(self.model, nn.DataParallel) else self.model 218 | net_utils.save_net(save_to, model, epoch=self.last_epoch, 219 | optimizers=[self.optimizer], rm_prev_opt=True, max_n_ckpts=self.params.save_nckpt_max) 220 | logger.info('Save ckpt to {}'.format(save_to)) 221 | 222 | def _load_ckpt(self, ckpt): 223 | epoch, state_dicts = net_utils.load_net(ckpt, self.model, load_state_dict=True) 224 | if not self.params.ignore_opt_state and not self.params.zero_epoch and epoch 
>= 0: 225 | self.last_epoch = epoch 226 | logger.info('Set last epoch to {}'.format(self.last_epoch)) 227 | if state_dicts is not None: 228 | self.optimizer.load_state_dict(state_dicts[0]) 229 | net_utils.set_optimizer_state_devices(self.optimizer.state, self.params.gpus[0]) 230 | logger.info('Load optimizer state from checkpoint, ' 231 | 'new learning rate: {}'.format(get_learning_rates(self.optimizer))) 232 | 233 | def _train_one_epoch(self): 234 | self.batch_timer.clear() 235 | self.data_timer.clear() 236 | self.batch_timer.tic() 237 | self.data_timer.tic() 238 | total_loss = meter_utils.AverageValueMeter() 239 | for step, batch in enumerate(self.train_data): 240 | inputs, gts, _ = self.batch_processor(self, batch) 241 | 242 | self.data_timer.toc() 243 | 244 | # forward 245 | output, saved_for_loss = self.model(*inputs) 246 | 247 | loss, saved_for_log = self.model.module.build_loss(saved_for_loss, *gts) 248 | 249 | # backward 250 | self.optimizer.zero_grad() 251 | loss.backward() 252 | total_loss.add(loss.item()) 253 | 254 | # clip grad 255 | if not np.isinf(self.params.max_grad_norm): 256 | max_norm = nn.utils.clip_grad_norm(self.model.parameters(), self.params.max_grad_norm, float('inf')) 257 | saved_for_log['max_grad'] = max_norm 258 | 259 | self.optimizer.step(None) 260 | 261 | self._process_log(saved_for_log, self.log_values) 262 | self.batch_timer.toc() 263 | 264 | # print log 265 | reset = False 266 | 267 | if step % self.params.print_freq == 0: 268 | self._print_log(step, self.log_values, title='Training', max_n_batch=self.batch_per_epoch) 269 | reset = True 270 | 271 | if step % self.params.save_freq_step == 0 and step > 0: 272 | save_to = os.path.join(self.params.save_dir, 273 | 'ckpt_{}.h5.ckpt'.format((self.last_epoch - 1) * self.batch_per_epoch + step)) 274 | self._save_ckpt(save_to) 275 | 276 | if reset: 277 | self._reset_log(self.log_values) 278 | 279 | self.data_timer.tic() 280 | self.batch_timer.tic() 281 | 282 | total_loss, std = total_loss.value() 283 | return total_loss 284 | 285 | def _val_one_epoch(self, n_batch): 286 | training_mode = self.model.training 287 | self.model.eval() 288 | logs = OrderedDict() 289 | sum_loss = meter_utils.AverageValueMeter() 290 | logger.info('Val on validation set...') 291 | 292 | self.batch_timer.clear() 293 | self.data_timer.clear() 294 | self.batch_timer.tic() 295 | self.data_timer.tic() 296 | for step, batch in enumerate(self.val_data): 297 | self.data_timer.toc() 298 | if step > n_batch: 299 | break 300 | 301 | inputs, gts, _ = self.batch_processor(self, batch) 302 | _, saved_for_loss = self.model(*inputs) 303 | self.batch_timer.toc() 304 | 305 | loss, saved_for_log = self.model.module.build_loss(saved_for_loss, *gts) 306 | sum_loss.add(loss.item()) 307 | self._process_log(saved_for_log, logs) 308 | 309 | if step % self.params.print_freq == 0 or step == len(self.val_data)-1: 310 | self._print_log(step, logs, 'Validation', max_n_batch=min(n_batch, len(self.val_data))) 311 | 312 | self.data_timer.tic() 313 | self.batch_timer.tic() 314 | 315 | mean, std = sum_loss.value() 316 | logger.info('Validation loss: mean: {}, std: {}'.format(mean, std)) 317 | self.model.train(mode=training_mode) 318 | if self.params.subnet_name != 'keypoint_subnet': 319 | self.model.module.freeze_bn() 320 | return mean 321 | 322 | def _process_log(self, src_dict, dest_dict): 323 | for k, v in src_dict.items(): 324 | if isinstance(v, (int, float)): 325 | dest_dict.setdefault(k, meter_utils.AverageValueMeter()) 326 | dest_dict[k].add(float(v)) 327 | else: 328 | 
dest_dict[k] = v 329 | 330 | def _print_log(self, step, log_values, title='', max_n_batch=None): 331 | log_str = '{}\n'.format(self.params.exp_name) 332 | log_str += '{}: epoch {}'.format(title, self.last_epoch) 333 | 334 | if max_n_batch: 335 | log_str += '[{}/{}], lr: {}'.format(step, max_n_batch, get_learning_rates(self.optimizer)) 336 | 337 | i = 0 338 | # global_step = step + (self.last_epoch - 1) * self.batch_per_epoch 339 | for k, v in log_values.items(): 340 | if isinstance(v, meter_utils.AverageValueMeter): 341 | mean, std = v.value() 342 | log_str += '\n\t{}: {:.10f}'.format(k, mean) 343 | i += 1 344 | 345 | if max_n_batch: 346 | # print time 347 | data_time = self.data_timer.duration + 1e-6 348 | batch_time = self.batch_timer.duration + 1e-6 349 | rest_seconds = int((max_n_batch - step) * batch_time) 350 | log_str += '\n\t({:.2f}/{:.2f}s,' \ 351 | ' fps:{:.1f}, rest: {})'.format(data_time, batch_time, 352 | self.params.batch_size / batch_time, 353 | str(datetime.timedelta(seconds=rest_seconds))) 354 | self.batch_timer.clear() 355 | self.data_timer.clear() 356 | 357 | logger.info(log_str) 358 | 359 | def _reset_log(self, log_values): 360 | for k, v in log_values.items(): 361 | if isinstance(v, meter_utils.AverageValueMeter): 362 | v.reset() 363 | -------------------------------------------------------------------------------- /network/posenet.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # keypoint subnet + detection subnet(RetinaNet) + PRN 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from torch.autograd import Variable 9 | from collections import OrderedDict 10 | from network.fpn import FPN50, FPN101 11 | from torch.nn import init 12 | 13 | from network.utils import BBoxTransform, ClipBoxes 14 | from network.anchors import Anchors 15 | import network.losses as losses 16 | from lib.nms.pth_nms import pth_nms 17 | 18 | 19 | def nms(dets, thresh): 20 | "Dispatch to either CPU or GPU NMS implementations.\ 21 | Accept dets as tensor""" 22 | return pth_nms(dets, thresh) 23 | 24 | 25 | class Concat(nn.Module): 26 | def __init__(self): 27 | super(Concat, self).__init__() 28 | 29 | def forward(self, up1, up2, up3, up4): 30 | return torch.cat((up1, up2, up3, up4), 1) 31 | 32 | 33 | class RegressionModel(nn.Module): 34 | def __init__(self, num_features_in, num_anchors=9, feature_size=256): 35 | super(RegressionModel, self).__init__() 36 | 37 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 38 | self.act1 = nn.ReLU() 39 | 40 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 41 | self.act2 = nn.ReLU() 42 | 43 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 44 | self.act3 = nn.ReLU() 45 | 46 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 47 | self.act4 = nn.ReLU() 48 | 49 | self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1) 50 | 51 | def forward(self, x): 52 | out = self.conv1(x) 53 | out = self.act1(out) 54 | 55 | out = self.conv2(out) 56 | out = self.act2(out) 57 | 58 | out = self.conv3(out) 59 | out = self.act3(out) 60 | 61 | out = self.conv4(out) 62 | out = self.act4(out) 63 | 64 | out = self.output(out) 65 | 66 | # out is B x C x W x H, with C = 4*num_anchors 67 | out = out.permute(0, 2, 3, 1) 68 | 69 | return out.contiguous().view(out.shape[0], -1, 4) 70 | 71 | 72 | class ClassificationModel(nn.Module): 73 | 
def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256): 74 | super(ClassificationModel, self).__init__() 75 | 76 | self.num_classes = num_classes 77 | self.num_anchors = num_anchors 78 | 79 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 80 | self.act1 = nn.ReLU() 81 | 82 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 83 | self.act2 = nn.ReLU() 84 | 85 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 86 | self.act3 = nn.ReLU() 87 | 88 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 89 | self.act4 = nn.ReLU() 90 | 91 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1) 92 | self.output_act = nn.Sigmoid() 93 | 94 | def forward(self, x): 95 | out = self.conv1(x) 96 | out = self.act1(out) 97 | 98 | out = self.conv2(out) 99 | out = self.act2(out) 100 | 101 | out = self.conv3(out) 102 | out = self.act3(out) 103 | 104 | out = self.conv4(out) 105 | out = self.act4(out) 106 | 107 | out = self.output(out) 108 | out = self.output_act(out) 109 | 110 | # out is B x C x W x H, with C = n_classes + n_anchors 111 | out1 = out.permute(0, 2, 3, 1) 112 | 113 | batch_size, width, height, channels = out1.shape 114 | 115 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 116 | 117 | return out2.contiguous().view(x.shape[0], -1, self.num_classes) 118 | 119 | 120 | class Flatten(nn.Module): 121 | def forward(self, input): 122 | return input.view(input.size(0), -1) 123 | 124 | 125 | class Add(nn.Module): 126 | def forward(self, input1, input2): 127 | return torch.add(input1, input2) 128 | 129 | 130 | class PRN(nn.Module): 131 | def __init__(self,node_count, coeff): 132 | super(PRN, self).__init__() 133 | self.flatten = Flatten() 134 | self.height = coeff*28 135 | self.width = coeff*18 136 | self.dens1 = nn.Linear(self.height*self.width*17, node_count) 137 | self.bneck = nn.Linear(node_count, node_count) 138 | self.dens2 = nn.Linear(node_count, self.height*self.width*17) 139 | self.drop = nn.Dropout() 140 | self.add = Add() 141 | self.softmax = nn.Softmax(dim=1) 142 | 143 | def forward(self, x): 144 | res = self.flatten(x) 145 | out = self.drop(F.relu(self.dens1(res))) 146 | out = self.drop(F.relu(self.bneck(out))) 147 | out = F.relu(self.dens2(out)) 148 | out = self.add(out, res) 149 | out = self.softmax(out) 150 | out = out.view(out.size()[0], self.height, self.width, 17) 151 | 152 | return out 153 | 154 | class poseNet(nn.Module): 155 | def __init__(self, layers, prn_node_count=1024, prn_coeff=2): 156 | super(poseNet, self).__init__() 157 | if layers == 101: 158 | self.fpn = FPN101() 159 | if layers == 50: 160 | self.fpn = FPN50() 161 | 162 | ################################################################################## 163 | # keypoints subnet 164 | # intermediate supervision 165 | self.convfin_k2 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0) 166 | self.convfin_k3 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0) 167 | self.convfin_k4 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0) 168 | self.convfin_k5 = nn.Conv2d(256, 19, kernel_size=1, stride=1, padding=0) 169 | 170 | # 2 conv(kernel=3x3),change channels from 256 to 128 171 | self.convt1 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1) 172 | self.convt2 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1) 173 | self.convt3 = nn.Conv2d(256, 128, kernel_size=3, stride=1, 
padding=1) 174 | self.convt4 = nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1) 175 | self.convs1 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 176 | self.convs2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 177 | self.convs3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 178 | self.convs4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 179 | 180 | self.upsample1 = nn.Upsample(scale_factor=8, mode='nearest', align_corners=None) 181 | self.upsample2 = nn.Upsample(scale_factor=4, mode='nearest', align_corners=None) 182 | self.upsample3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 183 | # self.upsample4 = nn.Upsample(size=(120,120),mode='bilinear',align_corners=True) 184 | 185 | self.concat = Concat() 186 | self.conv2 = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1) 187 | self.convfin = nn.Conv2d(256, 18, kernel_size=1, stride=1, padding=0) 188 | 189 | ################################################################################## 190 | # detection subnet 191 | self.regressionModel = RegressionModel(256) 192 | self.classificationModel = ClassificationModel(256, num_classes=1) 193 | self.anchors = Anchors() 194 | self.regressBoxes = BBoxTransform() 195 | self.clipBoxes = ClipBoxes() 196 | self.focalLoss = losses.FocalLoss() 197 | 198 | ################################################################################## 199 | # prn subnet 200 | self.prn = PRN(prn_node_count, prn_coeff) 201 | 202 | ################################################################################## 203 | # initialize weights 204 | self._initialize_weights_norm() 205 | prior = 0.01 206 | self.classificationModel.output.weight.data.fill_(0) 207 | self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior)) 208 | self.regressionModel.output.weight.data.fill_(0) 209 | self.regressionModel.output.bias.data.fill_(0) 210 | 211 | self.freeze_bn() # from retinanet 212 | 213 | def _initialize_weights_norm(self): 214 | for m in self.modules(): 215 | if isinstance(m, nn.Conv2d): 216 | init.normal_(m.weight, std=0.01) 217 | if m.bias is not None: # resnet101 conv2d doesn't add bias 218 | init.constant_(m.bias, 0.0) 219 | 220 | def freeze_bn(self): 221 | '''Freeze BatchNorm layers.''' 222 | for layer in self.modules(): 223 | if isinstance(layer, nn.BatchNorm2d): 224 | layer.eval() 225 | 226 | def forward(self, x): 227 | 228 | img_batch, subnet_name = x 229 | 230 | if subnet_name == 'keypoint_subnet': 231 | return self.keypoint_forward(img_batch) 232 | elif subnet_name == 'detection_subnet': 233 | return self.detection_forward(img_batch) 234 | elif subnet_name == 'prn_subnet': 235 | return self.prn_forward(img_batch) 236 | else: # entire_net 237 | features = self.fpn(img_batch) 238 | p2, p3, p4, p5 = features[0] # fpn features for keypoint subnet 239 | features = features[1] # fpn features for detection subnet 240 | 241 | ################################################################################## 242 | # keypoints subnet 243 | p5 = self.convt1(p5) 244 | p5 = self.convs1(p5) 245 | p4 = self.convt2(p4) 246 | p4 = self.convs2(p4) 247 | p3 = self.convt3(p3) 248 | p3 = self.convs3(p3) 249 | p2 = self.convt4(p2) 250 | p2 = self.convs4(p2) 251 | 252 | p5 = self.upsample1(p5) 253 | p4 = self.upsample2(p4) 254 | p3 = self.upsample3(p3) 255 | 256 | concat = self.concat(p5, p4, p3, p2) 257 | predict_keypoint = self.convfin(F.relu(self.conv2(concat))) 258 | del p5, p4, p3, p2, concat 259 | 260 | 
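            # (added comment) At this point predict_keypoint holds the fused
            # keypoint heatmaps: P5/P4/P3 were upsampled by 8/4/2 to match P2,
            # the four 128-channel maps were concatenated to 512 channels, and
            # conv2 + convfin reduce them to the final 18 keypoint channels.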
################################################################################## 261 | # detection subnet 262 | regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1) 263 | classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1) 264 | anchors = self.anchors(img_batch) 265 | 266 | transformed_anchors = self.regressBoxes(anchors, regression) 267 | transformed_anchors = self.clipBoxes(transformed_anchors, img_batch) 268 | 269 | scores = torch.max(classification, dim=2, keepdim=True)[0] 270 | 271 | scores_over_thresh = (scores > 0.05)[0, :, 0]#0.05 272 | 273 | if scores_over_thresh.sum() == 0: 274 | # no boxes to NMS, just return 275 | return predict_keypoint, [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] 276 | 277 | classification = classification[:, scores_over_thresh, :] 278 | transformed_anchors = transformed_anchors[:, scores_over_thresh, :] 279 | scores = scores[:, scores_over_thresh, :] 280 | 281 | anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5) # threshold = 0.5, inpsize=480 282 | 283 | nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1) 284 | 285 | return predict_keypoint, [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] 286 | 287 | 288 | def keypoint_forward(self, img_batch): 289 | saved_for_loss = [] 290 | 291 | p2, p3, p4, p5 = self.fpn(img_batch)[0] # fpn features for keypoint subnet 292 | 293 | ################################################################################## 294 | # keypoints subnet 295 | # intermediate supervision 296 | saved_for_loss.append(self.convfin_k2(p2)) 297 | saved_for_loss.append(self.upsample3(self.convfin_k3(p3))) 298 | saved_for_loss.append(self.upsample2(self.convfin_k4(p4))) 299 | saved_for_loss.append(self.upsample1(self.convfin_k5(p5))) 300 | 301 | # 302 | p5 = self.convt1(p5) 303 | p5 = self.convs1(p5) 304 | p4 = self.convt2(p4) 305 | p4 = self.convs2(p4) 306 | p3 = self.convt3(p3) 307 | p3 = self.convs3(p3) 308 | p2 = self.convt4(p2) 309 | p2 = self.convs4(p2) 310 | 311 | p5 = self.upsample1(p5) 312 | p4 = self.upsample2(p4) 313 | p3 = self.upsample3(p3) 314 | 315 | predict_keypoint = self.convfin(F.relu(self.conv2(self.concat(p5, p4, p3, p2)))) 316 | saved_for_loss.append(predict_keypoint) 317 | 318 | return predict_keypoint, saved_for_loss 319 | 320 | def detection_forward(self, img_batch): 321 | saved_for_loss = [] 322 | 323 | features = self.fpn(img_batch)[1] # fpn features for detection subnet 324 | 325 | ################################################################################## 326 | # detection subnet 327 | regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1) 328 | classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1) 329 | anchors = self.anchors(img_batch) 330 | 331 | saved_for_loss.append(classification) 332 | saved_for_loss.append(regression) 333 | saved_for_loss.append(anchors) 334 | 335 | return [], saved_for_loss 336 | 337 | def prn_forward(self, img_batch): 338 | saved_for_loss = [] 339 | 340 | res = self.prn.flatten(img_batch) 341 | out = self.prn.drop(F.relu(self.prn.dens1(res))) 342 | out = self.prn.drop(F.relu(self.prn.bneck(out))) 343 | out = F.relu(self.prn.dens2(out)) 344 | out = self.prn.add(out,res) 345 | out = self.prn.softmax(out) 346 | out = out.view(out.size()[0], self.prn.height, self.prn.width, 17) 347 | 348 | saved_for_loss.append(out) 349 | 350 | return out, saved_for_loss 351 
| 352 | @staticmethod 353 | def build_loss(saved_for_loss, *args): 354 | 355 | subnet_name = args[0] 356 | 357 | if subnet_name == 'keypoint_subnet': 358 | return build_keypoint_loss(saved_for_loss, args[1], args[2]) 359 | elif subnet_name == 'detection_subnet': 360 | return build_detection_loss(saved_for_loss, args[1]) 361 | elif subnet_name == 'prn_subnet': 362 | return build_prn_loss(saved_for_loss, args[1]) 363 | else: 364 | return 0 365 | 366 | 367 | def build_keypoint_loss(saved_for_loss, heat_temp, heat_weight): 368 | 369 | names = build_names() 370 | saved_for_log = OrderedDict() 371 | criterion = nn.MSELoss(size_average=True).cuda() 372 | total_loss = 0 373 | div1 = 1. 374 | #div2 = 100. 375 | 376 | for j in range(5): 377 | 378 | pred1 = saved_for_loss[j][:, :18, :, :] * heat_weight 379 | gt1 = heat_weight * heat_temp 380 | 381 | #pred2 = saved_for_loss[j][:, 18:, :, :] 382 | #gt2 = mask_all 383 | 384 | # Compute losses 385 | loss1 = criterion(pred1, gt1)/div1 # heatmap_loss 386 | #loss2 = criterion(pred2, gt2)/div2 # mask_loss 387 | total_loss += loss1 388 | #total_loss += loss2 389 | 390 | # Get value from Tensor and save for log 391 | saved_for_log[names[j*2]] = loss1.item() 392 | #saved_for_log[names[j*2+1]] = loss2.item() 393 | 394 | saved_for_log['max_ht'] = torch.max( 395 | saved_for_loss[-1].data[:, :18, :, :]).item() 396 | saved_for_log['min_ht'] = torch.min( 397 | saved_for_loss[-1].data[:, :18, :, :]).item() 398 | #saved_for_log['max_mask'] = torch.max( 399 | # saved_for_loss[-1].data[:, 18:, :, :]).item() 400 | #saved_for_log['min_mask'] = torch.min( 401 | # saved_for_loss[-1].data[:, 18:, :, :]).item() 402 | 403 | return total_loss, saved_for_log 404 | 405 | def build_detection_loss(saved_for_loss, anno): 406 | ''' 407 | :param saved_for_loss: [classifications, regressions, anchors] 408 | :param anno: annotations 409 | :return: classification_loss, regression_loss 410 | ''' 411 | saved_for_log = OrderedDict() 412 | 413 | # Compute losses 414 | focalLoss = losses.FocalLoss() 415 | classification_loss, regression_loss = focalLoss(*saved_for_loss, anno) 416 | classification_loss = classification_loss.mean() 417 | regression_loss = regression_loss.mean() 418 | total_loss = classification_loss + regression_loss 419 | 420 | # Get value from Tensor and save for log 421 | saved_for_log['total_loss'] = total_loss.item() 422 | saved_for_log['classification_loss'] = classification_loss.item() 423 | saved_for_log['regression_loss'] = regression_loss.item() 424 | 425 | return total_loss, saved_for_log 426 | 427 | def build_prn_loss(saved_for_loss, label): 428 | ''' 429 | :param saved_for_loss: [out] 430 | :param label: label 431 | :return: prn loss 432 | ''' 433 | saved_for_log = OrderedDict() 434 | 435 | criterion = nn.BCELoss(size_average=True).cuda() 436 | total_loss = 0 437 | 438 | # Compute losses 439 | loss1 = criterion(saved_for_loss[0], label) 440 | total_loss += loss1 441 | 442 | # Get value from Tensor and save for log 443 | saved_for_log['PRN loss'] = loss1.item() 444 | 445 | return total_loss, saved_for_log 446 | 447 | def build_names(): 448 | names = [] 449 | for j in range(2, 6): 450 | names.append('heatmap_loss_k%d' % j) 451 | names.append('seg_loss_k%d' % j) 452 | names.append('heatmap_loss') 453 | names.append('seg_loss') 454 | return names 455 | 456 | -------------------------------------------------------------------------------- /datasets/coco_data/COCO_data_pipeline.py: -------------------------------------------------------------------------------- 1 | # 
coding=utf-8 2 | import os 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | import torch 8 | from datasets.coco_data.heatmap import putGaussianMaps 9 | from datasets.coco_data.ImageAugmentation import (aug_croppad, aug_flip, aug_rotate, aug_scale, 10 | aug_croppad_bbox, aug_flip_bbox, aug_rotate_bbox, aug_scale_bbox) 11 | from datasets.coco_data.preprocessing import resnet_preprocess 12 | from torch.utils.data import DataLoader, Dataset 13 | from functools import partial, reduce 14 | 15 | from pycocotools.coco import COCO, maskUtils 16 | 17 | ''' 18 | train2014 : 82783 simages 19 | val2014 : 40504 images 20 | 21 | first 2644 of val2014 marked by 'isValidation = 1', as our minval dataset. 22 | So all training data have 82783+40504-2644 = 120643 samples 23 | ''' 24 | 25 | params_transform = dict() 26 | params_transform['mode'] = 5 27 | # === aug_scale === 28 | params_transform['scale_min'] = 0.8 29 | params_transform['scale_max'] = 1.2 30 | params_transform['scale_prob'] = 1 31 | params_transform['target_dist'] = 0.6 32 | # === aug_rotate === 33 | params_transform['max_rotate_degree'] = 40 34 | 35 | # === 36 | params_transform['center_perterb_max'] = 40 37 | 38 | # === aug_flip === 39 | params_transform['flip_prob'] = 0.3 40 | 41 | params_transform['np'] = 56 42 | params_transform['sigma'] = 7.0 43 | 44 | def annToRLE(ann, height, width): 45 | """ 46 | Convert annotation which can be polygons, uncompressed RLE to RLE. 47 | :return: binary mask (numpy 2D array) 48 | """ 49 | segm = ann['segmentation'] 50 | if isinstance(segm, list): 51 | # polygon -- a single object might consist of multiple parts 52 | # we merge all parts into one mask rle code 53 | rles = maskUtils.frPyObjects(segm, height, width) 54 | rle = maskUtils.merge(rles) 55 | elif isinstance(segm['counts'], list): 56 | # uncompressed RLE 57 | rle = maskUtils.frPyObjects(segm, height, width) 58 | else: 59 | # rle 60 | rle = ann['segmentation'] 61 | return rle 62 | 63 | 64 | def annToMask(ann, height, width): 65 | """ 66 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 67 | :return: binary mask (numpy 2D array) 68 | """ 69 | rle = annToRLE(ann, height, width) 70 | m = maskUtils.decode(rle) 71 | return m 72 | 73 | class Cocokeypoints(Dataset): 74 | def __init__(self, root, mask_dir, index_list, data, inp_size, feat_stride, preprocess='resnet', transform=None, 75 | target_transform=None): 76 | 77 | params_transform['crop_size_x'] = inp_size 78 | params_transform['crop_size_y'] = inp_size 79 | params_transform['stride'] = feat_stride 80 | 81 | # add preprocessing as a choice, so we don't modify it manually. 
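        # (added comment) In practice only preprocess == 'resnet' is handled in
        # __getitem__ (via resnet_preprocess); any other value leaves the image
        # without ImageNet-style normalisation.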
82 | self.preprocess = preprocess 83 | self.data = data 84 | self.mask_dir = mask_dir 85 | self.numSample = len(index_list) 86 | self.index_list = index_list 87 | self.root = root 88 | self.transform = transform 89 | self.target_transform = target_transform 90 | 91 | def get_anno(self, meta_data): 92 | """ 93 | get meta information 94 | """ 95 | anno = dict() 96 | anno['dataset'] = meta_data['dataset'] 97 | anno['img_height'] = int(meta_data['img_height']) 98 | anno['img_width'] = int(meta_data['img_width']) 99 | 100 | anno['isValidation'] = meta_data['isValidation'] 101 | anno['people_index'] = int(meta_data['people_index']) 102 | anno['annolist_index'] = int(meta_data['annolist_index']) 103 | 104 | # (b) objpos_x (float), objpos_y (float) 105 | anno['objpos'] = np.array(meta_data['objpos']) 106 | anno['scale_provided'] = meta_data['scale_provided'] 107 | anno['joint_self'] = np.array(meta_data['joint_self']) 108 | 109 | anno['numOtherPeople'] = int(meta_data['numOtherPeople']) 110 | anno['num_keypoints_other'] = np.array( 111 | meta_data['num_keypoints_other']) 112 | anno['joint_others'] = np.array(meta_data['joint_others']) 113 | anno['objpos_other'] = np.array(meta_data['objpos_other']) 114 | anno['scale_provided_other'] = meta_data['scale_provided_other'] 115 | anno['bbox_other'] = meta_data['bbox_other'] 116 | anno['segment_area_other'] = meta_data['segment_area_other'] 117 | 118 | if anno['numOtherPeople'] == 1: 119 | anno['joint_others'] = np.expand_dims(anno['joint_others'], 0) 120 | anno['objpos_other'] = np.expand_dims(anno['objpos_other'], 0) 121 | return anno 122 | 123 | def add_neck(self, meta): 124 | ''' 125 | MS COCO annotation order: 126 | 0: nose 1: l eye 2: r eye 3: l ear 4: r ear 127 | 5: l shoulder 6: r shoulder 7: l elbow 8: r elbow 128 | 9: l wrist 10: r wrist 11: l hip 12: r hip 13: l knee 129 | 14: r knee 15: l ankle 16: r ankle 130 | 131 | The order in this work: 132 | (0-'nose' 1-'neck' 2-'right_shoulder' 3-'right_elbow' 4-'right_wrist' 133 | 5-'left_shoulder' 6-'left_elbow' 7-'left_wrist' 8-'right_hip' 134 | 9-'right_knee' 10-'right_ankle' 11-'left_hip' 12-'left_knee' 135 | 13-'left_ankle' 14-'right_eye' 15-'left_eye' 16-'right_ear' 136 | 17-'left_ear' ) 137 | ''' 138 | our_order = [0, 17, 6, 8, 10, 5, 7, 9, 139 | 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] 140 | # Index 6 is right shoulder and Index 5 is left shoulder 141 | right_shoulder = meta['joint_self'][6, :] 142 | left_shoulder = meta['joint_self'][5, :] 143 | neck = (right_shoulder + left_shoulder) / 2 144 | if right_shoulder[2] == 2 or left_shoulder[2] == 2: 145 | neck[2] = 2 146 | elif right_shoulder[2] == 1 or left_shoulder[2] == 1: 147 | neck[2] = 1 148 | else: 149 | neck[2] = right_shoulder[2] * left_shoulder[2] 150 | 151 | neck = neck.reshape(1, len(neck)) 152 | neck = np.round(neck) 153 | meta['joint_self'] = np.vstack((meta['joint_self'], neck)) 154 | meta['joint_self'] = meta['joint_self'][our_order, :] 155 | temp = [] 156 | 157 | for i in range(meta['numOtherPeople']): 158 | right_shoulder = meta['joint_others'][i, 6, :] 159 | left_shoulder = meta['joint_others'][i, 5, :] 160 | neck = (right_shoulder + left_shoulder) / 2 161 | if (right_shoulder[2] == 2 or left_shoulder[2] == 2): 162 | neck[2] = 2 163 | elif (right_shoulder[2] == 1 or left_shoulder[2] == 1): 164 | neck[2] = 1 165 | else: 166 | neck[2] = right_shoulder[2] * left_shoulder[2] 167 | neck = neck.reshape(1, len(neck)) 168 | neck = np.round(neck) 169 | single_p = np.vstack((meta['joint_others'][i], neck)) 170 | single_p = single_p[our_order, 
:] 171 | temp.append(single_p) 172 | meta['joint_others'] = np.array(temp) 173 | 174 | return meta 175 | 176 | def remove_illegal_joint(self, meta): 177 | crop_x = int(params_transform['crop_size_x']) 178 | crop_y = int(params_transform['crop_size_y']) 179 | mask = np.logical_or.reduce((meta['joint_self'][:, 0] >= crop_x, 180 | meta['joint_self'][:, 0] < 0, 181 | meta['joint_self'][:, 1] >= crop_y, 182 | meta['joint_self'][:, 1] < 0)) 183 | # out_bound = np.nonzero(mask) 184 | # print(mask.shape) 185 | meta['joint_self'][mask == True, :] = (1, 1, 2) 186 | if (meta['numOtherPeople'] != 0): 187 | mask = np.logical_or.reduce((meta['joint_others'][:, :, 0] >= crop_x, 188 | meta['joint_others'][:, :, 0] < 0, 189 | meta['joint_others'][:, 190 | :, 1] >= crop_y, 191 | meta['joint_others'][:, :, 1] < 0)) 192 | meta['joint_others'][mask == True, :] = (1, 1, 2) 193 | 194 | return meta 195 | 196 | def get_ground_truth(self, meta, mask_miss): 197 | 198 | number_keypoints = 18 199 | 200 | stride = params_transform['stride'] 201 | mode = params_transform['mode'] 202 | crop_size_y = params_transform['crop_size_y'] 203 | crop_size_x = params_transform['crop_size_x'] 204 | num_parts = params_transform['np'] 205 | nop = meta['numOtherPeople'] 206 | grid_y = int(crop_size_y / stride) 207 | grid_x = int(crop_size_x / stride) 208 | channels = (num_parts + 1) * 2 209 | heatmaps = np.zeros((grid_y, grid_x, number_keypoints)) 210 | 211 | mask_miss = cv2.resize(mask_miss, (0, 0), fx=1.0 / stride, fy=1.0 / stride, 212 | interpolation=cv2.INTER_CUBIC).astype(np.float32) 213 | mask_miss = mask_miss / 255. 214 | mask_miss = np.expand_dims(mask_miss, axis=2) 215 | heat_mask = np.repeat(mask_miss, number_keypoints, axis=2) # 19 216 | 217 | #mask_all = cv2.resize(mask_all, (0, 0), fx=1.0 / stride, fy=1.0 / stride, 218 | # interpolation=cv2.INTER_CUBIC).astype(np.float32) 219 | #mask_all = mask_all / 255. 
220 | #mask_all = np.expand_dims(mask_all, axis=2) 221 | 222 | # confidance maps for body parts 223 | for i in range(number_keypoints): 224 | if (meta['joint_self'][i, 2] <= 1): 225 | center = meta['joint_self'][i, :2] 226 | gaussian_map = heatmaps[:, :, i] 227 | heatmaps[:, :, i] = putGaussianMaps( 228 | center, gaussian_map, params_transform=params_transform) 229 | for j in range(nop): 230 | if (meta['joint_others'][j, i, 2] <= 1): 231 | center = meta['joint_others'][j, i, :2] 232 | gaussian_map = heatmaps[:, :, i] 233 | heatmaps[:, :, i] = putGaussianMaps( 234 | center, gaussian_map, params_transform=params_transform) 235 | 236 | return heat_mask, heatmaps 237 | 238 | def __getitem__(self, index): 239 | idx = self.index_list[index] 240 | img = cv2.imread(os.path.join(self.root, self.data[idx]['img_paths'])) 241 | img_idx = self.data[idx]['img_paths'][-16:-3] 242 | # print img.shape 243 | if "COCO_val" in self.data[idx]['dataset']: 244 | mask_miss = cv2.imread( 245 | self.mask_dir + 'mask2014/val2014_mask_miss_' + img_idx + 'png', 0) 246 | #mask_all = cv2.imread( 247 | # self.mask_dir + 'mask2014/val2014_mask_all_' + img_idx + 'png', 0) 248 | elif "COCO" in self.data[idx]['dataset']: 249 | mask_miss = cv2.imread( 250 | self.mask_dir + 'mask2014/train2014_mask_miss_' + img_idx + 'png', 0) 251 | #mask_all = cv2.imread( 252 | # self.mask_dir + 'mask2014/train2014_mask_all_' + img_idx + 'png', 0) 253 | meta_data = self.get_anno(self.data[idx]) 254 | 255 | meta_data = self.add_neck(meta_data) 256 | 257 | augmentations = [ 258 | partial(aug_meth, params_transform=params_transform) 259 | for aug_meth in [ 260 | aug_scale, 261 | aug_rotate, 262 | aug_croppad, 263 | aug_flip 264 | ] 265 | ] 266 | 267 | meta_data, img, mask_miss = reduce( 268 | lambda md_i_mm_ma, f: f(*md_i_mm_ma), 269 | augmentations, 270 | (meta_data, img, mask_miss) 271 | ) 272 | 273 | meta_data = self.remove_illegal_joint(meta_data) 274 | 275 | heat_mask, heatmaps = self.get_ground_truth( 276 | meta_data, mask_miss) 277 | 278 | # image preprocessing, which comply the model 279 | # trianed on Imagenet dataset 280 | if self.preprocess == 'resnet': 281 | img = resnet_preprocess(img) 282 | 283 | img = torch.from_numpy(img) 284 | heatmaps = torch.from_numpy( 285 | heatmaps.transpose((2, 0, 1)).astype(np.float32)) 286 | heat_mask = torch.from_numpy( 287 | heat_mask.transpose((2, 0, 1)).astype(np.float32)) 288 | #mask_all = torch.from_numpy( 289 | # mask_all.transpose((2, 0, 1)).astype(np.float32)) 290 | 291 | return img, heatmaps, heat_mask#, mask_all 292 | 293 | def __len__(self): 294 | return self.numSample 295 | 296 | class Cocobbox(Dataset): 297 | def __init__(self, root, mask_dir, index_list, data, inp_size, feat_stride, coco, 298 | preprocess='resnet', training=True): 299 | 300 | params_transform['crop_size_x'] = inp_size 301 | params_transform['crop_size_y'] = inp_size 302 | params_transform['stride'] = feat_stride 303 | 304 | # add preprocessing as a choice, so we don't modify it manually. 
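        # (added comment) Same convention as Cocokeypoints: __getitem__ applies
        # resnet_preprocess only when preprocess == 'resnet'.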
305 | self.preprocess = preprocess 306 | self.data = data 307 | self.index_list = index_list 308 | self.numSample = len(self.index_list) 309 | self.training = training 310 | 311 | if self.training: 312 | img_path = os.path.join(root, 'train2017') 313 | else: 314 | img_path = os.path.join(root, 'val2017') 315 | 316 | self.instance_info_list, self.image_path_list = self.get_instance_info_list(img_path, coco) 317 | 318 | def get_instance_info_list(self, img_path, coco): 319 | 320 | instance_info_list = [] 321 | image_path_list = [] 322 | 323 | for idx in self.index_list: 324 | image_info = coco.loadImgs(int(self.data[idx]['image_id']))[0] 325 | image_path = os.path.join(img_path, image_info['file_name']) 326 | if not os.path.exists(image_path): 327 | print( 328 | "[skip] json annotation found, but cannot found image: {}".format(image_path)) 329 | continue 330 | image_path_list.append(image_path) 331 | 332 | annos_ids = coco.getAnnIds(imgIds=self.data[idx]['image_id']) 333 | annos_info = coco.loadAnns(annos_ids) 334 | instance_info = {} 335 | instance_info["anns"] = annos_info 336 | instance_info["height"] = image_info["height"] 337 | instance_info["width"] = image_info["width"] 338 | instance_info_list.append(instance_info) 339 | 340 | return instance_info_list, image_path_list 341 | 342 | def get_instance_mask(self, instance_info): 343 | height = instance_info['height'] 344 | width = instance_info['width'] 345 | anns = instance_info['anns'] 346 | 347 | instance_masks = [] 348 | class_ids = [] 349 | for anno in anns: 350 | class_id = 1 351 | m = annToMask(anno, height, width) 352 | # Some objects are so small that they're less than 1 pixel area 353 | # and end up rounded out. Skip those objects. 354 | if m.max() < 1: 355 | continue 356 | # Is it a crowd? If so, use a negative class ID. 357 | if anno['iscrowd']: 358 | # Use negative class ID for crowds 359 | class_id = -1 360 | # For crowd masks, annToMask() sometimes returns a mask 361 | # smaller than the given dimensions. If so, resize it. 362 | if m.shape[0] != height or m.shape[1] != width: 363 | m = np.ones([height, width], dtype=bool) 364 | instance_masks.append(m) 365 | class_ids.append(class_id) 366 | return instance_masks, class_ids 367 | 368 | def get_anno(self, meta_data, instance_info): 369 | """ 370 | get meta information 371 | """ 372 | anno = dict() 373 | 374 | # (b) objpos_x (float), objpos_y (float) 375 | anno['objpos'] = np.array(meta_data['objpos']) 376 | anno['scale_provided'] = meta_data['scale_provided'] 377 | 378 | anno['instance_mask_list'], anno['instance_cls_list'] = self.get_instance_mask(instance_info) 379 | 380 | return anno 381 | 382 | def get_ground_truth(self, meta, instance_info): 383 | extracted_bbox = [] 384 | 385 | for m_idx, m in enumerate(meta['instance_mask_list']): 386 | if meta['instance_cls_list'][m_idx] == -1: # is_crowd = 1 387 | if instance_info['anns'][m_idx]['iscrowd'] != 1: 388 | print('is_crowd error') 389 | continue 390 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 391 | vertical_indicies = np.where(np.any(m, axis=1))[0] 392 | if horizontal_indicies.shape[0]: 393 | x1, x2 = horizontal_indicies[[0, -1]] 394 | y1, y2 = vertical_indicies[[0, -1]] 395 | # x2 and y2 should not be part of the box. Increment by 1. 396 | x2 += 1 397 | y2 += 1 398 | bbox_cls = 0 399 | else: 400 | # No mask for this instance. Might happen due to 401 | # resizing or cropping. 
Set bbox to zeros 402 | x1, x2, y1, y2, bbox_cls = -1, -1, -1, -1, -1 403 | extracted_bbox.append([x1, y1, x2, y2, bbox_cls]) 404 | 405 | return extracted_bbox 406 | 407 | def __getitem__(self, index): 408 | img = cv2.imread(self.image_path_list[index]) 409 | 410 | idx = self.index_list[index] 411 | meta_data = self.get_anno(self.data[idx], self.instance_info_list[index]) 412 | 413 | augmentations = [ 414 | partial(aug_meth, params_transform=params_transform) 415 | for aug_meth in [ 416 | aug_scale_bbox, 417 | aug_rotate_bbox, 418 | aug_croppad_bbox, 419 | aug_flip_bbox 420 | ] 421 | ] 422 | 423 | meta_data, img = reduce( 424 | lambda md_i_mm_ma, f: f(*md_i_mm_ma), 425 | augmentations, 426 | (meta_data, img) 427 | ) 428 | 429 | extracted_bbox = self.get_ground_truth(meta_data, self.instance_info_list[index]) 430 | 431 | # image preprocessing, which comply the model 432 | # trianed on Imagenet dataset 433 | if self.preprocess == 'resnet': 434 | img = resnet_preprocess(img) 435 | 436 | img = torch.from_numpy(img) 437 | bbox = torch.from_numpy(np.array(extracted_bbox).astype(np.float32)) 438 | 439 | return img, bbox 440 | 441 | def __len__(self): 442 | return self.numSample 443 | 444 | def bbox_collater(data): 445 | imgs = torch.stack([s[0] for s in data], 0) 446 | bbox = [s[1] for s in data] 447 | 448 | max_num_annots = max(bb.shape[0] for bb in bbox) 449 | 450 | bbox_padded = torch.ones((len(bbox), max_num_annots, 5)) * -1 451 | #print(annot_padded.shape) 452 | if max_num_annots > 0: 453 | for idx, bb in enumerate(bbox): 454 | #print(annot.shape) 455 | if bb.shape[0] > 0: 456 | bbox_padded[idx, :bb.shape[0], :] = bb 457 | 458 | return imgs, bbox_padded -------------------------------------------------------------------------------- /evaluate/tester.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import cv2 5 | import math 6 | import datetime 7 | import numpy as np 8 | import json 9 | from collections import OrderedDict 10 | from network.joint_utils import get_joint_list, plot_result 11 | from tqdm import tqdm 12 | 13 | import torch 14 | import torch.nn as nn 15 | from lib.utils.log import logger 16 | import lib.utils.meter as meter_utils 17 | import network.net_utils as net_utils 18 | from lib.utils.timer import Timer 19 | from datasets.coco_data.preprocessing import resnet_preprocess 20 | from datasets.coco_data.prn_gaussian import gaussian, crop 21 | 22 | from pycocotools.coco import COCO 23 | from pycocotools.cocoeval import COCOeval 24 | 25 | 26 | def _factor_closest(num, factor, is_ceil=True): 27 | """Returns the closest integer to `num` that is divisible by `factor` 28 | 29 | Actually, that's a lie. By default, we return the closest factor _greater_ 30 | than the input. If, however, you set `it_ceil` to `False`, we return the 31 | closest factor _less_ than the input. 32 | """ 33 | num = float(num) / factor 34 | num = np.ceil(num) if is_ceil else np.floor(num) 35 | return int(num) * factor 36 | 37 | 38 | def crop_with_factor(im, dest_size, factor=32, pad_val=0, basedon='min'): 39 | """Scale and pad an image to the desired size and divisibility 40 | 41 | Scale the specified dimension of the input image to `dest_size` then pad 42 | the result until it is cleanly divisible by `factor`. 43 | 44 | Args: 45 | im (Image): The input image. 46 | dest_size (int): The desired size of the unpadded, scaled image's 47 | dimension specified by `basedon`. 
48 | factor (int): Pad the scaled image until it is factorable 49 | pad_val (number): Value to pad with. 50 | basedon (string): Specifies which dimension to base the scaling on. 51 | Valid inputs are 'min', 'max', 'w', and 'h'. Defaults to 'min'. 52 | 53 | Returns: 54 | A tuple of three elements: 55 | - The scaled and padded image. 56 | - The scaling factor. 57 | - The size of the non-padded part of the resulting image. 58 | """ 59 | # Compute the scaling factor. 60 | im_size_min, im_size_max = np.min(im.shape[0:2]), np.max(im.shape[0:2]) 61 | im_base = {'min': im_size_min, 62 | 'max': im_size_max, 63 | 'w': im.shape[1], 64 | 'h': im.shape[0]} 65 | im_scale = float(dest_size) / im_base.get(basedon, im_size_min) 66 | 67 | # Scale the image. 68 | im = cv2.resize(im, None, fx=im_scale, fy=im_scale) 69 | 70 | # Compute the padded image shape. Ensure it's divisible by factor. 71 | h, w = im.shape[:2] 72 | new_h, new_w = _factor_closest(h, factor), _factor_closest(w, factor) 73 | # new_ = max(new_h, new_w) 74 | new_shape = [new_h, new_w] if im.ndim < 3 else [new_h, new_w, im.shape[-1]] 75 | # new_shape = [new_, new_] if im.ndim < 3 else [new_, new_, im.shape[-1]] 76 | 77 | # Pad the image. 78 | im_padded = np.full(new_shape, fill_value=pad_val, dtype=im.dtype) 79 | im_padded[0:h, 0:w] = im 80 | 81 | return im_padded, im_scale, im.shape 82 | 83 | 84 | class TestParams(object): 85 | 86 | trunk = 'resnet101' # select the model 87 | coeff = 2 88 | in_thres = 0.21 89 | 90 | testdata_dir = './demo/test_images/' 91 | testresult_dir = './demo/output/' 92 | testresult_write_image = False # write image results or not 93 | testresult_write_json = False # write json results or not 94 | gpus = [0] 95 | ckpt = './demo/models/ckpt_baseline_resnet101.h5' # checkpoint file to load, no need to change this 96 | coco_root = 'coco_root/' 97 | coco_result_filename = './extra/multipose_coco2017_results.json' 98 | 99 | # # required params 100 | inp_size = 480 # input size 480*480 101 | exp_name = 'multipose101' 102 | subnet_name = 'keypoint_subnet' 103 | batch_size = 32 104 | print_freq = 20 105 | 106 | class Tester(object): 107 | 108 | TestParams = TestParams 109 | 110 | def __init__(self, model, train_params, batch_processor=None, val_data=None): 111 | assert isinstance(train_params, TestParams) 112 | self.params = train_params 113 | self.batch_timer = Timer() 114 | self.data_timer = Timer() 115 | self.val_data = val_data if val_data else None 116 | self.batch_processor = batch_processor if batch_processor else None 117 | 118 | # load model 119 | self.model = model 120 | ckpt = self.params.ckpt 121 | 122 | if ckpt is not None: 123 | self._load_ckpt(ckpt) 124 | logger.info('Load ckpt from {}'.format(ckpt)) 125 | 126 | self.model = nn.DataParallel(self.model, device_ids=self.params.gpus) 127 | self.model = self.model.cuda(device=self.params.gpus[0]) 128 | self.model.eval() 129 | self.model.module.freeze_bn() 130 | 131 | def coco_eval(self): 132 | 133 | coco_val = os.path.join(self.params.coco_root, 'annotations/person_keypoints_val2017.json') 134 | coco = COCO(coco_val) 135 | img_ids = coco.getImgIds(catIds=[1]) 136 | 137 | multipose_results = [] 138 | coco_order = [0, 14, 13, 16, 15, 4, 1, 5, 2, 6, 3, 10, 7, 11, 8, 12, 9] 139 | 140 | for img_id in tqdm(img_ids): 141 | 142 | img_name = coco.loadImgs(img_id)[0]['file_name'] 143 | 144 | oriImg = cv2.imread(os.path.join(self.params.coco_root, 'images/val2017/', img_name)).astype(np.float32) 145 | multiplier = self._get_multiplier(oriImg) 146 | 147 | # Get results of 
original image 148 | orig_heat, orig_bbox_all = self._get_outputs(multiplier, oriImg) 149 | 150 | # Get results of flipped image 151 | swapped_img = oriImg[:, ::-1, :] 152 | flipped_heat, flipped_bbox_all = self._get_outputs(multiplier, swapped_img) 153 | 154 | # compute averaged heatmap 155 | heatmaps = self._handle_heat(orig_heat, flipped_heat) 156 | 157 | # segment_map = heatmaps[:, :, 17] 158 | param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5} 159 | joint_list = get_joint_list(oriImg, param, heatmaps[:, :, :18], 1) 160 | joint_list = joint_list.tolist() 161 | 162 | joints = [] 163 | for joint in joint_list: 164 | if int(joint[-1]) != 1: 165 | joint[-1] = max(0, int(joint[-1]) - 1) 166 | joints.append(joint) 167 | joint_list = joints 168 | 169 | prn_result = self.prn_process(joint_list, orig_bbox_all[1], img_name, img_id) 170 | for result in prn_result: 171 | keypoints = result['keypoints'] 172 | coco_keypoint = [] 173 | for i in range(17): 174 | coco_keypoint.append(keypoints[coco_order[i] * 3]) 175 | coco_keypoint.append(keypoints[coco_order[i] * 3 + 1]) 176 | coco_keypoint.append(keypoints[coco_order[i] * 3 + 2]) 177 | result['keypoints'] = coco_keypoint 178 | multipose_results.append(result) 179 | 180 | ann_filename = self.params.coco_result_filename 181 | with open(ann_filename, "w") as f: 182 | json.dump(multipose_results, f, indent=4) 183 | # load results in COCO evaluation tool 184 | coco_pred = coco.loadRes(ann_filename) 185 | # run COCO evaluation 186 | coco_eval = COCOeval(coco, coco_pred, 'keypoints') 187 | coco_eval.params.imgIds = img_ids 188 | coco_eval.evaluate() 189 | coco_eval.accumulate() 190 | coco_eval.summarize() 191 | 192 | if not self.params.testresult_write_json: 193 | os.remove(ann_filename) 194 | 195 | def test(self): 196 | 197 | img_list = os.listdir(self.params.testdata_dir) 198 | multipose_results = [] 199 | 200 | for img_name in tqdm(img_list): 201 | 202 | img = cv2.imread(os.path.join(self.params.testdata_dir, img_name)).astype(np.float32) 203 | shape_dst = np.max(img.shape) 204 | scale = float(shape_dst) / self.params.inp_size 205 | pad_size = np.abs(img.shape[1] - img.shape[0]) 206 | img_resized = np.pad(img, ([0, pad_size], [0, pad_size], [0, 0]), 'constant')[:shape_dst, :shape_dst] 207 | img_resized = cv2.resize(img_resized, (self.params.inp_size, self.params.inp_size)) 208 | img_input = resnet_preprocess(img_resized) 209 | img_input = torch.from_numpy(np.expand_dims(img_input, 0)) 210 | 211 | with torch.no_grad(): 212 | img_input = img_input.cuda(device=self.params.gpus[0]) 213 | 214 | heatmaps, [scores, classification, transformed_anchors] = self.model([img_input, self.params.subnet_name]) 215 | heatmaps = heatmaps.cpu().detach().numpy() 216 | heatmaps = np.squeeze(heatmaps, 0) 217 | heatmaps = np.transpose(heatmaps, (1, 2, 0)) 218 | heatmap_max = np.max(heatmaps[:, :, :18], 2) 219 | # segment_map = heatmaps[:, :, 17] 220 | param = {'thre1': 0.1, 'thre2': 0.05, 'thre3': 0.5} 221 | joint_list = get_joint_list(img_resized, param, heatmaps[:, :, :18], scale) 222 | joint_list = joint_list.tolist() 223 | del img_resized 224 | 225 | joints = [] 226 | for joint in joint_list: 227 | if int(joint[-1]) != 1: 228 | joint[-1] = max(0, int(joint[-1]) - 1) 229 | joints.append(joint) 230 | joint_list = joints 231 | 232 | # bounding box from retinanet 233 | scores = scores.cpu().detach().numpy() 234 | classification = classification.cpu().detach().numpy() 235 | transformed_anchors = transformed_anchors.cpu().detach().numpy() 236 | idxs = np.where(scores > 0.5) 
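# Note on coordinates: `scale` maps the inp_size x inp_size network input back
# to the original image.  For example, with inp_size = 480 and a 640x480 (WxH)
# image: shape_dst = 640, the image is zero-padded to 640x640 and resized to
# 480x480, so scale = 640/480 = 1.333 and a box predicted at (60, 90, 120, 210)
# in network space corresponds to (80, 120, 160, 280) in original-image pixels
# (the `* scale` applied just below).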
237 | bboxs=[] 238 | for j in range(idxs[0].shape[0]): 239 | bbox = transformed_anchors[idxs[0][j], :]*scale 240 | if int(classification[idxs[0][j]]) == 0: # class0=people 241 | bboxs.append(bbox.tolist()) 242 | 243 | prn_result = self.prn_process(joint_list, bboxs, img_name) 244 | for result in prn_result: 245 | multipose_results.append(result) 246 | 247 | if self.params.testresult_write_image: 248 | canvas = plot_result(img, prn_result) 249 | cv2.imwrite(os.path.join(self.params.testresult_dir, img_name.split('.', 1)[0] + '_1heatmap.png'), heatmap_max * 256) 250 | cv2.imwrite(os.path.join(self.params.testresult_dir, img_name.split('.', 1)[0] + '_2canvas.png'), canvas) 251 | 252 | if self.params.testresult_write_json: 253 | with open(self.params.testresult_dir+'multipose_results.json', "w") as f: 254 | json.dump(multipose_results, f) 255 | 256 | def _get_multiplier(self, img): 257 | """Computes the sizes of image at different scales 258 | :param img: numpy array, the current image 259 | :returns : list of float. The computed scales 260 | """ 261 | scale_search = [0.5, 1., 1.5, 2, 2.5] 262 | return [x * self.params.inp_size / float(img.shape[0]) for x in scale_search] 263 | 264 | def _get_outputs(self, multiplier, img): 265 | """Computes the averaged heatmap and paf for the given image 266 | :param multiplier: 267 | :param origImg: numpy array, the image being processed 268 | :param model: pytorch model 269 | :returns: numpy arrays, the averaged paf and heatmap 270 | """ 271 | 272 | heatmap_avg = np.zeros((img.shape[0], img.shape[1], 18)) 273 | bbox_all = [] 274 | # max_scale = multiplier[-1] 275 | # max_size = max_scale * img.shape[0] 276 | # # padding 277 | # max_cropped, _, _ = crop_with_factor( 278 | # img, max_size, factor=32) 279 | 280 | for m in range(len(multiplier)): 281 | scale = multiplier[m] 282 | inp_size = scale * img.shape[0] 283 | 284 | # padding 285 | im_cropped, im_scale, real_shape = crop_with_factor( 286 | img, inp_size, factor=32, pad_val=128) 287 | im_data = resnet_preprocess(im_cropped) 288 | 289 | im_data = np.expand_dims(im_data, 0) 290 | with torch.no_grad(): 291 | im_data = torch.from_numpy(im_data).type(torch.FloatTensor).cuda(device=self.params.gpus[0]) 292 | 293 | heatmaps, [scores, classification, transformed_anchors] = self.model([im_data, self.params.subnet_name]) 294 | heatmaps = heatmaps.cpu().detach().numpy().transpose(0, 2, 3, 1) 295 | scores = scores.cpu().detach().numpy() 296 | classification = classification.cpu().detach().numpy() 297 | transformed_anchors = transformed_anchors.cpu().detach().numpy() 298 | 299 | heatmap = heatmaps[0, :int(im_cropped.shape[0] / 4), :int(im_cropped.shape[1] / 4), :] 300 | heatmap = cv2.resize(heatmap, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC) 301 | heatmap = heatmap[0:real_shape[0], 0:real_shape[1], :] 302 | heatmap = cv2.resize( 303 | heatmap, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) 304 | 305 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 306 | 307 | # bboxs 308 | idxs = np.where(scores > 0.5) 309 | bboxs=[] 310 | for j in range(idxs[0].shape[0]): 311 | bbox = transformed_anchors[idxs[0][j], :]/im_scale 312 | if int(classification[idxs[0][j]]) == 0: # class0=people 313 | bboxs.append(bbox.tolist()) 314 | bbox_all.append(bboxs) 315 | 316 | return heatmap_avg, bbox_all 317 | 318 | def _handle_heat(self, normal_heat, flipped_heat): 319 | """Compute the average of normal and flipped heatmap 320 | :param normal_heat: numpy array, the normal heatmap 321 | :param flipped_heat: numpy 
array, the flipped heatmap 322 | :returns: numpy arrays, the averaged heatmap 323 | """ 324 | 325 | # The order to swap left and right of heatmap 326 | swap_heat = np.array((0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 327 | 13, 8, 9, 10, 15, 14, 17, 16))#, 18 328 | 329 | averaged_heatmap = (normal_heat + flipped_heat[:, ::-1, :][:, :, swap_heat]) / 2. 330 | 331 | return averaged_heatmap 332 | 333 | def prn_process(self, kps, bbox_list, file_name, image_id=0): 334 | 335 | prn_result = [] 336 | 337 | idx = 0 338 | ks = [] 339 | for j in range(17): # joint type 340 | t = [] 341 | for k in kps: 342 | if k[-1] == j: # joint type 343 | x = k[0] 344 | y = k[1] 345 | v = 1 # k[2] 346 | if v > 0: 347 | t.append([x, y, 1, idx]) 348 | idx += 1 349 | ks.append(t) 350 | peaks = ks 351 | 352 | w = int(18 * self.params.coeff) 353 | h = int(28 * self.params.coeff) 354 | 355 | bboxes = [] 356 | for bbox_item in bbox_list: 357 | bboxes.append([bbox_item[0], bbox_item[1], bbox_item[2]-bbox_item[0], bbox_item[3]-bbox_item[1]]) 358 | 359 | if len(bboxes) == 0 or len(peaks) == 0: 360 | return prn_result 361 | 362 | weights_bbox = np.zeros((len(bboxes), h, w, 4, 17)) 363 | 364 | for joint_id, peak in enumerate(peaks): # joint_id: which joint 365 | for instance_id, instance in enumerate(peak): # instance_id: which people 366 | p_x = instance[0] 367 | p_y = instance[1] 368 | for bbox_id, b in enumerate(bboxes): 369 | is_inside = p_x > b[0] - b[2] * self.params.in_thres and \ 370 | p_y > b[1] - b[3] * self.params.in_thres and \ 371 | p_x < b[0] + b[2] * (1.0 + self.params.in_thres) and \ 372 | p_y < b[1] + b[3] * (1.0 + self.params.in_thres) 373 | if is_inside: 374 | x_scale = float(w) / math.ceil(b[2]) 375 | y_scale = float(h) / math.ceil(b[3]) 376 | x0 = int((p_x - b[0]) * x_scale) 377 | y0 = int((p_y - b[1]) * y_scale) 378 | if x0 >= w and y0 >= h: 379 | x0 = w - 1 380 | y0 = h - 1 381 | elif x0 >= w: 382 | x0 = w - 1 383 | elif y0 >= h: 384 | y0 = h - 1 385 | elif x0 < 0 and y0 < 0: 386 | x0 = 0 387 | y0 = 0 388 | elif x0 < 0: 389 | x0 = 0 390 | elif y0 < 0: 391 | y0 = 0 392 | p = 1e-9 393 | weights_bbox[bbox_id, y0, x0, :, joint_id] = [1, instance[2], instance[3], p] 394 | old_weights_bbox = np.copy(weights_bbox) 395 | 396 | for j in range(weights_bbox.shape[0]): 397 | for t in range(17): 398 | weights_bbox[j, :, :, 0, t] = gaussian(weights_bbox[j, :, :, 0, t]) 399 | 400 | output_bbox = [] 401 | for j in range(weights_bbox.shape[0]): 402 | inp = weights_bbox[j, :, :, 0, :] 403 | input = torch.from_numpy(np.expand_dims(inp, axis=0)).cuda().float() 404 | output, _ = self.model([input, 'prn_subnet']) 405 | temp = np.reshape(output.data.cpu().numpy(), (56, 36, 17)) 406 | output_bbox.append(temp) 407 | 408 | output_bbox = np.array(output_bbox) 409 | 410 | keypoints_score = [] 411 | 412 | for t in range(17): 413 | indexes = np.argwhere(old_weights_bbox[:, :, :, 0, t] == 1) 414 | keypoint = [] 415 | for i in indexes: 416 | cr = crop(output_bbox[i[0], :, :, t], (i[1], i[2]), N=15) 417 | score = np.sum(cr) 418 | 419 | kp_id = old_weights_bbox[i[0], i[1], i[2], 2, t] 420 | kp_score = old_weights_bbox[i[0], i[1], i[2], 1, t] 421 | p_score = old_weights_bbox[i[0], i[1], i[2], 3, t] ## ?? 
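# `score` is the summed PRN response in an N=15 window around this peak cell,
# and `kp_score` is the detection confidence of the peak itself; their product
# (computed just below) is what ranks candidate keypoint-to-bbox assignments.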
422 | bbox_id = i[0] 423 | 424 | score = kp_score * score 425 | 426 | s = [kp_id, bbox_id, kp_score, score] 427 | 428 | keypoint.append(s) 429 | keypoints_score.append(keypoint) 430 | 431 | bbox_keypoints = np.zeros((weights_bbox.shape[0], 17, 3)) 432 | bbox_ids = np.arange(len(bboxes)).tolist() 433 | 434 | # kp_id, bbox_id, kp_score, my_score 435 | for i in range(17): 436 | joint_keypoints = keypoints_score[i] 437 | if len(joint_keypoints) > 0: # if have output result in one type keypoint 438 | 439 | kp_ids = list(set([x[0] for x in joint_keypoints])) 440 | 441 | table = np.zeros((len(bbox_ids), len(kp_ids), 4)) 442 | 443 | for b_id, bbox in enumerate(bbox_ids): 444 | for k_id, kp in enumerate(kp_ids): 445 | own = [x for x in joint_keypoints if x[0] == kp and x[1] == bbox] 446 | 447 | if len(own) > 0: 448 | table[bbox, k_id] = own[0] 449 | else: 450 | table[bbox, k_id] = [0] * 4 451 | 452 | for b_id, bbox in enumerate(bbox_ids): # all bbx, from 0 to ... 453 | 454 | row = np.argsort(-table[bbox, :, 3]) # in bbx(bbox), sort from big to small, keypoint score 455 | 456 | if table[bbox, row[0], 3] > 0: # score 457 | for r in row: # all keypoints 458 | if table[bbox, r, 3] > 0: 459 | column = np.argsort( 460 | -table[:, r, 3]) # sort all keypoints r, from big to small, bbx score 461 | 462 | if bbox == column[0]: # best bbx. best keypoint 463 | bbox_keypoints[bbox, i, :] = [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][ 464 | 0] 465 | break 466 | else: # for bbx column[0], the worst keypoint is row2[0], 467 | row2 = np.argsort(table[column[0], :, 3]) 468 | if row2[0] == r: 469 | bbox_keypoints[bbox, i, :] = \ 470 | [x[:3] for x in peaks[i] if x[3] == table[bbox, r, 0]][0] 471 | break 472 | else: # len(joint_keypoints) == 0: 473 | for j in range(weights_bbox.shape[0]): 474 | b = bboxes[j] 475 | x_scale = float(w) / math.ceil(b[2]) 476 | y_scale = float(h) / math.ceil(b[3]) 477 | 478 | for t in range(17): 479 | indexes = np.argwhere(old_weights_bbox[j, :, :, 0, t] == 1) 480 | if len(indexes) == 0: 481 | max_index = np.argwhere(output_bbox[j, :, :, t] == np.max(output_bbox[j, :, :, t])) 482 | bbox_keypoints[j, t, :] = [max_index[0][1] / x_scale + b[0], 483 | max_index[0][0] / y_scale + b[1], 0] 484 | 485 | my_keypoints = [] 486 | 487 | for i in range(bbox_keypoints.shape[0]): 488 | k = np.zeros(51) 489 | k[0::3] = bbox_keypoints[i, :, 0] 490 | k[1::3] = bbox_keypoints[i, :, 1] 491 | k[2::3] = bbox_keypoints[i, :, 2] 492 | 493 | pose_score = 0 494 | count = 0 495 | for f in range(17): 496 | if bbox_keypoints[i, f, 0] != 0 and bbox_keypoints[i, f, 1] != 0: 497 | count += 1 498 | pose_score += bbox_keypoints[i, f, 2] 499 | pose_score /= 17.0 500 | 501 | my_keypoints.append(k) 502 | 503 | image_data = { 504 | 'image_id': image_id, 505 | 'file_name': file_name, 506 | 'category_id': 1, 507 | 'bbox': bboxes[i], 508 | 'score': pose_score, 509 | 'keypoints': k.tolist() 510 | } 511 | prn_result.append(image_data) 512 | 513 | return prn_result 514 | 515 | def val(self): 516 | self.model.eval() 517 | logs = OrderedDict() 518 | sum_loss = meter_utils.AverageValueMeter() 519 | logger.info('Val on validation set...') 520 | 521 | self.batch_timer.clear() 522 | self.data_timer.clear() 523 | self.batch_timer.tic() 524 | self.data_timer.tic() 525 | for step, batch in enumerate(self.val_data): 526 | self.data_timer.toc() 527 | 528 | inputs, gts, _ = self.batch_processor(self, batch) 529 | _, saved_for_loss = self.model(*inputs) 530 | self.batch_timer.toc() 531 | 532 | loss, saved_for_log = 
self.model.module.build_loss(saved_for_loss, *gts) 533 | sum_loss.add(loss.item()) 534 | self._process_log(saved_for_log, logs) 535 | 536 | if step % self.params.print_freq == 0: 537 | self._print_log(step, logs, 'Validation', max_n_batch=len(self.val_data)) 538 | 539 | self.data_timer.tic() 540 | self.batch_timer.tic() 541 | 542 | mean, std = sum_loss.value() 543 | logger.info('\n\nValidation loss: mean: {}, std: {}'.format(mean, std)) 544 | 545 | def _load_ckpt(self, ckpt): 546 | _, _ = net_utils.load_net(ckpt, self.model, load_state_dict=True) 547 | 548 | def _process_log(self, src_dict, dest_dict): 549 | for k, v in src_dict.items(): 550 | if isinstance(v, (int, float)): 551 | dest_dict.setdefault(k, meter_utils.AverageValueMeter()) 552 | dest_dict[k].add(float(v)) 553 | else: 554 | dest_dict[k] = v 555 | 556 | def _print_log(self, step, log_values, title='', max_n_batch=None): 557 | log_str = '{}\n'.format(self.params.exp_name) 558 | log_str += '{}: epoch {}'.format(title, 0) 559 | 560 | log_str += '[{}/{}]'.format(step, max_n_batch) 561 | 562 | i = 0 563 | for k, v in log_values.items(): 564 | if isinstance(v, meter_utils.AverageValueMeter): 565 | mean, std = v.value() 566 | log_str += '\n\t{}: {:.10f}'.format(k, mean) 567 | i += 1 568 | 569 | if max_n_batch: 570 | # print time 571 | data_time = self.data_timer.duration + 1e-6 572 | batch_time = self.batch_timer.duration + 1e-6 573 | rest_seconds = int((max_n_batch - step) * batch_time) 574 | log_str += '\n\t({:.2f}/{:.2f}s,' \ 575 | ' fps:{:.1f}, rest: {})'.format(data_time, batch_time, 576 | self.params.batch_size / batch_time, 577 | str(datetime.timedelta(seconds=rest_seconds))) 578 | self.batch_timer.clear() 579 | self.data_timer.clear() 580 | 581 | logger.info(log_str) 582 | --------------------------------------------------------------------------------
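# Usage sketch (illustrative only): one way the Tester above could be driven
# for the demo images.  `build_posenet()` is a hypothetical stand-in -- the
# real constructor lives in network/posenet.py and its signature is not shown
# in this file; only TestParams, Tester.test() and Tester.coco_eval() come
# from evaluate/tester.py itself.
from evaluate.tester import Tester, TestParams

params = TestParams()
params.gpus = [0]
params.ckpt = './demo/models/ckpt_baseline_resnet101.h5'
params.testresult_write_image = True       # write *_canvas.png files to demo/output/

model = build_posenet(trunk=params.trunk)  # hypothetical helper, see network/posenet.py
tester = Tester(model, params)
tester.test()                              # run on every image in params.testdata_dir
# tester.coco_eval()                       # or score on COCO val2017 (needs params.coco_root)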