├── models ├── ops │ ├── align2nat │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── swap_align2nat.cpython-37.pyc │ │ │ └── swap_align2nat.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── roi_align.cpython-35.pyc │ │ │ └── swap_align2nat.py │ │ ├── setup.py │ │ └── src │ │ │ ├── swap_align2nat_cuda.cpp │ │ │ └── swap_align2nat_kernel.cu │ └── sigmoid_focal_loss │ │ ├── functions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── sigmoid_focal_loss.cpython-37.pyc │ │ └── sigmoid_focal_loss.py │ │ ├── modules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── sigmoid_focal_loss.cpython-37.pyc │ │ └── sigmoid_focal_loss.py │ │ ├── __init__.py │ │ ├── setup.py │ │ └── src │ │ ├── sigmoid_focal_loss.cpp │ │ └── sigmoid_focal_loss_cuda.cu ├── tensormask.py ├── losses.py ├── res_fpn.py └── detector.py ├── img ├── test.png └── test_2.png ├── README.md ├── config.py ├── demo.py ├── eval.py ├── exp └── coco_person │ ├── logs_2019-12-07-23-55 │ ├── log.txt │ └── opt.txt │ └── opt.txt ├── train.py ├── show_pred_window.py └── lib ├── trainer.py ├── optimer.py ├── coco.py └── utils.py /models/ops/align2nat/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/ops/align2nat/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/ops/align2nat/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/functions/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/img/test.png -------------------------------------------------------------------------------- /img/test_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/img/test_2.png -------------------------------------------------------------------------------- /models/ops/align2nat/modules/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/modules/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/ops/align2nat/modules/__pycache__/roi_align.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/modules/__pycache__/roi_align.cpython-35.pyc -------------------------------------------------------------------------------- /models/ops/align2nat/functions/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/functions/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /models/ops/align2nat/functions/__pycache__/swap_align2nat.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/functions/__pycache__/swap_align2nat.cpython-37.pyc -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/functions/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/functions/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/modules/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/modules/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/functions/__pycache__/sigmoid_focal_loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/functions/__pycache__/sigmoid_focal_loss.cpython-37.pyc -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/modules/__pycache__/sigmoid_focal_loss.cpython-37.pyc: -------------------------------------------------------------------------------- 
class SwapAlign2Nat(Module):
    """``nn.Module`` front-end for :class:`SwapAlign2NatFunction`.

    The module owns no parameters; it only remembers the operator settings
    given at construction time and replays them on every forward pass.

    Args:
        alpha: forwarded unchanged to the autograd function.
        lamda: forwarded unchanged to the autograd function.
        pad_val: forwarded unchanged to the autograd function.
        align_corners: forwarded unchanged to the autograd function.
    """

    def __init__(self, alpha=1, lamda=1, pad_val=-9.0, align_corners=True):
        super().__init__()
        self.alpha, self.lamda = alpha, lamda
        self.pad_val = pad_val
        self.align_corners = align_corners

    def forward(self, features):
        """Apply the op to ``features`` with the stored settings."""
        # Argument order expected by SwapAlign2NatFunction.forward:
        # (features, alpha, lamda, pad_val, align_corners).
        settings = (self.alpha, self.lamda, self.pad_val, self.align_corners)
        return SwapAlign2NatFunction.apply(features, *settings)
# TODO: remove this module
class SigmoidFocalLoss(nn.Module):
    """Module form of ``sigmoid_focal_loss`` that reduces the loss by summing.

    Args:
        gamma: forwarded to ``sigmoid_focal_loss``.
        alpha: forwarded to ``sigmoid_focal_loss``.
    """

    def __init__(self, gamma, alpha):
        super().__init__()
        self.gamma, self.alpha = gamma, alpha

    def forward(self, logits, targets):
        """Return the summed focal loss; ``logits`` must live on a CUDA device."""
        assert logits.is_cuda
        per_element = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha)
        return per_element.sum()

    def __repr__(self):
        # ``!s`` keeps the explicit str() conversion for both settings.
        name = self.__class__.__name__
        return f"{name}(gamma={self.gamma!s}, alpha={self.alpha!s})"
class SwapAlign2NatFunction(Function):
    """Autograd binding for the compiled ``swap_align2nat_cuda`` extension.

    Only CUDA tensors are supported: ``forward`` raises
    ``NotImplementedError`` for anything else.
    """

    @staticmethod
    def forward(ctx, features, alpha, lamda, pad_val, align_corners):
        """Run the extension's forward kernel on ``features``."""
        # Remember the non-tensor settings so backward() can replay them.
        ctx.feature_size = features.size()
        ctx.alpha, ctx.lamda, ctx.align_corners = alpha, lamda, align_corners
        if not features.is_cuda:
            raise NotImplementedError
        # The extension takes (align_corners, pad_val) in the opposite order
        # to this method's own signature.
        return swap_align2nat_cuda.forward(
            features, alpha, lamda, align_corners, pad_val)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``features``; the settings get ``None``."""
        assert (ctx.feature_size is not None and grad_output.is_cuda)
        grad_input = swap_align2nat_cuda.backward(
            grad_output.contiguous(), ctx.alpha, ctx.lamda, ctx.align_corners)
        # One slot per forward() argument after ctx: features + 4 settings.
        return (grad_input,) + (None,) * 4


swap_align2nat = SwapAlign2NatFunction.apply
class SigmoidFocalLossFunction(Function):
    """Autograd binding for the compiled ``sigmoid_focal_loss_cuda`` extension.

    ``forward`` returns whatever the extension's ``forward`` returns (the
    caller in ``SigmoidFocalLoss`` reduces it with ``.sum()``); ``backward``
    delegates to the extension's ``backward`` and is only differentiable once.
    """

    @staticmethod
    def forward(ctx, input, target, gamma=2.0, alpha=0.25):
        """Compute the loss for ``input`` logits against ``target`` labels.

        The class count handed to the kernel is read from ``input.shape[1]``.
        """
        ctx.save_for_backward(input, target)
        num_classes = input.shape[1]
        ctx.num_classes = num_classes
        ctx.gamma = gamma
        ctx.alpha = alpha

        loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
                                               gamma, alpha)
        return loss

    @staticmethod
    @once_differentiable
    def backward(ctx, d_loss):
        """Return the gradient for ``input``; other arguments get ``None``."""
        input, target = ctx.saved_tensors
        d_input = sigmoid_focal_loss_cuda.backward(
            input, target, d_loss.contiguous(),
            ctx.num_classes, ctx.gamma, ctx.alpha)
        # Exactly one entry per forward() argument (input, target, gamma,
        # alpha). The previous version returned a fifth, surplus ``None``.
        return d_input, None, None, None


sigmoid_focal_loss = SigmoidFocalLossFunction.apply
## Global experiment configuration shared by train.py, eval.py, demo.py and
## show_pred_window.py, which all import it as ``from config import cfg as opt``
## and mutate it in place.
from easydict import EasyDict

cfg = EasyDict()

## --- model ---
cfg.backbone = 'resnet50'
cfg.frezeBN = False  ## passed to TensorMask as ``freezeBN`` (key spelling is part of the interface)
cfg.frezeLayer = False  ## passed to TensorMask as ``freezeLayers``
cfg.align_corners = False ## ref torch.nn.functional.interpolate /// when align_corners==False : [Follow Opencv resize logic]
cfg.weights = ''  ## checkpoint path; train.py only calls load_model when this is non-empty
cfg.resume = True  ## forwarded to load_model together with the optimizer
cfg.device = 'cuda'

## --- loss weights ---
cfg.cls_weights = 1.
cfg.xywh_weights = 1.
cfg.mask_weights = 2. ## from paper

## --- data ---
cfg.data_dir = '/data0/cao/DataSet/coco'
cfg.num_class = 1
cfg.class_name = 'person' ## [person , *]
cfg.input_h = 512 ## 512 % 128 = 0
cfg.input_w = 640 ## 640 % 128 = 0
## NOTE(review): the formula below uses 12 (the base_window recorded in the
## archived exp/coco_person/opt.txt), not the 8 assigned here — confirm.
cfg.base_window = 8 ## base_window%2==0 in this impl..|| max window = 12* base_stride * 2^5 / 2 = 768 > 640
cfg.base_stride = 4 ## feat_2 --> strideHW=4
cfg.k = 5 # 0 1 2 3 4 5  (show_pred_window.py uses strides base_stride * 2**i for i in range(k + 1))
cfg.max_objs = 45
cfg.jitter = 0.3


## --- optimisation ---
cfg.lr = 0.02 ## from paper
cfg.num_epochs = 72 ## from paper
cfg.lr_step = [64,70] ## from paper; train.py multiplies lr by 0.1 at each listed epoch
cfg.warm_up = 1000  ## NOTE(review): unit (iterations?) is not visible from here
cfg.batch_size = 6  ## per GPU: train.py multiplies this by the number of GPUs in gpus_str

cfg.gpus_str = '0,1,2,3'  ## exported by train.py as CUDA_VISIBLE_DEVICES

## --- bookkeeping / visualisation ---
cfg.save_dir = 'exp'  ## train.py joins this with exp_id to get the output directory
cfg.exp_id = 'coco_person_8'
cfg.print_iter = 1
cfg.test = False  ## show_pred_window.py sets this to True before building its dataset
cfg.vis_thresh = 0.3
cfg.show_box = True
cfg.demo = ''  ## overwritten by demo.py
Detector(opt) 14 | cv2.namedWindow('result', cv2.WINDOW_NORMAL) 15 | cv2.resizeWindow('result', 1024, 768) 16 | if opt.demo == 'webcam' or \ 17 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 18 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 19 | 20 | while True: 21 | _, img = cam.read() 22 | ret = detector.run(img) 23 | if cv2.waitKey(1) == 27: 24 | break 25 | else: 26 | if os.path.isdir(opt.demo): 27 | image_names = [] 28 | ls = os.listdir(opt.demo) 29 | for file_name in sorted(ls): 30 | ext = file_name[file_name.rfind('.') + 1:].lower() 31 | if ext in image_ext: 32 | image_names.append(os.path.join(opt.demo, file_name)) 33 | elif opt.demo.endswith('.txt'): 34 | image_names = [] 35 | with open(opt.demo) as f: 36 | lines = f.readlines() 37 | for file_name in sorted(lines): 38 | file_name = file_name.strip() 39 | if file_name.split('.')[-1] in image_ext: 40 | image_names.append(file_name) 41 | else: 42 | image_names = [opt.demo] 43 | 44 | for (image_name) in image_names: 45 | ret = detector.run(image_name) 46 | if cv2.waitKey(0) == 27: 47 | break 48 | 49 | -------------------------------------------------------------------------------- /models/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 
| const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | } 26 | 27 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 28 | const at::Tensor &targets, 29 | const at::Tensor &d_losses, 30 | const int num_classes, const float gamma, 31 | const float alpha) { 32 | if (logits.type().is_cuda()) { 33 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 34 | num_classes, gamma, alpha); 35 | } 36 | } 37 | 38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 39 | m.def("forward", &SigmoidFocalLoss_forward, 40 | "SigmoidFocalLoss forward (CUDA)"); 41 | m.def("backward", &SigmoidFocalLoss_backward, 42 | "SigmoidFocalLoss backward (CUDA)"); 43 | } 44 | -------------------------------------------------------------------------------- /models/ops/align2nat/src/swap_align2nat_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 7 | #define CHECK_CONTIGUOUS(x) \ 8 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 9 | #define CHECK_INPUT(x) \ 10 | CHECK_CUDA(x); \ 11 | CHECK_CONTIGUOUS(x) 12 | 13 | int SwapAlign2NatForwardLaucher(const at::Tensor& input,at::Tensor& output, 14 | const int alpha,const bool align_corners,const float pad_val); 15 | int SwapAlign2NatBackwardLaucher(const at::Tensor& grad_output,at::Tensor& grad_input, 16 | const int alpha,const bool align_corners); 17 | 18 | at::Tensor swap_align2nat_forward_cuda(const at::Tensor& input , const int alpha, const int lamda,const bool align_corners,const float pad_val) { 19 | CHECK_INPUT(input); 20 | int B = input.size(0); 21 | int V = input.size(1); 22 | int U = input.size(2); 23 | int H = input.size(3); 24 | int W = input.size(4); 25 | auto output = torch::zeros_like(input); 
## Evaluate a trained checkpoint on COCO val2017 with the "segm" metric.
import os
from models.detector import Detector
from pycocotools.cocoeval import COCOeval
import pycocotools.coco as coco
import pycocotools.mask as mask_util
import numpy as np
from tqdm import tqdm
from config import cfg as opt
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
## contiguous class index -> COCO category id (used when class_name == '*')
valid_ids = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
    14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
    37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
    72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
    82, 84, 85, 86, 87, 88, 89, 90]

## config recover weights
opt.weights = 'exp/coco_person/model_last.pth'
## config.py defines ``vis_thresh``; the old ``vis_trehs`` spelling only created
## an unused key, so the low evaluation threshold never took effect.
## NOTE(review): assumes Detector reads ``opt.vis_thresh`` — confirm.
opt.vis_thresh = 0.01
split = 'val'

detector = Detector(opt)
## Format the file name only, so braces in data_dir cannot break the path.
data = coco.COCO(os.path.join(
    opt.data_dir, 'annotations',
    'instances_{}2017.json'.format(split)))

single_class = opt.class_name != '*'
imgIds = data.getImgIds()
catIds = None
if single_class:  ## for one class
    ## Pass a list: pycocotools treats a bare string as array-like and would
    ## match category names by substring.
    catIds = data.getCatIds(catNms=[opt.class_name])
    if not catIds:
        raise ValueError('unknown COCO category: {!r}'.format(opt.class_name))
    imgIds = data.getImgIds(catIds=catIds)
    valid_ids = catIds

image_dir = os.path.join(opt.data_dir, '{}2017'.format(split))
detections = []
## Only images that take part in the evaluation are run through the network;
## COCOeval ignores detections on any other image anyway.
for img_id in tqdm(imgIds):
    file_name = data.loadImgs(ids=[img_id])[0]['file_name']
    img_name = os.path.join(image_dir, file_name).strip()
    boxs, masks = detector.run(img_name, vis=False)
    for det, mask in zip(boxs, masks):
        x, y, x1, y1, conf, cls = det[:6]
        detections.append({
            "image_id": img_id,
            "category_id": int(valid_ids[int(cls)]),
            'segmentation': mask_util.encode(np.asfortranarray(mask)),
            #"bbox": [x, y, x1 - x, y1 - y],
            "score": float("{:.2f}".format(conf))
        })

if not detections:
    ## loadRes() indexes the first element and would fail with an IndexError.
    raise SystemExit('no detections above the score threshold; nothing to evaluate')

coco_dets = data.loadRes(detections)
coco_eval = COCOeval(data, coco_dets, "segm")

if single_class:  ## for one class
    coco_eval.params.imgIds = imgIds
    coco_eval.params.catIds = catIds

coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
|loss 0.469828 | cls_loss 0.208693 | diou_loss 0.200540 | mask_loss 0.105500 | time 66.383333 | loss 0.477843 | cls_loss 0.207729 | diou_loss 0.195091 | mask_loss 0.110670 | time 2.850000 | 4 | 2019-12-08-04-30: epoch: 4 |loss 0.452117 | cls_loss 0.200818 | diou_loss 0.191730 | mask_loss 0.101684 | time 66.300000 | loss 0.470088 | cls_loss 0.204463 | diou_loss 0.187136 | mask_loss 0.109420 | time 2.866667 | 5 | 2019-12-08-05-39: epoch: 5 |loss 0.442711 | cls_loss 0.196554 | diou_loss 0.186292 | mask_loss 0.099792 | time 66.033333 | loss 0.469164 | cls_loss 0.204524 | diou_loss 0.183815 | mask_loss 0.109343 | time 2.883333 | 6 | 2019-12-08-06-48: epoch: 6 |loss 0.432938 | cls_loss 0.192415 | diou_loss 0.181634 | mask_loss 0.097557 | time 66.116667 | loss 0.455528 | cls_loss 0.197939 | diou_loss 0.179313 | mask_loss 0.106380 | time 2.866667 | 7 | 2019-12-08-07-57: epoch: 7 |loss 0.427711 | cls_loss 0.190262 | diou_loss 0.178843 | mask_loss 0.096369 | time 66.183333 | loss 0.449601 | cls_loss 0.197045 | diou_loss 0.175069 | mask_loss 0.104394 | time 2.833333 | 8 | 2019-12-08-09-06: epoch: 8 |loss 0.421918 | cls_loss 0.187740 | diou_loss 0.176018 | mask_loss 0.095087 | time 66.100000 | loss 0.450542 | cls_loss 0.200763 | diou_loss 0.171751 | mask_loss 0.103420 | time 2.916667 | 9 | 2019-12-08-10-16: epoch: 9 |loss 0.418336 | cls_loss 0.185566 | diou_loss 0.174558 | mask_loss 0.094565 | time 66.550000 | loss 0.450484 | cls_loss 0.201971 | diou_loss 0.172454 | mask_loss 0.102700 | time 2.866667 | 10 | 2019-12-08-11-25: epoch: 10 |loss 0.413934 | cls_loss 0.184827 | diou_loss 0.172182 | mask_loss 0.093031 | time 66.266667 | loss 0.438343 | cls_loss 0.193945 | diou_loss 0.170348 | mask_loss 0.100905 | time 2.950000 | 11 | 2019-12-08-12-35: epoch: 11 |loss 0.412394 | cls_loss 0.183310 | diou_loss 0.170863 | mask_loss 0.093184 | time 66.466667 | loss 0.450937 | cls_loss 0.203606 | diou_loss 0.170497 | mask_loss 0.102353 | time 2.916667 | 12 | 
-------------------------------------------------------------------------------- /exp/coco_person/opt.txt: -------------------------------------------------------------------------------- 1 | ==> torch version: 1.0.0 2 | ==> cudnn version: 7401 3 | ==> Cmd: 4 | ['train.py'] 5 | ==> Opt: 6 | align_corners: False 7 | backbone: resnet50 8 | base_stride: 4 9 | base_window: 12 10 | batch_size: 18 11 | class_name: person 12 | clear: 13 | cls_weights: 1.0 14 | copy: 15 | data_dir: /data/yoloCao/DataSet/coco 16 | device: cuda 17 | exp_id: coco_person 18 | frezeBN: False 19 | frezeLayer: False 20 | fromkeys: 21 | get: 22 | gpus: [0, 1, 2] 23 | gpus_str: 0,1,2 24 | input_h: 512 25 | input_w: 640 26 | items: 27 | jitter: 0.3 28 | k: 5 29 | keys: 30 | lr: 0.02 31 | lr_step: [64, 70] 32 | mask_weights: 2.0 33 | num_class: 1 34 | num_epochs: 72 35 | pop: 36 | popitem: 37 | print_iter: 1 38 | save_dir: exp/coco_person 39 | setdefault: 40 | test: False 41 | update: 42 | values: 43 | warm_up: 1000 44 | weights: 45 | xywh_weights: 0.25 46 | -------------------------------------------------------------------------------- /exp/coco_person/logs_2019-12-07-23-55/opt.txt: -------------------------------------------------------------------------------- 1 | ==> torch version: 1.0.0 2 | ==> cudnn version: 7401 3 | ==> Cmd: 4 | ['train.py'] 5 | ==> Opt: 6 | align_corners: False 7 | backbone: resnet50 8 | base_stride: 4 9 | base_window: 12 10 | batch_size: 18 11 | class_name: person 12 | clear: 13 | cls_weights: 1.0 14 | copy: 15 | data_dir: /data/yoloCao/DataSet/coco 16 | device: cuda 17 | exp_id: coco_person 18 | frezeBN: False 19 | frezeLayer: False 20 | fromkeys: 21 | get: 22 | gpus: [0, 1, 2] 23 | gpus_str: 0,1,2 24 | input_h: 512 25 | input_w: 640 26 | items: 27 | jitter: 0.3 28 | k: 5 29 | keys: 30 | lr: 0.02 31 | lr_step: [64, 70] 32 | mask_weights: 2.0 33 | num_class: 1 34 | num_epochs: 72 35 | pop: 36 | popitem: 37 | print_iter: 1 38 | save_dir: exp/coco_person 39 | setdefault: 40 | 
## Training entry point: builds TensorMask, trains on COCO and checkpoints
## the best / last models under ``<save_dir>/<exp_id>``.
from models.tensormask import TensorMask
from lib.trainer import Trainer
from lib.utils import load_model,save_model,Logger
from lib.coco import COCO
from lib import optimer
from config import cfg as opt
import torch
import os


def main():
    """Run the full training / validation loop configured by ``config.cfg``."""
    torch.backends.cudnn.benchmark = True
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    ## After masking with CUDA_VISIBLE_DEVICES the visible devices are
    ## renumbered from 0, so only the *count* of requested GPUs matters.
    opt.gpus = list(range(len(opt.gpus_str.split(','))))
    opt.batch_size = opt.batch_size * len(opt.gpus)  ## cfg.batch_size is per GPU
    opt.save_dir = os.path.join(opt.save_dir, opt.exp_id)
    logger = Logger(opt)

    model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                       base_window=opt.base_window,
                       freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                       align_corners=opt.align_corners)

    ## Weight decay is applied to 4-D tensors (conv kernels) only.
    ## warm_up now comes from the config instead of a hard-coded 1000.
    optimizer = optimer.SGD([{'params': filter(lambda x: len(x.size()) == 4, model.parameters()), 'weight_decay': 0.0001},
                             {'params': filter(lambda x: len(x.size()) < 4, model.parameters())}],
                            lr=opt.lr, warm_up=opt.warm_up, momentum=0.9, nesterov=True)
    start_epoch = 0
    if opt.weights != '':
        model, optimizer, start_epoch = load_model(
            model, opt.weights, optimizer, opt.resume, opt.lr, opt.lr_step)
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.device)

    print('Setting up data...')
    val_loader = torch.utils.data.DataLoader(
        COCO(cfg=opt, split='val', augment=False),
        batch_size=8,
        shuffle=False,
        num_workers=8,
        pin_memory=False
    )
    train_loader = torch.utils.data.DataLoader(
        COCO(cfg=opt, split='train', augment=True),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=False
    )

    print('Starting training...')
    best = 1e10  ## lowest validation loss seen so far
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        with torch.no_grad():
            log_dict_val, _ = trainer.val(epoch, val_loader)
        for k, v in log_dict_val.items():
            logger.scalar_summary('val_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if log_dict_val['loss'] < best:
            best = log_dict_val['loss']
            save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                       epoch, model)
        save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                   epoch, model, optimizer)
        logger.write('\n')
        if epoch in opt.lr_step:
            ## Keep a snapshot at every LR milestone, then decay by 10x.
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr


## The guard keeps DataLoader worker processes (spawn start method) from
## re-running the training script when they import this module.
if __name__ == '__main__':
    main()
| model.eval() 20 | model.cuda() 21 | val_loader = torch.utils.data.DataLoader( 22 | COCO(cfg=opt, split='val',augment=False), 23 | batch_size=1, 24 | shuffle=False, 25 | num_workers=1, 26 | pin_memory=True 27 | ) 28 | strides = np.array([opt.base_stride * 2 ** i for i in range(opt.k + 1)]) 29 | windows = np.array([opt.base_window * lamda for lamda in strides], np.int32) 30 | 31 | output_size = np.array(list(zip(opt.input_w // strides, opt.input_h // strides))) 32 | num_det = [output_w * output_h for output_w, output_h in output_size] 33 | det_offset = np.cumsum(num_det) 34 | for batch in val_loader: 35 | image= batch['img'].numpy()[0] 36 | input = batch['input'].cuda() 37 | output= model(input) 38 | 39 | socres, cls = torch.max(output['cls'].sigmoid_(), dim=-1) 40 | socres = socres.detach().cpu().numpy() 41 | cls = cls.detach().cpu().numpy() 42 | box= output['box'].detach().cpu().numpy() 43 | seg = [output['%d'%i].sigmoid_().detach().cpu().numpy() for i in range(opt.k+1)] 44 | topk_inds = np.where(socres > 0.4) 45 | 46 | for det_num in topk_inds[1]: 47 | p = socres[0,det_num] 48 | b = box[0,det_num,:] 49 | for id,num in enumerate(det_offset): 50 | if num > det_num: 51 | break 52 | offset = det_num-det_offset[id-1]if id > 0 else det_num 53 | width,hight = output_size[id] 54 | 55 | ### ct_int_feat 56 | y = int(offset/width) 57 | x = int(offset%width) 58 | 59 | window_seg = seg[id][0,y,x,:,:] 60 | 61 | ### ct_int 62 | x ,y = int((x + 0.5) * strides[id]),int((y + 0.5) * strides[id]) 63 | ### show box 64 | b[0:2] = x - b[0]*strides[id] ,y - b[1]*strides[id] 65 | b[2:4] = x + b[2]*strides[id] ,y + b[3]*strides[id] 66 | b = b.astype(np.int) 67 | cv2.rectangle(image,(b[0],b[1]),(b[2],b[3]),(255,0,0),2) 68 | 69 | 70 | ### show mask 71 | img_h,img_w = image.shape[:2] 72 | paste_x,paste_y,paste_x1,paste_y1= x - windows[id]//2, y- windows[id]//2,x + windows[id]//2,y + windows[id]//2 73 | 74 | window_x,window_y,window_x1,window_y1 = max(-paste_x,0),max(-paste_y,0), \ 75 | 
class Subnet(nn.Module):
    """Tower of four 3x3 conv + ReLU layers with an optional prediction conv.

    When ``num_cls > 0`` a final 3x3 convolution ``fc`` with ``num_cls``
    output channels is attached and ``forward`` returns a tensor of shape
    ``(batch, H * W, num_cls)``.  Otherwise ``forward`` returns the tower
    features unchanged, shape ``(batch, mid_channels, H, W)``.

    Args:
        in_channels: channels of the incoming feature map.
        mid_channels: channels used by every tower convolution.
        num_cls: number of prediction channels; non-positive disables ``fc``.
    """

    def __init__(self, in_channels = 256,mid_channels = 256 ,num_cls = -1):
        super(Subnet, self).__init__()

        # Only the first convolution sees `in_channels`; the rest keep the
        # tower width constant.
        tower = []
        for width_in in (in_channels, mid_channels, mid_channels, mid_channels):
            tower.append(nn.Conv2d(width_in, mid_channels, 3, padding=1))
            tower.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*tower)

        self.num_cls = num_cls
        if num_cls > 0:
            self.fc = nn.Conv2d(mid_channels, num_cls, 3, padding=1)

        # Every convolution created above (tower and, if present, `fc`)
        # gets the same weight init and a zero bias.
        for layer in self.modules():
            if not isinstance(layer, nn.Conv2d):
                continue
            nn.init.kaiming_uniform_(layer.weight, a=1)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Run the tower; flatten spatial positions when ``fc`` is present."""
        features = self.conv(x)
        if not self.num_cls > 0:
            return features

        logits = self.fc(features)
        batch_size = logits.size(0)
        # (B, C, H, W) -> (B, H, W, C) -> (B, H*W, C)
        channels_last = logits.permute(0, 2, 3, 1).contiguous()
        return channels_last.view(batch_size, -1, self.num_cls)
class ModleWithLoss(nn.Module):
    """Pair a network with its loss so a single call returns the loss.

    Args:
        model: callable applied to ``batch['input']``.
        loss: callable taking ``(outputs, batch)`` and returning a
            ``(loss, loss_stats)`` pair.
    """

    def __init__(self, model, loss):
        super().__init__()
        self.model = model
        self.loss = loss

    def forward(self, batch):
        """Return ``(loss, loss_stats)`` for one batch dictionary."""
        predictions = self.model(batch['input'])
        total, stats = self.loss(predictions, batch)
        return total, stats
def diou(bboxes1, bboxes2):
    """Distance-IoU of boxes stored as ``(x1, y1, x2, y2)`` on the last axis.

    The result is ``IoU - d^2 / c^2`` where ``d`` is the distance between the
    two box centres and ``c`` is the diagonal of the smallest box enclosing
    both.  A ``1e-7`` term in each denominator avoids division by zero.

    Args:
        bboxes1: tensor of boxes, last dimension ``(x1, y1, x2, y2)``.
        bboxes2: tensor of boxes with the same layout as ``bboxes1``.

    Returns:
        Tensor of DIoU values with the box dimension removed.
    """
    top_left1, bottom_right1 = bboxes1[..., :2], bboxes1[..., 2:]
    top_left2, bottom_right2 = bboxes2[..., :2], bboxes2[..., 2:]

    # Box areas from (width, height).
    size1 = bottom_right1 - top_left1
    size2 = bottom_right2 - top_left2
    area1 = size1[..., 0] * size1[..., 1]
    area2 = size2[..., 0] * size2[..., 1]

    # Squared distance between the two centres.
    centre_gap = (bottom_right2 + top_left2) / 2 - (bottom_right1 + top_left1) / 2
    centre_dist_sq = centre_gap[..., 0] ** 2 + centre_gap[..., 1] ** 2

    # Intersection rectangle (clamped so disjoint boxes give zero area).
    overlap = torch.clamp(
        torch.min(bottom_right1, bottom_right2) - torch.max(top_left1, top_left2),
        min=0)
    overlap_area = overlap[..., 0] * overlap[..., 1]

    # Smallest enclosing rectangle and its squared diagonal.
    hull = torch.clamp(
        torch.max(bottom_right1, bottom_right2) - torch.min(top_left1, top_left2),
        min=0)
    hull_diag_sq = (hull[..., 0] ** 2) + (hull[..., 1] ** 2)

    union_area = area1 + area2 - overlap_area
    penalty = centre_dist_sq / (hull_diag_sq + 1e-7)
    return overlap_area / (union_area + 1e-7) - penalty
class MaskBCELoss(nn.Module):
    """Masked binary cross-entropy (with logits) over gathered mask windows.

    A ``pos_weight`` of 1.5 is stored as a buffer and passed to the BCE so
    positive pixels weigh more than negative ones.
    """

    def __init__(self):
        super(MaskBCELoss, self).__init__()
        positive_weight = torch.tensor(1.5, dtype=torch.float32)
        self.register_buffer('pos_weight', positive_weight)

    def forward(self, output, mask, ind, target):
        """Average the per-pixel BCE over the windows selected by ``mask``.

        Args:
            output: predicted window logits; reshaped here to
                ``(B, -1, window * window)``.
            mask: ``(B, N)`` validity flags for the ``N`` gathered windows.
            ind: ``(B, N)`` indices of the windows to gather from ``output``.
            target: ``(B, N, window, window)`` ground-truth windows.

        Returns:
            Scalar loss: the masked BCE sum divided by the number of masked
            pixels, or the plain (zero) sum when nothing is selected.
        """
        batch_size = target.size(0)
        num_windows = target.size(1)
        window = target.size(-1)

        flat_windows = output.view(batch_size, -1, window * window)
        gathered = _tranpose_and_gather_feat(flat_windows, ind)
        pred = gathered.view(batch_size, num_windows, window, window)

        # Broadcast the per-window flag over every pixel of the window.
        pixel_mask = mask.unsqueeze(2).unsqueeze(2).expand_as(pred).float()
        per_pixel = F.binary_cross_entropy_with_logits(
            pred, target, pos_weight=self.pos_weight, reduction='none')

        selected = pixel_mask.sum()
        total = (per_pixel * pixel_mask).sum()
        if selected > 0:
            total = total / selected
        return total
class FrozenBatchNorm2d(nn.Module):
    """
    BatchNorm2d whose statistics and affine parameters are constants.

    All four tensors are registered as buffers (not parameters), so they are
    saved with the module but never updated by an optimizer.
    """

    def __init__(self, n):
        super(FrozenBatchNorm2d, self).__init__()
        # Registration order is kept: weight, bias, running_mean, running_var.
        initial_values = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        )
        for buffer_name, make in initial_values:
            self.register_buffer(buffer_name, make(n))

    def forward(self, x):
        """Apply ``(x - mean) / sqrt(var) * weight + bias`` per channel."""
        per_channel = (1, -1, 1, 1)
        # Fold the normalisation into one multiply and one add.
        gain = self.weight.reshape(per_channel) * self.running_var.reshape(per_channel).rsqrt()
        shift = self.bias.reshape(per_channel) - self.running_mean.reshape(per_channel) * gain
        return x * gain + shift
class FeaturePyramidNetwork(nn.Module):
    """Top-down feature pyramid over a list of backbone feature maps.

    For every non-zero entry of ``in_channels_list`` a 1x1 lateral
    convolution (``inner_blocks``) and a 3x3 output convolution
    (``layer_blocks``) are created.  ``forward`` walks the inputs from the
    last (coarsest) to the first, bilinearly upsampling the running top-down
    feature to each lateral's size and adding it.

    Args:
        in_channels_list: channel count of each input feature map; entries
            equal to 0 get no convolution.
        out_channels: channel count of every pyramid output.
        extra_blocks: optional callable ``(results, x) -> results`` applied
            to the list of outputs at the end of ``forward``.
        align_corners: forwarded to ``F.interpolate``.
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None ,align_corners=True):
        super(FeaturePyramidNetwork, self).__init__()
        self.align_corners = align_corners
        self.inner_blocks = nn.ModuleList()
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            self.inner_blocks.append(nn.Conv2d(in_channels, out_channels, 1))
            self.layer_blocks.append(nn.Conv2d(out_channels, out_channels, 3, padding=1))

        # Initialize parameters now, before `extra_blocks` is attached, so
        # the extra blocks keep their own initialization.
        # `self.modules()` is required here: the direct children are the two
        # ModuleLists, so iterating `self.children()` never reaches a Conv2d
        # and the init below would silently be skipped.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def forward(self, x):
        """Return the pyramid outputs as a tuple ordered like ``x``.

        Args:
            x: sequence of feature maps, first to last as in
                ``in_channels_list``.
        """
        last_inner = self.inner_blocks[-1](x[-1])
        results = [self.layer_blocks[-1](last_inner)]

        # Remaining levels, visited from second-to-last down to the first.
        laterals = list(self.inner_blocks)[:-1]
        outputs = list(self.layer_blocks)[:-1]
        for feature, inner_block, layer_block in zip(
                x[:-1][::-1], laterals[::-1], outputs[::-1]):
            inner_lateral = inner_block(feature)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(
                last_inner, size=feat_shape, mode="bilinear",
                align_corners=self.align_corners)
            last_inner = inner_lateral + inner_top_down
            results.insert(0, layer_block(last_inner))

        if self.extra_blocks is not None:
            results = self.extra_blocks(results, x)

        return tuple(results)
// Forward sigmoid focal loss: each loop index i handles one logit.
//
// Index i is split into sample n = i / num_classes and class
// d = i % num_classes.  Targets are 1-based: class d is the positive class of
// sample n when targets[n] == d + 1.  A target >= 0 that does not match d
// makes the logit a negative; a target < 0 zeroes both cases, so that sample
// contributes no loss.
//
//   nthreads     number of logits to process
//   logits       flat logits, indexed by i
//   targets      per-sample labels, indexed by n
//   num_classes  number of class columns per sample
//   gamma, alpha focal-loss focusing exponent and positive-class weight
//   num          unused by the kernel body
//   losses       output, one loss value per logit (same indexing as logits)
template <typename scalar_t>
__global__ void SigmoidFocalLossForward(const int nthreads,
                                        const scalar_t *logits,
                                        const long *targets,
                                        const int num_classes,
                                        const float gamma, const float alpha,
                                        const int num, scalar_t *losses) {
  CUDA_1D_KERNEL_LOOP(i, nthreads) {
    int n = i / num_classes;
    int d = i % num_classes;  // current class, 0-based
    int t = targets[n];       // target class, 1-based

    // Decide whether this logit is the positive or a negative case.
    scalar_t c1 = (t == (d + 1));
    scalar_t c2 = ((t >= 0) && (t != (d + 1)));

    scalar_t zn = (1.0 - alpha);
    scalar_t zp = (alpha);

    // p = sigmoid(x)
    scalar_t p = 1. / (1. + expf(-logits[i]));

    // (1 - p)**gamma * log(p); FLT_MIN keeps log() away from zero.
    scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));

    // p**gamma * log(1 - p), with log(1 - p) written as
    // -max(x, 0) - log(1 + exp(-|x|)) so it stays finite for large |x|.
    scalar_t term2 =
        powf(p, gamma) *
        (-1. * logits[i] * (logits[i] >= 0) -
         logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0))));

    losses[i] = 0.0;
    losses[i] += -c1 * term1 * zp;
    losses[i] += -c2 * term2 * zn;

  }  // CUDA_1D_KERNEL_LOOP
}  // SigmoidFocalLossForward
// Host wrapper: validate the inputs, allocate the per-logit loss tensor and
// launch SigmoidFocalLossForward.
//
//   logits       CUDA tensor of shape (num_samples, num_classes)
//   targets      CUDA tensor of per-sample labels
//   num_classes  must equal logits.size(1)
//   gamma, alpha focal-loss parameters forwarded to the kernel
//
// Returns a tensor with the same shape and options as `logits` holding the
// un-reduced loss of every logit.
at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                         const at::Tensor &targets,
                                         const int num_classes,
                                         const float gamma, const float alpha) {
  AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
  AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
  AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
  // Same check as the backward wrapper: the kernel derives the sample index
  // from num_classes, so a mismatch would read the wrong target.
  AT_ASSERTM(logits.size(1) == num_classes,
             "logits.size(1) should be num_classes");

  const int num_samples = logits.size(0);

  auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
  auto losses_size = num_samples * logits.size(1);

  // Nothing to compute for an empty input; skip the launch entirely.
  if (losses.numel() == 0) {
    THCudaCheck(cudaGetLastError());
    return losses;
  }

  // 512 threads per block, at most 4096 blocks.
  dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L));
  dim3 block(512);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      logits.type(), "SigmoidFocalLoss_forward", [&] {
        SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
            losses_size, logits.contiguous().data<scalar_t>(),
            targets.contiguous().data<long>(), num_classes, gamma, alpha,
            num_samples, losses.data<scalar_t>());
      });
  THCudaCheck(cudaGetLastError());
  return losses;
}
class SGD(Optimizer):
    """SGD with momentum and a built-in linear learning-rate warm-up.

    For the first ``warm_up`` calls to :meth:`step`, each parameter group's
    learning rate is scaled by ``step_number / warm_up``.  After that the
    group's ``lr`` is used as-is, so changes made to
    ``param_group['lr']`` from outside take effect on the next step.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr: base learning rate (required).
        momentum: momentum factor; 0 disables the momentum buffer.
        dampening: dampening applied to the gradient in the momentum update.
        weight_decay: L2 penalty coefficient.
        nesterov: enable Nesterov momentum (needs momentum > 0 and zero
            dampening).
        warm_up: number of steps over which the learning rate ramps up.

    Raises:
        ValueError: on a negative ``lr``, ``momentum`` or ``weight_decay``,
            or an invalid Nesterov configuration.
    """

    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False ,warm_up = 1000 ):
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if weight_decay < 0.0:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))

        defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
                        weight_decay=weight_decay, nesterov=nesterov)
        if nesterov and (momentum <= 0 or dampening != 0):
            raise ValueError("Nesterov momentum requires a momentum and zero dampening")
        # Warm-up bookkeeping lives on the optimizer, not in `state`.
        self.setp_num = 0
        self.warm_up = warm_up
        self.warm_up_end = False
        super(SGD, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(SGD, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('nesterov', False)

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and
                returns the loss.

        Returns:
            The value returned by ``closure``, or ``None``.
        """
        self.setp_num += 1
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            if self.setp_num <= self.warm_up and not self.warm_up_end:
                # Linear ramp: step k of the warm-up uses lr * k / warm_up.
                lr = group['lr'] * (self.setp_num / self.warm_up)
            else:
                lr = group['lr']
                self.warm_up_end = True

            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    # Out-of-place so the caller's gradient is left intact.
                    d_p = d_p.add(p.data, alpha=weight_decay)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        # First step: the buffer starts as the gradient.
                        buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
                        buf.mul_(momentum).add_(d_p)
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                p.data.add_(d_p, alpha=-lr)
        return loss
super(Adam, self).__init__(params, defaults) 89 | 90 | def __setstate__(self, state): 91 | super(Adam, self).__setstate__(state) 92 | for group in self.param_groups: 93 | group.setdefault('amsgrad', False) 94 | 95 | def step(self, closure=None): 96 | """Performs a single optimization step. 97 | 98 | Arguments: 99 | closure (callable, optional): A closure that reevaluates the model 100 | and returns the loss. 101 | """ 102 | self.setp_num += 1 103 | loss = None 104 | if closure is not None: 105 | loss = closure() 106 | 107 | for group in self.param_groups: 108 | 109 | if self.setp_num <= self.warm_up and not self.warm_up_end : 110 | lr = group['lr']*pow(self.setp_num/self.warm_up,1) 111 | else: 112 | lr = group['lr'] 113 | self.warm_up_end = True 114 | 115 | for p in group['params']: 116 | 117 | if p.grad is None: 118 | continue 119 | grad = p.grad.data 120 | if grad.is_sparse: 121 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 122 | amsgrad = group['amsgrad'] 123 | 124 | state = self.state[p] 125 | 126 | # State initialization 127 | if len(state) == 0: 128 | state['step'] = 0 129 | # Exponential moving average of gradient values 130 | state['exp_avg'] = torch.zeros_like(p.data) 131 | # Exponential moving average of squared gradient values 132 | state['exp_avg_sq'] = torch.zeros_like(p.data) 133 | if amsgrad: 134 | # Maintains max of all exp. moving avg. of sq. grad. 
def cal_iou_np(boxes1, boxes2):
    """Return the IoU of boxes given as ``(x1, y1, x2, y2)`` on the last axis.

    Both inputs are converted with ``np.array`` and broadcast against each
    other, so one box can be compared with many.

    Args:
        boxes1: array-like of boxes.
        boxes2: array-like of boxes, broadcastable with ``boxes1``.

    Returns:
        Array of IoU values.  Pairs whose union area is not positive (for
        example two zero-area boxes) get 0 instead of NaN/inf.
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Intersection rectangle; disjoint boxes clamp to zero width/height.
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = 1.0 * inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # Divide only where the union is positive; everything else stays 0.
    iou = np.zeros_like(union_area)
    np.divide(inter_area, union_area, out=iou, where=union_area > 0)
    return iou
| dst = np.array([[dx, dy], [dx, new_h + dy], [new_w + dx, dy]], dtype=np.float32) 67 | trans_input = cv2.getAffineTransform(src, dst) 68 | trans_output = cv2.getAffineTransform(dst, src) 69 | image = cv2.warpAffine(image, trans_input, (self.opt.input_w, self.opt.input_h), 70 | flags=cv2.INTER_LINEAR, borderValue=(0, 0, 0)) 71 | image = (image.astype(np.float32) / 255.) 72 | image = (image- self.mean) / self.std 73 | image = image.transpose(2, 0, 1) 74 | image = np.expand_dims(image,0).astype(np.float32) 75 | return image,trans_output 76 | 77 | def decode(self,output,img_hw,trans_ouput,method = 'nms',iou_threshold=0.45,sigma=0.3): 78 | socres,cls = torch.max(output['cls'].sigmoid_(),dim=-1) 79 | socres = socres.detach().cpu().numpy() 80 | cls = cls.detach().cpu().numpy() 81 | box = output['box'].detach().cpu().numpy() 82 | seg = [output['%d' % i].sigmoid_().detach().cpu().numpy() for i in range(self.opt.k + 1)] 83 | topk_inds = np.where(socres > self.opt.vis_thresh) 84 | result = [] 85 | for det_num in topk_inds[1]: 86 | p = socres[0, det_num] 87 | cls_index = cls[0,det_num] 88 | b = box[0, det_num, :] 89 | for id, num in enumerate(self.det_offset): 90 | if num > det_num: 91 | break 92 | offset = det_num - self.det_offset[id - 1] if id > 0 else det_num 93 | width, hight = self.output_size[id] 94 | 95 | ### ct_int_feat 96 | y = int(offset / width) 97 | x = int(offset % width) 98 | 99 | b[0:2] = (x + 0.5 - b[0] )* self.strides[id],( y + 0.5 - b[1] )* self.strides[id] 100 | b[2:4] = (x + 0.5 + b[2] )* self.strides[id],( y + 0.5 + b[3] )* self.strides[id] 101 | b[0:2] = affine_transform(b[0:2],trans_ouput).astype(int) 102 | b[2:4] = affine_transform(b[2:4], trans_ouput).astype(int) 103 | result.append([*b,p,cls_index,x,y,id]) 104 | 105 | result = np.array(result) ## x1 y1 x2 y2 p cls ct_feat_x ct_feat_y feat_id 106 | 107 | ### use box to nms 108 | class_index = result[:,5] if len(result) > 0 else [] 109 | classes_in_img = list(set(class_index)) 110 | best_bboxes = [] 
111 | for cls in classes_in_img: 112 | cls_mask = (class_index == cls) 113 | cls_bboxes = result[cls_mask] 114 | while len(cls_bboxes) > 0: 115 | max_ind = np.argmax(cls_bboxes[:, 4]) 116 | best_bbox = cls_bboxes[max_ind] 117 | best_bboxes.append(best_bbox) 118 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]]) 119 | iou = cal_iou_np(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) 120 | assert method in ['nms', 'soft-nms'] 121 | weight = np.ones((len(iou),), dtype=np.float32) 122 | if method == 'nms': 123 | iou_mask = iou > iou_threshold 124 | weight[iou_mask] = 0.0 125 | if method == 'soft-nms': 126 | weight = np.exp(-(1.0 * iou ** 2 / sigma)) 127 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight 128 | score_mask = cls_bboxes[:, 4] > self.opt.vis_thresh 129 | cls_bboxes = cls_bboxes[score_mask] 130 | mask_res= [] 131 | for det in best_bboxes: 132 | mask = np.zeros([self.opt.input_h,self.opt.input_w],np.uint8) 133 | ct_feat_x,ct_feat_y,feat_id = int(det[-3]),int(det[-2]),int(det[-1]) 134 | x, y = int((ct_feat_x + 0.5) * self.strides[feat_id]), int((ct_feat_y + 0.5) * self.strides[feat_id]) 135 | window_seg = seg[feat_id][0, ct_feat_y, ct_feat_x, :, :] 136 | paste_x, paste_y, paste_x1, paste_y1 = x - self.windows[feat_id] // 2,\ 137 | y - self.windows[feat_id] // 2, \ 138 | x + self.windows[feat_id] // 2,\ 139 | y + self.windows[feat_id] // 2 140 | 141 | window_x, window_y, window_x1, window_y1 = max(-paste_x, 0), max(-paste_y, 0), \ 142 | self.windows[feat_id] - max(0, paste_x1 - self.opt.input_w), \ 143 | self.windows[feat_id] - max(0, paste_y1 - self.opt.input_h) 144 | paste_x, paste_y, paste_x1, paste_y1 = max(paste_x, 0), max(paste_y, 0), \ 145 | min(paste_x1, self.opt.input_w), \ 146 | min(paste_y1,self.opt.input_h) 147 | window_seg = cv2.resize(window_seg, (self.windows[feat_id],self. 
windows[feat_id])) 148 | window_seg = (window_seg > 0.5).astype(np.uint8) 149 | mask[paste_y:paste_y1, paste_x:paste_x1] = window_seg[window_y:window_y1, window_x:window_x1] 150 | mask = cv2.warpAffine(mask, trans_ouput, 151 | (img_hw[1], img_hw[0]), 152 | flags=cv2.INTER_LINEAR) 153 | 154 | mask_res.append(mask) 155 | return best_bboxes,mask_res 156 | 157 | def show_img(self,img,box,mask): 158 | for i in range(len(box)): 159 | det = box[i].astype(np.int) 160 | if self.opt.show_box: 161 | cv2.rectangle(img, (det[0], det[1]), (det[2], det[3]), (255, 0, 0), 2) 162 | color = np.array([[np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]]) 163 | seg = mask[i]==1 164 | img[seg] = img[seg] * 0.2 + color * 0.8 165 | 166 | cv2.imshow('result',img) 167 | 168 | 169 | if __name__ == '__main__': 170 | os.environ['CUDA_VISIBLE_DEVICES'] = '3' 171 | opt.weights = '/data/yoloCao/pycharmProjects/tensormask/exp/coco_person/model_last.pth' 172 | detector = Detector(opt) 173 | img = '/data/yoloCao/DataSet/VOC2007/JPEGImages/2007_000027.jpg' 174 | opt.vis_thresh = 0.5 175 | detector.run(img) 176 | 177 | -------------------------------------------------------------------------------- /lib/coco.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | import pycocotools.cocoeval 3 | import numpy as np 4 | import os 5 | import cv2 6 | from torch.utils.data import Dataset 7 | 8 | 9 | def affine_transform(pt, t): 10 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 11 | new_pt = np.dot(t, new_pt) 12 | return new_pt[:2] 13 | 14 | class COCO(Dataset): 15 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 16 | dtype=np.float32).reshape(1, 1, 3) 17 | std = np.array([0.28863828, 0.27408164, 0.27809835], 18 | dtype=np.float32).reshape(1, 1, 3) 19 | def __init__(self, cfg, split = 'train',augment = True): 20 | super(COCO, self).__init__() 21 | self.data_dir = cfg.data_dir 22 | 
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 23 | self.annot_path = os.path.join( 24 | self.data_dir, 'annotations', 25 | 'instances_{}2017.json').format(split) 26 | self.split = split 27 | print('==> initializing coco 2017 {} data.'.format(split)) 28 | self.coco = coco.COCO(self.annot_path) 29 | self.images = self.coco.getImgIds() 30 | self.num_samples = len(self.images) 31 | 32 | self.class_name = [ 33 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 34 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 35 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 36 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 37 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 38 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 39 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 40 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 41 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 42 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 43 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 44 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 45 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 46 | self._valid_ids = [ 47 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 48 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 49 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 50 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 51 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 52 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 53 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 54 | 82, 84, 85, 86, 87, 88, 89, 90] 55 | if cfg.class_name != '*' : 56 | self._valid_ids = [self.class_name.index(cfg.class_name)] 57 | self.class_name = [cfg.class_name] 58 | catIds = self.coco.getCatIds(self.class_name[-1]) 59 | assert catIds == self._valid_ids 60 | self.images = 
self.coco.getImgIds(self.images, catIds) 61 | self.num_samples = len(self.images) 62 | 63 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 64 | self.input_w = cfg.input_w 65 | self.input_h = cfg.input_h 66 | self.base_stride = cfg.base_stride 67 | self.base_window = cfg.base_window 68 | self.k = cfg.k 69 | self.num_class = len(self.class_name) 70 | 71 | self.augment=augment 72 | self.max_objs = cfg.max_objs 73 | self.jitter = cfg.jitter 74 | self.cfg = cfg 75 | if not self.augment: 76 | self.jitter = 0 77 | print('Loaded {} {} samples'.format(split, self.num_samples)) 78 | 79 | def __len__(self): 80 | return self.num_samples 81 | 82 | 83 | def _coco_box_to_bbox(self, box): 84 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],dtype=np.float32) 85 | return bbox 86 | 87 | def get_image_name(self,img_id): 88 | return os.path.join(self.img_dir,self.coco.loadImgs(ids=[self.images[img_id]])[0]['file_name']).strip() 89 | 90 | def _to_float(self, x): 91 | return float("{:.2f}".format(x)) 92 | 93 | def __getitem__(self, index): 94 | img_id = self.images[index] 95 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 96 | img_path = os.path.join(self.img_dir, file_name) 97 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 98 | anns = self.coco.loadAnns(ids=ann_ids) 99 | anns = list(filter(lambda x: x['category_id'] in self._valid_ids and x['iscrowd'] != 1, anns)) 100 | image = cv2.imread(img_path) 101 | 102 | ## augment 103 | height, width = image.shape[0], image.shape[1] 104 | dw, dh = self.jitter * width, self.jitter * height 105 | new_ar = (width + np.random.uniform(-dw, dw)) / (height + np.random.uniform(-dh, dh)) 106 | sclae = 1 107 | if new_ar < 1: 108 | new_h = sclae * self.input_h 109 | new_w = new_ar * new_h 110 | else: 111 | new_w = sclae * self.input_w 112 | new_h = new_w / new_ar 113 | 114 | dx, dy = (np.random.uniform(0, self.input_w - new_w), np.random.uniform(0, self.input_h - new_h)) \ 115 | if self.augment else 
((self.input_w - new_w) / 2, (self.input_h - new_h) / 2) 116 | 117 | flipped = False 118 | if np.random.random() < 0.5 and self.augment: 119 | image = np.copy(image[:, ::-1, :]) 120 | flipped = True 121 | 122 | src = np.array([[0, 0], [0, height], [width, 0]], dtype=np.float32) 123 | dst = np.array([[dx, dy], [dx, new_h + dy], [new_w + dx, dy]], dtype=np.float32) 124 | trans_input = cv2.getAffineTransform(src, dst) 125 | image = cv2.warpAffine(image, trans_input, (self.input_w, self.input_h), 126 | flags=cv2.INTER_LINEAR, borderValue=(0, 0, 0)) 127 | show = image.copy() 128 | image = (image.astype(np.float32) / 255.) 129 | image = (image- self.mean) / self.std 130 | image = image.transpose(2, 0, 1) 131 | 132 | strides = np.array([self.base_stride*2**i for i in range(self.k+1)]) 133 | windows = np.array([self.base_window*lamda for lamda in strides],np.int32) 134 | 135 | output_size = np.array(list(zip(self.input_w // strides, self.input_h // strides))) 136 | num_det = [output_w*output_h for output_w, output_h in output_size] 137 | det_offset = np.cumsum(num_det) 138 | label_conf = np.zeros((sum(num_det)),dtype=np.int64) 139 | xywh = np.zeros((self.max_objs, 7), dtype=np.float32) # x1 y1 x2 y2 ct_x ct_y stride 140 | ind = np.zeros((self.max_objs), dtype=np.int64) 141 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 142 | 143 | seg = [np.zeros((self.max_objs,window//self.base_stride,window//self.base_stride),dtype=np.float32) for window in windows] 144 | seg_ind = [np.zeros((self.max_objs),dtype=np.int64) for _ in windows] 145 | seg_mask = [np.zeros((self.max_objs),dtype=np.uint8) for _ in windows] 146 | num_objs = min(len(anns),self.max_objs) 147 | 148 | if num_objs > 0 : 149 | np.random.shuffle(anns) 150 | for k in range(num_objs): 151 | ann = anns[k] 152 | bbox = self._coco_box_to_bbox(ann['bbox']) 153 | cls_id = int(self.cat_ids[ann['category_id']]) 154 | segment = self.coco.annToMask(ann) 155 | if flipped: 156 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 157 | 
segment = segment[:, ::-1] 158 | bbox[:2] = affine_transform(bbox[:2], trans_input) 159 | bbox[2:] = affine_transform(bbox[2:], trans_input) 160 | bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.input_w - 1) 161 | bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.input_h - 1) 162 | 163 | w, h = bbox[2:] - bbox[:2] 164 | max_edge = max(w, h) 165 | min_edge = min(w, h) 166 | ratio = max_edge / windows 167 | window_mask = (ratio >= 0.5) * (ratio <= 1.) ## window > max(w,h) > window/2 168 | best_window = windows[window_mask] 169 | if len(best_window) == 0 and \ 170 | min_edge > 0 and \ 171 | min_edge < windows[0]: ### for small guys 172 | best_window = [windows[0]] 173 | window_mask[0] = True 174 | 175 | feat_stride = strides[window_mask] 176 | feat_size = output_size[window_mask] 177 | window_offset = det_offset[window_mask] 178 | if len(best_window) > 0 and min_edge > 0: ## min_edge must > 0 179 | segment = cv2.warpAffine(segment, trans_input, 180 | (self.input_w, self.input_h), 181 | flags=cv2.INTER_LINEAR) 182 | ct = np.array( 183 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 184 | 185 | for window_id in range(len(best_window)): 186 | best_window_size = best_window[window_id] 187 | feat_w, feat_h = feat_size[window_id] 188 | stride = feat_stride[window_id] 189 | 190 | xx, yy = np.arange(0, feat_w), np.arange(0, feat_h) 191 | xx, yy = (xx + 0.5) * stride, (yy + 0.5) * stride 192 | ct_feat_x, ct_feat_y = np.argmin(np.abs(ct[0] - xx)), np.argmin( 193 | np.abs(ct[1] - yy)) ## window ct close to box ct 194 | ct_img_x, ct_img_y = int(xx[ct_feat_x]), int(yy[ct_feat_y]) 195 | paded_segmnet = np.pad(segment, ((best_window_size // 2, best_window_size // 2), 196 | (best_window_size // 2, best_window_size // 2)), 'constant', 197 | constant_values=0) 198 | window_segment = paded_segmnet[ct_img_y: ct_img_y + best_window_size, 199 | ct_img_x: ct_img_x + best_window_size] 200 | 201 | feat_offset = window_offset[window_id] - feat_w * feat_h 202 | output_offset = 
def load_model(model, model_path, optimizer=None, resume=False,
               lr=None, lr_step=None):
    """Load a checkpoint written by ``save_model`` into ``model``.

    The checkpoint is a dict with ``'state_dict'`` and, optionally,
    ``'epoch'`` and ``'optimizer'``.  Keys saved from a
    ``torch.nn.DataParallel`` wrapper (prefixed ``'module.'``) are mapped back
    to plain keys.  Parameters whose shape differs from the model's, or that
    are missing from the checkpoint, keep the model's current values; each
    such case is reported on stdout.

    Args:
        model: module to load weights into (modified in place).
        model_path: path of the checkpoint file.
        optimizer: optional optimizer; when given, the return value is a tuple.
        resume: if true and the checkpoint has optimizer state, restore it.
        lr: base learning rate used to recompute the resumed learning rate.
            If ``None`` the learning rate stored in the checkpoint is kept.
        lr_step: epochs at which the learning rate is multiplied by 0.1.
            ``None`` means no decay steps.

    Returns:
        ``model`` if ``optimizer`` is ``None``, otherwise
        ``(model, optimizer, start_epoch)``.
    """
    start_epoch = 0
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    print('loaded {}, epoch {}'.format(model_path, checkpoint.get('epoch')))
    state_dict_ = checkpoint['state_dict']
    state_dict = {}

    # Convert DataParallel keys to plain-model keys.  Match the full
    # 'module.' prefix: matching only 'module' would also mangle unrelated
    # names such as 'modulator.weight'.
    prefix = 'module.'
    for k in state_dict_:
        if k.startswith(prefix):
            state_dict[k[len(prefix):]] = state_dict_[k]
        else:
            state_dict[k] = state_dict_[k]
    model_state_dict = model.state_dict()

    # check loaded parameters and created model parameters
    msg = 'If you see this, your model does not fully load the ' + \
          'pre-trained weight. Please make sure ' + \
          'you have correctly specified --arch xxx ' + \
          'or set the correct --num_classes for your own dataset.'
    for k in state_dict:
        if k in model_state_dict:
            if state_dict[k].shape != model_state_dict[k].shape:
                print('Skip loading parameter {}, required shape{}, ' \
                      'loaded shape{}. {}'.format(
                          k, model_state_dict[k].shape, state_dict[k].shape, msg))
                state_dict[k] = model_state_dict[k]
        else:
            print('Drop parameter {}.'.format(k) + msg)
    for k in model_state_dict:
        if not (k in state_dict):
            print('No param {}.'.format(k) + msg)
            state_dict[k] = model_state_dict[k]
    model.load_state_dict(state_dict, strict=False)

    # resume optimizer parameters
    if optimizer is not None and resume:
        if 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint.get('epoch', 0)
            if lr is not None:
                # Re-derive the step-decayed learning rate for the resumed epoch.
                start_lr = lr
                for step in (lr_step or ()):
                    if start_epoch >= step:
                        start_lr *= 0.1
                for param_group in optimizer.param_groups:
                    param_group['lr'] = start_lr
                print('Resumed optimizer with start lr', start_lr)
            else:
                print('Resumed optimizer; keeping the learning rate stored in the checkpoint.')
        else:
            print('No optimizer parameters in checkpoint.')
    if optimizer is not None:
        return model, optimizer, start_epoch
    else:
        return model
class AdaBound(Optimizer):
    """Implements AdaBound algorithm.
    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBound, self).__init__(params, defaults)

        # Learning rate of each group at construction time.  step() scales
        # final_lr by lr / base_lr so an external scheduler that lowers lr
        # also lowers the bounds.
        self.base_lrs = [group['lr'] for group in self.param_groups]

    def __setstate__(self, state):
        super(AdaBound, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Returns:
            The value returned by ``closure``, or ``None`` if no closure was given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        # Groups registered through add_param_group() after construction have
        # no base lr yet; without this the zip() below would silently skip them.
        for group in self.param_groups[len(self.base_lrs):]:
            self.base_lrs.append(group['lr'])

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            beta1, beta2 = group['betas']
            amsbound = group['amsbound']
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBound does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                state['step'] += 1

                if group['weight_decay'] != 0:
                    # L2 penalty; out-of-place so p.grad itself is untouched.
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                # (keyword alpha/value: the positional-scalar overloads are deprecated).
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                p.data.add_(-step_size)

        return loss
tensorboardX.SummaryWriter(log_dir=log_dir) 292 | else: 293 | if not os.path.exists(os.path.dirname(log_dir)): 294 | os.mkdir(os.path.dirname(log_dir)) 295 | if not os.path.exists(log_dir): 296 | os.mkdir(log_dir) 297 | self.log = open(log_dir + '/log.txt', 'w') 298 | try: 299 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 300 | except: 301 | pass 302 | self.start_line = True 303 | 304 | def write(self, txt): 305 | if self.start_line: 306 | time_str = time.strftime('%Y-%m-%d-%H-%M') 307 | self.log.write('{}: {}'.format(time_str, txt)) 308 | else: 309 | self.log.write(txt) 310 | self.start_line = False 311 | if '\n' in txt: 312 | self.start_line = True 313 | self.log.flush() 314 | 315 | def close(self): 316 | self.log.close() 317 | 318 | def scalar_summary(self, tag, value, step): 319 | """Log a scalar variable.""" 320 | if USE_TENSORBOARD: 321 | self.writer.add_scalar(tag, value, step) -------------------------------------------------------------------------------- /models/ops/align2nat/src/swap_align2nat_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 7 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 8 | i += blockDim.x * gridDim.x) 9 | 10 | #define THREADS_PER_BLOCK 1024 11 | 12 | inline int GET_BLOCKS(const int N) { 13 | int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; 14 | int max_block_num = 65000; 15 | return min(optimal_block_num, max_block_num); 16 | } 17 | 18 | template 19 | __device__ __forceinline__ static accscalar_t area_pixel_compute_source_index( 20 | accscalar_t scale, 21 | int dst_index, 22 | bool align_corners) { 23 | if (align_corners) { 24 | return scale * dst_index; 25 | } else { 26 | accscalar_t src_idx = scale * (dst_index + static_cast(0.5)) - 27 | static_cast(0.5); 28 | // See Note[Follow Opencv resize logic] 29 | return (src_idx < static_cast(0)) 30 | ? 
static_cast(0) 31 | : src_idx; 32 | } 33 | } 34 | 35 | __device__ __forceinline__ size_t 36 | loaction(const size_t n,const size_t v,const size_t u,const size_t y,const size_t x, 37 | const size_t V,const size_t U,const size_t H,const size_t W) { 38 | return x + W*(y + H*(u + U*(v + V*n))); 39 | } 40 | 41 | template 42 | __device__ __forceinline__ scalar_t get_val(const scalar_t*data, 43 | const size_t n,const size_t v,const size_t u,const size_t y,const size_t x, 44 | const size_t V,const size_t U,const size_t H,const size_t W,const scalar_t pad_val) { 45 | if (x <0 || x >= W || y < 0 || y >= H ){ 46 | return pad_val; 47 | }else{ 48 | return data[x + W*(y + H*(u + U*(v + V*n)))]; 49 | } 50 | } 51 | 52 | template 53 | __global__ void SwapAlign2NatForward(const int nthreads, const scalar_t *bottom_data,scalar_t *top_data, 54 | const accscalar_t scaleV,const accscalar_t scaleU, 55 | const int newV,const int newU, 56 | const int newH,const int newW, 57 | const accscalar_t scaleH,const accscalar_t scaleW, 58 | const int orgV,const int orgU, 59 | const int orgH,const int orgW, 60 | const int alpha,const bool align_corners,const scalar_t pad_val 61 | ) { 62 | const float v_offset = -newV/2; 63 | const float u_offset = -newU/2; 64 | int n,ov,ou,oh,ow,bottom_h,bottom_w; 65 | CUDA_1D_KERNEL_LOOP(index,nthreads){ 66 | // (n, ov, ou, oh, ow) is an element in the top_data 67 | ow = index % newW; 68 | oh = (index / newW) % newH; 69 | ou = (index / newW / newH) % newU; 70 | ov = (index / newW / newH / newU) % newV; 71 | n = index / newW / newH / newU / newV; 72 | if (newV==orgV && newU==orgU && newW==orgW && newH==orgH){ 73 | bottom_h = oh + alpha * (ov + v_offset); 74 | bottom_w = ow + alpha * (ou + u_offset); 75 | top_data[index] = get_val(bottom_data,n,ov,ou,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val); 76 | 77 | } else { 78 | // h,w 79 | 80 | const accscalar_t h1r = area_pixel_compute_source_index( 81 | scaleH, oh, align_corners); 82 | const int h1 = h1r; 83 | const int 
h1p = (h1 < orgH - 1) ? 1 : 0; 84 | const accscalar_t h1lambda = h1r - h1; 85 | const accscalar_t h0lambda = static_cast(1) - h1lambda; 86 | // 87 | const accscalar_t w1r = area_pixel_compute_source_index( 88 | scaleW, ow, align_corners); 89 | const int w1 = w1r; 90 | const int w1p = (w1 < orgW - 1) ? 1 : 0; 91 | const accscalar_t w1lambda = w1r - w1; 92 | const accscalar_t w0lambda = static_cast(1) - w1lambda; 93 | 94 | 95 | // v,u 96 | 97 | const accscalar_t v1r = area_pixel_compute_source_index( 98 | scaleV, ov, align_corners); 99 | const int v1 = v1r; 100 | const int v1p = (v1 < orgV - 1) ? 1 : 0; 101 | const accscalar_t v1lambda = v1r - v1; 102 | const accscalar_t v0lambda = static_cast(1) - v1lambda; 103 | 104 | 105 | const accscalar_t u1r = area_pixel_compute_source_index( 106 | scaleU, ou, align_corners); 107 | const int u1 = u1r; 108 | const int u1p = (u1 < orgU - 1) ? 1 : 0; 109 | const accscalar_t u1lambda = u1r - u1; 110 | const accscalar_t u0lambda = static_cast(1) - u1lambda; 111 | 112 | accscalar_t h0w0,h0w1,h1w0,h1w1; 113 | 114 | bottom_h = h1 + alpha * (ov + v_offset); 115 | bottom_w = w1 + alpha * (ou + u_offset); 116 | h0w0 = v0lambda * u0lambda * static_cast(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+ 117 | v0lambda * u1lambda * static_cast(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) + 118 | v1lambda * u0lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) + 119 | v1lambda * u1lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)); 120 | 121 | bottom_h = h1 + alpha * (ov + v_offset); 122 | bottom_w = w1 + w1p + alpha * (ou + u_offset); 123 | h0w1 = v0lambda * u0lambda * static_cast(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+ 124 | v0lambda * u1lambda * static_cast(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) + 125 
| v1lambda * u0lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
126 | v1lambda * u1lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));
127 |
128 | bottom_h = h1 + h1p + alpha * (ov + v_offset);
129 | bottom_w = w1 + alpha * (ou + u_offset);
130 | h1w0 = v0lambda * u0lambda * static_cast(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+
131 | v0lambda * u1lambda * static_cast(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
132 | v1lambda * u0lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
133 | v1lambda * u1lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));
134 |
135 | bottom_h = h1 + h1p + alpha * (ov + v_offset);
136 | bottom_w = w1 + w1p + alpha * (ou + u_offset);
137 | h1w1 = v0lambda * u0lambda * static_cast(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+
138 | v0lambda * u1lambda * static_cast(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
139 | v1lambda * u0lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
140 | v1lambda * u1lambda * static_cast(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));
141 |
142 | const accscalar_t val = h0lambda * w0lambda * h0w0 +
143 | h0lambda * w1lambda * h0w1 +
144 | h1lambda * w0lambda * h1w0 +
145 | h1lambda * w1lambda * h1w1 ;
146 |
147 | top_data[index] = static_cast(val);
148 | }
149 |
150 | }
151 | }
152 |
153 |
// SwapAlign2NatBackward
// ---------------------
// Backward kernel. Every loop index is one element of bottom_data, decomposed
// as (n, ov, ou, oh, ow) over the extents newV x newU x newH x newW. Its value
// is scattered into top_data, which is addressed through loaction(...) using
// the org* extents.
//
//  * Same-size case (new* == org* on all four axes): a single plain store to
//    row oh + alpha*(ov + v_offset), column ow + alpha*(ou + u_offset),
//    skipped when that position is outside [0, orgH) x [0, orgW).
//  * Otherwise: interpolation weights are derived on all four axes from
//    area_pixel_compute_source_index, and the value is accumulated with
//    atomicAdd into up to 4 (h,w) corners x 4 (v,u) neighbours, each weighted
//    by the product of the four per-axis lambdas.
//
// Parameters:
//   nthreads              loop bound handed to CUDA_1D_KERNEL_LOOP
//   bottom_data           values read at [index]
//   top_data              buffer written / accumulated into
//   scaleV/U/H/W          per-axis scale passed to area_pixel_compute_source_index
//   newV/U/H/W            extents used to decompose index
//   orgV/U/H/W            extents used for bounds checks and for loaction
//   alpha                 integer multiplier applied to (ov + v_offset) / (ou + u_offset)
//   align_corners         forwarded to area_pixel_compute_source_index
//
// NOTE(review): the interpolating branch only ever adds into top_data, so the
// caller presumably has to pass a zero-initialised buffer - confirm.
// NOTE(review): in this copy the template parameter list and the <...>
// arguments of static_cast / area_pixel_compute_source_index appear to have
// been stripped by extraction; restore them from the original file.
// NOTE(review): loaction(...) and CUDA_1D_KERNEL_LOOP are defined outside this
// chunk; loaction presumably returns the flat 5-D offset - verify.
154 | template
155 | __global__ void SwapAlign2NatBackward(const int nthreads, const scalar_t *bottom_data,scalar_t *top_data,
156 | const accscalar_t scaleV,const accscalar_t scaleU,
157 | const int newV,const int newU,
158 | const int newH,const int newW,
159 | const accscalar_t scaleH,const accscalar_t scaleW,
160 | const int orgV,const int orgU,
161 | const int orgH,const int orgW,
162 | const int alpha,const bool align_corners
163 | ) {
// Offsets that shift ov / ou by minus half of the v / u extent (integer division).
164 | const int v_offset = -newV/2;
165 | const int u_offset = -newU/2;
166 | int n,ov,ou,oh,ow,top_h,top_w;
167 | size_t top_offset ;
168 | CUDA_1D_KERNEL_LOOP(index,nthreads){
169 | // (n, ov, ou, oh, ow) is the element of bottom_data addressed by index (ow varies fastest)
170 | ow = index % newW;
171 | oh = (index / newW) % newH;
172 | ou = (index / newW / newH) % newU;
173 | ov = (index / newW / newH / newU) % newV;
174 | n = index / newW / newH / newU / newV;
// Same-size case: no interpolation, only the alpha-scaled shift of (h, w).
175 | if (newV==orgV && newU==orgU && newW==orgW && newH==orgH){
176 | top_h = oh + alpha * (ov + v_offset);
177 | top_w = ow + alpha * (ou + u_offset);
// Shifted positions outside the org H x W extent are dropped.
178 | if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH))
179 | {
180 | top_offset = loaction(n,ov,ou,top_h,top_w,newV,newU,newH,newW);
// Plain store (not an atomicAdd) in this branch.
181 | top_data[top_offset] = bottom_data[index];
182 | }
183 | } else {
184 | // h,w: per-axis source index, integer part, neighbour step and weights
185 |
186 | const accscalar_t h1r = area_pixel_compute_source_index(
187 | scaleH, oh, align_corners);
// Conversion to int drops the fractional part; h1p is 0 on the last row so h1 + h1p stays below orgH.
188 | const int h1 = h1r;
189 | const int h1p = (h1 < orgH - 1) ? 1 : 0;
190 | const accscalar_t h1lambda = h1r - h1;
191 | const accscalar_t h0lambda = static_cast(1) - h1lambda;
192 | // same computation for the w axis
193 | const accscalar_t w1r = area_pixel_compute_source_index(
194 | scaleW, ow, align_corners);
195 | const int w1 = w1r;
196 | const int w1p = (w1 < orgW - 1) ? 1 : 0;
197 | const accscalar_t w1lambda = w1r - w1;
198 | const accscalar_t w0lambda = static_cast(1) - w1lambda;
199 |
200 |
201 | // v,u: same computation for the v and u axes
202 |
203 | const accscalar_t v1r = area_pixel_compute_source_index(
204 | scaleV, ov, align_corners);
205 | const int v1 = v1r;
206 | const int v1p = (v1 < orgV - 1) ? 1 : 0;
207 | const accscalar_t v1lambda = v1r - v1;
208 | const accscalar_t v0lambda = static_cast(1) - v1lambda;
209 |
210 |
211 | const accscalar_t u1r = area_pixel_compute_source_index(
212 | scaleU, ou, align_corners);
213 | const int u1 = u1r;
214 | const int u1p = (u1 < orgU - 1) ? 1 : 0;
215 | const accscalar_t u1lambda = u1r - u1;
216 | const accscalar_t u0lambda = static_cast(1) - u1lambda;
217 |
// Value to distribute for this element.
218 | const accscalar_t d2val = static_cast(bottom_data[index]);
219 |
// Corner (h1, w1), weight h0lambda * w0lambda; skipped when the shifted position is out of range.
220 | top_h = h1 + alpha * (ov + v_offset);
221 | top_w = w1 + alpha * (ou + u_offset);
222 | if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
// Four (v, u) neighbours: (v1,u1), (v1,u1+u1p), (v1+v1p,u1), (v1+v1p,u1+u1p).
223 | top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
224 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w0lambda * v0lambda * u0lambda * d2val));
225 | top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
226 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w0lambda * v0lambda * u1lambda * d2val));
227 | top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
228 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w0lambda * v1lambda * u0lambda * d2val));
229 | top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
230 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w0lambda * v1lambda * u1lambda * d2val));
231 | }
232 |
// Corner (h1, w1 + w1p), weight h0lambda * w1lambda.
233 | top_h = h1 + alpha * (ov + v_offset);
234 | top_w = w1 + w1p + alpha * (ou + u_offset);
235 | if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
236 | top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
237 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w1lambda * v0lambda * u0lambda * d2val));
238 | top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
239 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w1lambda * v0lambda * u1lambda * d2val));
240 | top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
241 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w1lambda * v1lambda * u0lambda * d2val));
242 | top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
243 | atomicAdd(top_data + top_offset, static_cast(h0lambda * w1lambda * v1lambda * u1lambda * d2val));
244 | }
245 |
// Corner (h1 + h1p, w1), weight h1lambda * w0lambda.
246 | top_h = h1 + h1p + alpha * (ov + v_offset);
247 | top_w = w1 + alpha * (ou + u_offset);
248 | if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
249 | top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
250 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w0lambda * v0lambda * u0lambda * d2val));
251 | top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
252 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w0lambda * v0lambda * u1lambda * d2val));
253 | top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
254 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w0lambda * v1lambda * u0lambda * d2val));
255 | top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
256 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w0lambda * v1lambda * u1lambda * d2val));
257 | }
258 |
// Corner (h1 + h1p, w1 + w1p), weight h1lambda * w1lambda.
259 | top_h = h1 + h1p + alpha * (ov + v_offset);
260 | top_w = w1 + w1p + alpha * (ou + u_offset);
261 | if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
262 | top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
263 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w1lambda * v0lambda * u0lambda * d2val));
264 | top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
265 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w1lambda * v0lambda * u1lambda * d2val));
266 | top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
267 | atomicAdd(top_data + top_offset, static_cast(h1lambda * w1lambda * v1lambda * u0lambda * d2val));
268 | top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
269 | atomicAdd(top_data + top_offset,
static_cast(h1lambda * w1lambda * v1lambda * u1lambda * d2val));
270 | }
271 | }
272 |
273 | }
274 | }
275 |
276 |
// area_pixel_compute_scale
// ------------------------
// Returns the per-axis scale handed to the kernels for one axis.
//   output_size > 1 and align_corners : (input_size - 1) / (output_size - 1)
//   output_size > 1 otherwise         : input_size / output_size
//   output_size <= 1                  : 0
// NOTE(review): the template parameter list and the static_cast target type
// appear to have been stripped from this copy; restore them from the original
// file (the cast target decides whether the division is floating point).
277 | template
278 | static inline scalar_t area_pixel_compute_scale(
279 | int64_t input_size,
280 | int64_t output_size,
281 | bool align_corners) {
282 |
283 | if (output_size > 1) {
284 | return align_corners
285 | ? static_cast(input_size - 1) / (output_size - 1)
286 | : static_cast(input_size) / output_size;
287 | } else {
288 | return scalar_t(0);
289 | }
290 | }
// SwapAlign2NatForwardLaucher
// ---------------------------
// Host-side launcher for the SwapAlign2NatForward kernel.
//   input          source tensor; dims 1..4 are read as orgV, orgU, orgH, orgW
//   output         destination tensor; dims 0..4 are read as B, newV, newU, newH, newW
//   alpha          forwarded to the kernel unchanged
//   align_corners  selects the scale formula and is forwarded to the kernel
//   pad_val        forwarded to the kernel after a static_cast
// Returns 1 after THCudaCheck(cudaGetLastError()) has run.
// NOTE(review): the kernel template arguments and the <<<grid, block>>> launch
// configuration are missing from this copy; restore them from the original file.
291 | int SwapAlign2NatForwardLaucher(const at::Tensor& input,at::Tensor& output,
292 | const int alpha,const bool align_corners,const float pad_val){
293 | const int B = output.size(0);
294 | const int newV = output.size(1);
295 | const int newU = output.size(2);
296 | const int newH = output.size(3);
297 | const int newW = output.size(4);
298 | const int orgV = input.size(1);
299 | const int orgU = input.size(2);
300 | const int orgH = input.size(3);
301 | const int orgW = input.size(4);
// One kernel loop iteration per output element.
302 | const int output_size = B*newV*newU*newH*newW;
303 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(
304 | input.type(), "SwapAlign2NatForwardLaucher", ([&] {
305 | const scalar_t *bottom_data = input.data();
306 | scalar_t *top_data = output.data();
307 | using accscalar_t = at::acc_type;
308 | const accscalar_t rV = area_pixel_compute_scale(
309 | orgV, newV, align_corners);
// Fix: the u-axis scale must relate orgU to newU. It was computed against
// newV, which produced a wrong u scale whenever newV != newU.
310 | const accscalar_t rU = area_pixel_compute_scale(
311 | orgU, newU, align_corners);
312 | const accscalar_t rH = area_pixel_compute_scale(
313 | orgH, newH, align_corners);
314 | const accscalar_t rW = area_pixel_compute_scale(
315 | orgW, newW, align_corners);
316 | SwapAlign2NatForward
317 | <<>>(output_size,bottom_data,top_data,
318 | rV,rU,newV,newU,newH,newW,
319 | rH,rW,orgV,orgU,orgH,orgW,
320 | alpha,align_corners,static_cast(pad_val)
321 | );
322 | }));
// Surface launch errors from the kernel call above.
323 | THCudaCheck(cudaGetLastError());
324 | return 1;
325 | }
326 |
327 |
// SwapAlign2NatBackwardLaucher
// ----------------------------
// Host-side launcher for the SwapAlign2NatBackward kernel.
//   grad_output    incoming gradient; dims 0..4 are read as B, newV, newU, newH, newW
//   grad_input     gradient buffer to fill; dims 1..4 are read as orgV, orgU, orgH, orgW
//   alpha          forwarded to the kernel unchanged
//   align_corners  selects the scale formula and is forwarded to the kernel
// Returns 1 after THCudaCheck(cudaGetLastError()) has run.
// NOTE(review): the kernel accumulates into grad_input with atomicAdd when the
// sizes differ, so grad_input presumably must be zero-filled by the caller - confirm.
// NOTE(review): the kernel template arguments and the <<<grid, block>>> launch
// configuration are missing from this copy; restore them from the original file.
328 | int SwapAlign2NatBackwardLaucher(const at::Tensor& grad_output,at::Tensor& grad_input,
329 | const int alpha,const bool align_corners){
330 | int B = grad_output.size(0);
331 | int newV = grad_output.size(1);
332 | int newU = grad_output.size(2);
333 | int newH = grad_output.size(3);
334 | int newW = grad_output.size(4);
335 | int orgV = grad_input.size(1);
336 | int orgU = grad_input.size(2);
337 | int orgH = grad_input.size(3);
338 | int orgW = grad_input.size(4);
// One kernel loop iteration per grad_output element.
339 | const int output_size = B*newV*newU*newH*newW;
340 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(
341 | grad_output.type(), "SwapAlign2NatBackwardLaucher", ([&] {
342 | const scalar_t *bottom_data = grad_output.data();
343 | scalar_t *top_data = grad_input.data();
344 | using accscalar_t = at::acc_type;
345 | const accscalar_t rV = area_pixel_compute_scale(
346 | orgV, newV, align_corners);
// Fix: the u-axis scale must relate orgU to newU (was newV), matching the
// forward launcher so both passes use the same u sampling.
347 | const accscalar_t rU = area_pixel_compute_scale(
348 | orgU, newU, align_corners);
349 | const accscalar_t rH = area_pixel_compute_scale(
350 | orgH, newH, align_corners);
351 | const accscalar_t rW = area_pixel_compute_scale(
352 | orgW, newW, align_corners);
353 | SwapAlign2NatBackward
354 | <<>>(output_size,bottom_data,top_data,
355 | rV,rU,newV,newU,newH,newW,
356 | rH,rW,orgV,orgU,orgH,orgW,
357 | alpha,align_corners
358 | );
359 | }));
// Surface launch errors from the kernel call above.
360 | THCudaCheck(cudaGetLastError());
361 | return 1;
362 | }
--------------------------------------------------------------------------------