├── models
    ├── ops
    │   ├── align2nat
    │   │   ├── __init__.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   │   └── swap_align2nat.cpython-37.pyc
    │   │   │   └── swap_align2nat.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   │   └── roi_align.cpython-35.pyc
    │   │   │   └── swap_align2nat.py
    │   │   ├── setup.py
    │   │   └── src
    │   │   │   ├── swap_align2nat_cuda.cpp
    │   │   │   └── swap_align2nat_kernel.cu
    │   └── sigmoid_focal_loss
    │   │   ├── functions
    │   │       ├── __init__.py
    │   │       ├── __pycache__
    │   │       │   ├── __init__.cpython-37.pyc
    │   │       │   └── sigmoid_focal_loss.cpython-37.pyc
    │   │       └── sigmoid_focal_loss.py
    │   │   ├── modules
    │   │       ├── __init__.py
    │   │       ├── __pycache__
    │   │       │   ├── __init__.cpython-37.pyc
    │   │       │   └── sigmoid_focal_loss.cpython-37.pyc
    │   │       └── sigmoid_focal_loss.py
    │   │   ├── __init__.py
    │   │   ├── setup.py
    │   │   └── src
    │   │       ├── sigmoid_focal_loss.cpp
    │   │       └── sigmoid_focal_loss_cuda.cu
    ├── tensormask.py
    ├── losses.py
    ├── res_fpn.py
    └── detector.py
├── img
    ├── test.png
    └── test_2.png
├── README.md
├── config.py
├── demo.py
├── eval.py
├── exp
    └── coco_person
    │   ├── logs_2019-12-07-23-55
    │       ├── log.txt
    │       └── opt.txt
    │   └── opt.txt
├── train.py
├── show_pred_window.py
└── lib
    ├── trainer.py
    ├── optimer.py
    ├── coco.py
    └── utils.py


/models/ops/align2nat/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/models/ops/align2nat/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/ops/align2nat/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/img/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/img/test.png


--------------------------------------------------------------------------------
/img/test_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/img/test_2.png


--------------------------------------------------------------------------------
/models/ops/align2nat/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/modules/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/align2nat/modules/__pycache__/roi_align.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/modules/__pycache__/roi_align.cpython-35.pyc


--------------------------------------------------------------------------------
/models/ops/align2nat/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/functions/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
2 | 
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
4 | 


--------------------------------------------------------------------------------
/models/ops/align2nat/functions/__pycache__/swap_align2nat.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/functions/__pycache__/swap_align2nat.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/functions/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/modules/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/__pycache__/sigmoid_focal_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/functions/__pycache__/sigmoid_focal_loss.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/__pycache__/sigmoid_focal_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/modules/__pycache__/sigmoid_focal_loss.cpython-37.pyc


--------------------------------------------------------------------------------
/models/ops/align2nat/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 3 | 
 4 | setup(
 5 |     name='align2nat_cuda',
 6 |     ext_modules=[
 7 |         CUDAExtension('swap_align2nat_cuda', [
 8 |             'src/swap_align2nat_cuda.cpp',
 9 |             'src/swap_align2nat_kernel.cu',
10 |         ]),
11 |     ],
12 |     cmdclass={'build_ext': BuildExtension})
13 | 


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 3 | 
 4 | setup(
 5 |     name='SigmoidFocalLoss',
 6 |     ext_modules=[
 7 |         CUDAExtension('sigmoid_focal_loss_cuda', [
 8 |             'src/sigmoid_focal_loss.cpp',
 9 |             'src/sigmoid_focal_loss_cuda.cu',
10 |         ]),
11 |     ],
12 |     cmdclass={'build_ext': BuildExtension})
13 | 


--------------------------------------------------------------------------------
/models/ops/align2nat/modules/swap_align2nat.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.swap_align2nat import SwapAlign2NatFunction
 3 | 
 4 | 
 5 | class SwapAlign2Nat(Module):
 6 |     def __init__(self, alpha=1 ,lamda = 1, pad_val = -9.0 ,align_corners=True ):
 7 |         super(SwapAlign2Nat, self).__init__()
 8 |         self.alpha = alpha
 9 |         self.lamda = lamda
10 |         self.align_corners = align_corners
11 |         self.pad_val = pad_val
12 | 
13 |     def forward(self, features):
14 |         return SwapAlign2NatFunction.apply(features , self.alpha,self.lamda,self.pad_val,self.align_corners)
15 | 


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from ..functions.sigmoid_focal_loss import sigmoid_focal_loss
 4 | 
 5 | 
 6 | # TODO: remove this module
 7 | class SigmoidFocalLoss(nn.Module):
 8 | 
 9 |     def __init__(self, gamma, alpha):
10 |         super(SigmoidFocalLoss, self).__init__()
11 |         self.gamma = gamma
12 |         self.alpha = alpha
13 | 
14 |     def forward(self, logits, targets):
15 |         assert logits.is_cuda
16 |         loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha)
17 |         return loss.sum()
18 | 
19 |     def __repr__(self):
20 |         tmpstr = self.__class__.__name__ + "("
21 |         tmpstr += "gamma=" + str(self.gamma)
22 |         tmpstr += ", alpha=" + str(self.alpha)
23 |         tmpstr += ")"
24 |         return tmpstr
25 | 


--------------------------------------------------------------------------------
/models/ops/align2nat/functions/swap_align2nat.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | 
 3 | from .. import swap_align2nat_cuda
 4 | 
 5 | class SwapAlign2NatFunction(Function):
 6 | 
 7 |     @staticmethod
 8 |     def forward(ctx, features,alpha,lamda,pad_val,align_corners):
 9 |         ctx.feature_size = features.size()
10 |         ctx.alpha = alpha
11 |         ctx.lamda = lamda
12 |         ctx.align_corners = align_corners
13 |         if features.is_cuda:
14 |             output=swap_align2nat_cuda.forward(features,alpha,lamda,align_corners,pad_val)
15 |         else:
16 |             raise NotImplementedError
17 |         return output
18 | 
19 |     @staticmethod
20 |     def backward(ctx, grad_output):
21 | 
22 |         feature_size = ctx.feature_size
23 |         alpha = ctx.alpha
24 |         lamda = ctx.lamda
25 |         align_corners = ctx.align_corners
26 |         assert (feature_size is not None and grad_output.is_cuda)
27 |         grad_input =  swap_align2nat_cuda.backward(grad_output.contiguous(),alpha,lamda,align_corners)
28 |         return grad_input,None,None,None,None
29 | 
30 | swap_align2nat = SwapAlign2NatFunction.apply


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TensorMask
 2 | ### An unofficial implementation of [TensorMask: A Foundation for Dense Object Segmentation](https://arxiv.org/abs/1903.12174).
 3 | 
 4 | * ![image](img/test.png)
 5 | * ![image](img/test_2.png)
 6 | 
 7 | input_size = (640x512), no multi-scale training, no multi-scale test 
 8 | 
 9 | |class|AP|AP<sub>50</sub>|AP<sub>75</sub>|AP<sub>S</sub>|AP<sub>M</sub>|AP<sub>L</sub>|
10 | |---|---|---|---|---|---|---|
11 | |person box|0.481|0.752|0.503|0.256|0.559|0.704|
12 | |person mask|0.395|0.721|0.392|0.184|0.454|0.614|
13 | 
14 | ### Requirements
15 | 1. python==3.7
16 | 1. pytorch==1.0.0
17 | 2. torchvision==0.4.2
18 | 3. opencv-python,pycocotools,progress,numpy,easydict
19 | 
20 | ### Installation
21 | ```bash
22 | git clone https://github.com/CaoWGG/TensorMask.git	
23 | cd TensorMask/models/ops/align2nat
24 | python setup.py build_ext --inplace
25 | cd ../sigmoid_focal_loss
26 | python setup.py build_ext --inplace
27 | ```
28 | 	
29 | ### Training
30 | ```bash
31 | ## dataset coco2017
32 | ## modify config.py : cfg.class_name and cfg.num_class
33 | python train.py
34 | ```
35 | 
36 | ### Show result
37 | ```bash
38 | python demo.py
39 | ## show window
40 | python show_pred_window.py
41 | ```
42 | 


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | from torch.autograd.function import once_differentiable
 3 | 
 4 | from .. import sigmoid_focal_loss_cuda
 5 | 
 6 | 
 7 | class SigmoidFocalLossFunction(Function):
 8 | 
 9 |     @staticmethod
10 |     def forward(ctx, input, target, gamma=2.0, alpha=0.25):
11 |         ctx.save_for_backward(input, target)
12 |         num_classes = input.shape[1]
13 |         ctx.num_classes = num_classes
14 |         ctx.gamma = gamma
15 |         ctx.alpha = alpha
16 | 
17 |         loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
18 |                                                gamma, alpha)
19 |         return loss
20 | 
21 |     @staticmethod
22 |     @once_differentiable
23 |     def backward(ctx, d_loss):
24 |         input, target = ctx.saved_tensors
25 |         num_classes = ctx.num_classes
26 |         gamma = ctx.gamma
27 |         alpha = ctx.alpha
28 |         d_loss = d_loss.contiguous()
29 |         d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,
30 |                                                    num_classes, gamma, alpha)
31 |         return d_input, None, None, None, None
32 | 
33 | 
34 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply
35 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | from easydict import EasyDict
 2 | 
# Global configuration object; the scripts import it as `from config import cfg as opt`
# and may overwrite individual fields before use.
cfg = EasyDict()

# --- model ---
cfg.backbone = 'resnet50'
cfg.frezeBN = False
cfg.frezeLayer = False
cfg.align_corners = False   ## see torch.nn.functional.interpolate; align_corners=False follows OpenCV's resize logic
cfg.weights = ''
cfg.resume = True
cfg.device = 'cuda'

# --- loss weights ---
cfg.cls_weights = 1.
cfg.xywh_weights = 1.
cfg.mask_weights = 2.   ## from paper

# --- data ---
cfg.data_dir = '/data0/cao/DataSet/coco'
cfg.num_class = 1
cfg.class_name = 'person'   ## a single class name such as 'person', or '*'
cfg.input_h = 512   ## 512 is a multiple of 128
cfg.input_w = 640   ## 640 is a multiple of 128
cfg.base_window = 8   ## must be even in this implementation. NOTE(review): the original note computed the max window for base_window=12 (12 * base_stride * 2^5 / 2 = 768 > 640); with 8 the same formula gives 512 - TODO confirm
cfg.base_stride = 4   ## feat_2 --> stride 4 in H and W
cfg.k = 5 # 0 1 2 3 4 5
cfg.max_objs = 45
cfg.jitter = 0.3


# --- optimisation ---
cfg.lr = 0.02           ## from paper
cfg.num_epochs = 72     ## from paper
cfg.lr_step = [64,70]   ## from paper
cfg.warm_up = 1000
cfg.batch_size = 6      ## per GPU; train.py multiplies it by the number of GPUs

cfg.gpus_str = '0,1,2,3'   ## train.py exports this as CUDA_VISIBLE_DEVICES

# --- output / runtime ---
cfg.save_dir = 'exp'             ## train.py joins this with exp_id
cfg.exp_id = 'coco_person_8'
cfg.print_iter = 1
cfg.test = False
cfg.vis_thresh = 0.3
cfg.show_box =  True
cfg.demo = ''
44 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import cv2
 3 | from config import cfg as opt
 4 | from models.detector import Detector
 5 | os.environ['CUDA_VISIBLE_DEVICES'] = '3'
 6 | image_ext = ['jpg', 'jpeg', 'png', 'webp']
 7 | video_ext = ['mp4', 'mov', 'avi', 'mkv', 'h264']
 8 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
 9 | 
10 | opt.demo = '/data/yoloCao/DataSet/coco/val2017'
11 | opt.weights = 'exp/coco_person/model_last.pth'
12 | opt.vis_trehs = 0.4
13 | detector = Detector(opt)
14 | cv2.namedWindow('result', cv2.WINDOW_NORMAL)
15 | cv2.resizeWindow('result', 1024, 768)
16 | if opt.demo == 'webcam' or \
17 |     opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext:
18 |     cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
19 | 
20 |     while True:
21 |         _, img = cam.read()
22 |         ret = detector.run(img)
23 |         if cv2.waitKey(1) == 27:
24 |             break
25 | else:
26 |     if os.path.isdir(opt.demo):
27 |         image_names = []
28 |         ls = os.listdir(opt.demo)
29 |         for file_name in sorted(ls):
30 |             ext = file_name[file_name.rfind('.') + 1:].lower()
31 |             if ext in image_ext:
32 |                 image_names.append(os.path.join(opt.demo, file_name))
33 |     elif opt.demo.endswith('.txt'):
34 |         image_names = []
35 |         with open(opt.demo) as f:
36 |             lines = f.readlines()
37 |         for file_name in sorted(lines):
38 |             file_name = file_name.strip()
39 |             if file_name.split('.')[-1] in image_ext:
40 |                 image_names.append(file_name)
41 |     else:
42 |         image_names = [opt.demo]
43 | 
44 |     for (image_name) in image_names:
45 |         ret = detector.run(image_name)
46 |         if cv2.waitKey(0) == 27:
47 |             break
48 | 
49 | 


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp:
--------------------------------------------------------------------------------
 1 | // modify from
 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
 3 | #include <torch/extension.h>
 4 | 
 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
 6 |                                          const at::Tensor &targets,
 7 |                                          const int num_classes,
 8 |                                          const float gamma, const float alpha);
 9 | 
10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
11 |                                           const at::Tensor &targets,
12 |                                           const at::Tensor &d_losses,
13 |                                           const int num_classes,
14 |                                           const float gamma, const float alpha);
15 | 
16 | // Interface for Python
17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
18 |                                     const at::Tensor &targets,
19 |                                     const int num_classes, const float gamma,
20 |                                     const float alpha) {
21 |   if (logits.type().is_cuda()) {
22 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
23 |                                          alpha);
24 |   }
25 | }
26 | 
27 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
28 |                                      const at::Tensor &targets,
29 |                                      const at::Tensor &d_losses,
30 |                                      const int num_classes, const float gamma,
31 |                                      const float alpha) {
32 |   if (logits.type().is_cuda()) {
33 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
34 |                                           num_classes, gamma, alpha);
35 |   }
36 | }
37 | 
// Python bindings: the compiled module exposes SigmoidFocalLoss_forward and
// SigmoidFocalLoss_backward under the names `forward` and `backward`.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &SigmoidFocalLoss_forward,
        "SigmoidFocalLoss forward (CUDA)");
  m.def("backward", &SigmoidFocalLoss_backward,
        "SigmoidFocalLoss backward (CUDA)");
}
44 | 


--------------------------------------------------------------------------------
/models/ops/align2nat/src/swap_align2nat_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
 6 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
 7 | #define CHECK_CONTIGUOUS(x) \
 8 |   AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 9 | #define CHECK_INPUT(x) \
10 |   CHECK_CUDA(x);       \
11 |   CHECK_CONTIGUOUS(x)
12 | 
13 | int SwapAlign2NatForwardLaucher(const at::Tensor& input,at::Tensor& output,
14 |                            const int alpha,const bool align_corners,const float pad_val);
15 | int SwapAlign2NatBackwardLaucher(const at::Tensor& grad_output,at::Tensor& grad_input,
16 |                            const int alpha,const bool align_corners);
17 | 
18 | at::Tensor swap_align2nat_forward_cuda(const at::Tensor& input , const int alpha, const int lamda,const bool align_corners,const float pad_val) {
19 |   CHECK_INPUT(input);
20 |   int B = input.size(0);
21 |   int V = input.size(1);
22 |   int U = input.size(2);
23 |   int H = input.size(3);
24 |   int W = input.size(4);
25 |   auto output = torch::zeros_like(input);
26 |   output.resize_({B, lamda*V, lamda*U, H/lamda,W/lamda});
27 |   output.contiguous();
28 |   CHECK_INPUT(output);
29 |   SwapAlign2NatForwardLaucher(input,output,alpha,align_corners,pad_val);
30 |   return output;
31 | }
32 | 
33 | at::Tensor swap_align2nat_backward_cuda(const at::Tensor& grad_output,const int alpha,const int lamda,const bool align_corners) {
34 |   CHECK_INPUT(grad_output);
35 |   int B = grad_output.size(0);
36 |   int V = grad_output.size(1);
37 |   int U = grad_output.size(2);
38 |   int H = grad_output.size(3);
39 |   int W = grad_output.size(4);
40 |   auto grad_input = torch::zeros_like(grad_output);
41 |   grad_input.resize_({B, V/lamda, U/lamda, H*lamda,W*lamda});
42 |   grad_input.contiguous();
43 |   CHECK_INPUT(grad_input);
44 |   SwapAlign2NatBackwardLaucher(grad_output,grad_input,alpha,align_corners);
45 |   return grad_input;
46 | }
47 | 
// Python bindings: the compiled module exposes swap_align2nat_forward_cuda and
// swap_align2nat_backward_cuda under the names `forward` and `backward`.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &swap_align2nat_forward_cuda, "SwapAlign2Nat forward (CUDA)");
  m.def("backward", &swap_align2nat_backward_cuda, "SwapAlign2Nat backward (CUDA)");
}
52 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from models.detector import Detector
 3 | from pycocotools.cocoeval import COCOeval
 4 | import pycocotools.coco as coco
 5 | import pycocotools.mask as mask_util
 6 | import numpy as np
 7 | from tqdm import tqdm
 8 | from config import cfg as opt
 9 | os.environ['CUDA_VISIBLE_DEVICES'] = '3'
10 | valid_ids = [
11 |             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
12 |             14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
13 |             24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
14 |             37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
15 |             48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
16 |             58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
17 |             72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
18 |             82, 84, 85, 86, 87, 88, 89, 90]
19 | 
20 | ## config recover weights
21 | opt.weights = 'exp/coco_person/model_last.pth'
22 | opt.vis_trehs = 0.01
23 | split = 'val'
24 | 
25 | detector = Detector(opt)
26 | data = coco.COCO(os.path.join(
27 |             opt.data_dir, 'annotations',
28 |             'instances_{}2017.json').format(split))
29 | 
30 | if opt.class_name!='*' :  ## for one class
31 |     catIds = data.getCatIds(opt.class_name)
32 |     imgIds = data.getImgIds(catIds=catIds)
33 |     valid_ids = catIds
34 | 
35 | detections = []
36 | for img_id in tqdm(data.getImgIds()):
37 |     img_name = os.path.join(os.path.join(opt.data_dir, '{}2017'.format(split)),
38 |                             data.loadImgs(ids=[img_id])[0]['file_name']).strip()
39 |     boxs,masks = detector.run(img_name,vis=False)
40 |     for i,det in enumerate(boxs):
41 |         x, y, x1, y1, conf, cls = det[:6]
42 |         detection = {
43 |             "image_id": img_id,
44 |             "category_id": int(valid_ids[int(cls)]),
45 |             'segmentation':mask_util.encode(np.asfortranarray(masks[i])),
46 |             #"bbox": [x, y, x1 - x, y1 - y],
47 |             "score": float("{:.2f}".format(conf))
48 |         }
49 |         detections.append(detection)
50 | coco_dets = data.loadRes(detections)
51 | coco_eval = COCOeval(data, coco_dets, "segm")
52 | 
53 | if opt.class_name!='*':  ## for one class
54 |     coco_eval.params.imgIds = imgIds
55 |     coco_eval.params.catIds = catIds
56 | 
57 | coco_eval.evaluate()
58 | coco_eval.accumulate()
59 | coco_eval.summarize()
60 | 


--------------------------------------------------------------------------------
/exp/coco_person/logs_2019-12-07-23-55/log.txt:
--------------------------------------------------------------------------------
 1 | 2019-12-08-01-03: epoch: 1 |loss 0.718341 | cls_loss 0.319530 | diou_loss 0.313822 | mask_loss 0.160178 | time 67.066667 | loss 0.547814 | cls_loss 0.235685 | diou_loss 0.224505 | mask_loss 0.128002 | time 2.916667 | 
 2 | 2019-12-08-02-12: epoch: 2 |loss 0.500938 | cls_loss 0.221124 | diou_loss 0.216653 | mask_loss 0.112826 | time 66.216667 | loss 0.502653 | cls_loss 0.216273 | diou_loss 0.202636 | mask_loss 0.117861 | time 2.833333 | 
 3 | 2019-12-08-03-21: epoch: 3 |loss 0.469828 | cls_loss 0.208693 | diou_loss 0.200540 | mask_loss 0.105500 | time 66.383333 | loss 0.477843 | cls_loss 0.207729 | diou_loss 0.195091 | mask_loss 0.110670 | time 2.850000 | 
 4 | 2019-12-08-04-30: epoch: 4 |loss 0.452117 | cls_loss 0.200818 | diou_loss 0.191730 | mask_loss 0.101684 | time 66.300000 | loss 0.470088 | cls_loss 0.204463 | diou_loss 0.187136 | mask_loss 0.109420 | time 2.866667 | 
 5 | 2019-12-08-05-39: epoch: 5 |loss 0.442711 | cls_loss 0.196554 | diou_loss 0.186292 | mask_loss 0.099792 | time 66.033333 | loss 0.469164 | cls_loss 0.204524 | diou_loss 0.183815 | mask_loss 0.109343 | time 2.883333 | 
 6 | 2019-12-08-06-48: epoch: 6 |loss 0.432938 | cls_loss 0.192415 | diou_loss 0.181634 | mask_loss 0.097557 | time 66.116667 | loss 0.455528 | cls_loss 0.197939 | diou_loss 0.179313 | mask_loss 0.106380 | time 2.866667 | 
 7 | 2019-12-08-07-57: epoch: 7 |loss 0.427711 | cls_loss 0.190262 | diou_loss 0.178843 | mask_loss 0.096369 | time 66.183333 | loss 0.449601 | cls_loss 0.197045 | diou_loss 0.175069 | mask_loss 0.104394 | time 2.833333 | 
 8 | 2019-12-08-09-06: epoch: 8 |loss 0.421918 | cls_loss 0.187740 | diou_loss 0.176018 | mask_loss 0.095087 | time 66.100000 | loss 0.450542 | cls_loss 0.200763 | diou_loss 0.171751 | mask_loss 0.103420 | time 2.916667 | 
 9 | 2019-12-08-10-16: epoch: 9 |loss 0.418336 | cls_loss 0.185566 | diou_loss 0.174558 | mask_loss 0.094565 | time 66.550000 | loss 0.450484 | cls_loss 0.201971 | diou_loss 0.172454 | mask_loss 0.102700 | time 2.866667 | 
10 | 2019-12-08-11-25: epoch: 10 |loss 0.413934 | cls_loss 0.184827 | diou_loss 0.172182 | mask_loss 0.093031 | time 66.266667 | loss 0.438343 | cls_loss 0.193945 | diou_loss 0.170348 | mask_loss 0.100905 | time 2.950000 | 
11 | 2019-12-08-12-35: epoch: 11 |loss 0.412394 | cls_loss 0.183310 | diou_loss 0.170863 | mask_loss 0.093184 | time 66.466667 | loss 0.450937 | cls_loss 0.203606 | diou_loss 0.170497 | mask_loss 0.102353 | time 2.916667 | 
12 | 


--------------------------------------------------------------------------------
/exp/coco_person/opt.txt:
--------------------------------------------------------------------------------
 1 | ==> torch version: 1.0.0
 2 | ==> cudnn version: 7401
 3 | ==> Cmd:
 4 | ['train.py']
 5 | ==> Opt:
 6 |   align_corners: False
 7 |   backbone: resnet50
 8 |   base_stride: 4
 9 |   base_window: 12
10 |   batch_size: 18
11 |   class_name: person
12 |   clear: <built-in method clear of EasyDict object at 0x7fcad8363950>
13 |   cls_weights: 1.0
14 |   copy: <built-in method copy of EasyDict object at 0x7fcad8363950>
15 |   data_dir: /data/yoloCao/DataSet/coco
16 |   device: cuda
17 |   exp_id: coco_person
18 |   frezeBN: False
19 |   frezeLayer: False
20 |   fromkeys: <built-in method fromkeys of type object at 0x56214e124ca0>
21 |   get: <built-in method get of EasyDict object at 0x7fcad8363950>
22 |   gpus: [0, 1, 2]
23 |   gpus_str: 0,1,2
24 |   input_h: 512
25 |   input_w: 640
26 |   items: <built-in method items of EasyDict object at 0x7fcad8363950>
27 |   jitter: 0.3
28 |   k: 5
29 |   keys: <built-in method keys of EasyDict object at 0x7fcad8363950>
30 |   lr: 0.02
31 |   lr_step: [64, 70]
32 |   mask_weights: 2.0
33 |   num_class: 1
34 |   num_epochs: 72
35 |   pop: <bound method EasyDict.pop of {'backbone': 'resnet50', 'frezeBN': False, 'frezeLayer': False, 'align_corners': False, 'weights': '', 'device': 'cuda', 'cls_weights': 1.0, 'xywh_weights': 0.25, 'mask_weights': 2.0, 'data_dir': '/data/yoloCao/DataSet/coco', 'num_class': 1, 'class_name': 'person', 'input_h': 512, 'input_w': 640, 'base_window': 12, 'base_stride': 4, 'k': 5, 'jitter': 0.3, 'lr': 0.02, 'num_epochs': 72, 'lr_step': [64, 70], 'warm_up': 1000, 'batch_size': 18, 'gpus_str': '0,1,2', 'save_dir': 'exp/coco_person', 'exp_id': 'coco_person', 'print_iter': 1, 'test': False, 'gpus': [0, 1, 2]}>
36 |   popitem: <built-in method popitem of EasyDict object at 0x7fcad8363950>
37 |   print_iter: 1
38 |   save_dir: exp/coco_person
39 |   setdefault: <built-in method setdefault of EasyDict object at 0x7fcad8363950>
40 |   test: False
41 |   update: <bound method EasyDict.update of {'backbone': 'resnet50', 'frezeBN': False, 'frezeLayer': False, 'align_corners': False, 'weights': '', 'device': 'cuda', 'cls_weights': 1.0, 'xywh_weights': 0.25, 'mask_weights': 2.0, 'data_dir': '/data/yoloCao/DataSet/coco', 'num_class': 1, 'class_name': 'person', 'input_h': 512, 'input_w': 640, 'base_window': 12, 'base_stride': 4, 'k': 5, 'jitter': 0.3, 'lr': 0.02, 'num_epochs': 72, 'lr_step': [64, 70], 'warm_up': 1000, 'batch_size': 18, 'gpus_str': '0,1,2', 'save_dir': 'exp/coco_person', 'exp_id': 'coco_person', 'print_iter': 1, 'test': False, 'gpus': [0, 1, 2]}>
42 |   values: <built-in method values of EasyDict object at 0x7fcad8363950>
43 |   warm_up: 1000
44 |   weights: 
45 |   xywh_weights: 0.25
46 | 


--------------------------------------------------------------------------------
/exp/coco_person/logs_2019-12-07-23-55/opt.txt:
--------------------------------------------------------------------------------
 1 | ==> torch version: 1.0.0
 2 | ==> cudnn version: 7401
 3 | ==> Cmd:
 4 | ['train.py']
 5 | ==> Opt:
 6 |   align_corners: False
 7 |   backbone: resnet50
 8 |   base_stride: 4
 9 |   base_window: 12
10 |   batch_size: 18
11 |   class_name: person
12 |   clear: <built-in method clear of EasyDict object at 0x7fcad8363950>
13 |   cls_weights: 1.0
14 |   copy: <built-in method copy of EasyDict object at 0x7fcad8363950>
15 |   data_dir: /data/yoloCao/DataSet/coco
16 |   device: cuda
17 |   exp_id: coco_person
18 |   frezeBN: False
19 |   frezeLayer: False
20 |   fromkeys: <built-in method fromkeys of type object at 0x56214e124ca0>
21 |   get: <built-in method get of EasyDict object at 0x7fcad8363950>
22 |   gpus: [0, 1, 2]
23 |   gpus_str: 0,1,2
24 |   input_h: 512
25 |   input_w: 640
26 |   items: <built-in method items of EasyDict object at 0x7fcad8363950>
27 |   jitter: 0.3
28 |   k: 5
29 |   keys: <built-in method keys of EasyDict object at 0x7fcad8363950>
30 |   lr: 0.02
31 |   lr_step: [64, 70]
32 |   mask_weights: 2.0
33 |   num_class: 1
34 |   num_epochs: 72
35 |   pop: <bound method EasyDict.pop of {'backbone': 'resnet50', 'frezeBN': False, 'frezeLayer': False, 'align_corners': False, 'weights': '', 'device': 'cuda', 'cls_weights': 1.0, 'xywh_weights': 0.25, 'mask_weights': 2.0, 'data_dir': '/data/yoloCao/DataSet/coco', 'num_class': 1, 'class_name': 'person', 'input_h': 512, 'input_w': 640, 'base_window': 12, 'base_stride': 4, 'k': 5, 'jitter': 0.3, 'lr': 0.02, 'num_epochs': 72, 'lr_step': [64, 70], 'warm_up': 1000, 'batch_size': 18, 'gpus_str': '0,1,2', 'save_dir': 'exp/coco_person', 'exp_id': 'coco_person', 'print_iter': 1, 'test': False, 'gpus': [0, 1, 2]}>
36 |   popitem: <built-in method popitem of EasyDict object at 0x7fcad8363950>
37 |   print_iter: 1
38 |   save_dir: exp/coco_person
39 |   setdefault: <built-in method setdefault of EasyDict object at 0x7fcad8363950>
40 |   test: False
41 |   update: <bound method EasyDict.update of {'backbone': 'resnet50', 'frezeBN': False, 'frezeLayer': False, 'align_corners': False, 'weights': '', 'device': 'cuda', 'cls_weights': 1.0, 'xywh_weights': 0.25, 'mask_weights': 2.0, 'data_dir': '/data/yoloCao/DataSet/coco', 'num_class': 1, 'class_name': 'person', 'input_h': 512, 'input_w': 640, 'base_window': 12, 'base_stride': 4, 'k': 5, 'jitter': 0.3, 'lr': 0.02, 'num_epochs': 72, 'lr_step': [64, 70], 'warm_up': 1000, 'batch_size': 18, 'gpus_str': '0,1,2', 'save_dir': 'exp/coco_person', 'exp_id': 'coco_person', 'print_iter': 1, 'test': False, 'gpus': [0, 1, 2]}>
42 |   values: <built-in method values of EasyDict object at 0x7fcad8363950>
43 |   warm_up: 1000
44 |   weights: 
45 |   xywh_weights: 0.25
46 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | from models.tensormask import TensorMask
 2 | from lib.trainer import  Trainer
 3 | from lib.utils import load_model,save_model,Logger
 4 | from lib.coco import COCO
 5 | from lib  import optimer
 6 | from config import cfg as opt
 7 | import torch
 8 | import os
 9 | 
10 | 
# Training entry point: builds the model, optimizer and data loaders from the
# global config and runs the epoch loop with checkpointing and LR decay.

# NOTE(review): the original comment here read "input size is not fixed";
# cudnn autotuning usually pays off when input shapes repeat -- confirm intent.
torch.backends.cudnn.benchmark = True
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
# Parse the ids (this also validates the string), then renumber from zero.
# presumably because the visible devices are re-indexed once
# CUDA_VISIBLE_DEVICES is set -- TODO confirm
opt.gpus = [int(i) for i in opt.gpus_str.split(',')]
opt.gpus = list(range(len(opt.gpus)))
# The configured batch size is scaled by the number of GPUs.
opt.batch_size = opt.batch_size * len(opt.gpus)
opt.save_dir = os.path.join(opt.save_dir, opt.exp_id)
logger = Logger(opt)


model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                   base_window=opt.base_window,
                   freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                   align_corners=opt.align_corners)

# Weight decay is applied to 4-D tensors (conv kernels) only; lower-rank
# parameters such as biases go into a second group without it.
conv_weights = [p for p in model.parameters() if p.dim() == 4]
other_params = [p for p in model.parameters() if p.dim() < 4]
optimizer = optimer.SGD(
    [{'params': conv_weights, 'weight_decay': 0.0001},
     {'params': other_params}],
    lr=opt.lr,
    # Honour the configured warm-up length instead of a duplicated literal;
    # 1000 stays the fallback when the option is absent.
    warm_up=getattr(opt, 'warm_up', 1000),
    momentum=0.9, nesterov=True)
start_epoch = 0
if opt.weights != '':
    # `resume` may be missing from the config; default to not resuming.
    model, optimizer, start_epoch = load_model(
        model, opt.weights, optimizer, getattr(opt, 'resume', False),
        opt.lr, opt.lr_step)
trainer = Trainer(opt, model, optimizer)
trainer.set_device(opt.gpus, opt.device)

print('Setting up data...')
val_loader = torch.utils.data.DataLoader(
    COCO(cfg=opt, split='val', augment=False),
    batch_size=8,
    shuffle=False,
    num_workers=8,
    pin_memory=False
)
train_loader = torch.utils.data.DataLoader(
    COCO(cfg=opt, split='train', augment=True),
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=8,
    pin_memory=False
)

print('Starting training...')
best = 1e10  # lowest validation loss seen so far
for epoch in range(start_epoch + 1, opt.num_epochs + 1):
    log_dict_train, _ = trainer.train(epoch, train_loader)
    logger.write('epoch: {} |'.format(epoch))
    for k, v in log_dict_train.items():
        logger.scalar_summary('train_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))
    with torch.no_grad():
        log_dict_val, preds = trainer.val(epoch, val_loader)
    for k, v in log_dict_val.items():
        logger.scalar_summary('val_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))
    if log_dict_val['loss'] < best:
        best = log_dict_val['loss']
        save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                   epoch, model)
    save_model(os.path.join(opt.save_dir, 'model_last.pth'),
               epoch, model, optimizer)
    logger.write('\n')
    if epoch in opt.lr_step:
        # Keep a snapshot at every decay milestone, then divide the base LR
        # by 10 once per milestone reached so far.
        save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                   epoch, model, optimizer)
        lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
        print('Drop LR to', lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
78 | 


--------------------------------------------------------------------------------
/show_pred_window.py:
--------------------------------------------------------------------------------
 1 | from models.tensormask import TensorMask
 2 | from config import cfg as opt
 3 | from lib.utils import load_model,save_model
 4 | from lib.coco import COCO
 5 | import numpy as np
 6 | import torch
 7 | import os
 8 | import cv2
 9 | 
# Interactive visualisation: runs the trained model on the validation split
# and, for every detection scoring above 0.4, draws its box and pastes its
# window mask onto the image.

os.environ['CUDA_VISIBLE_DEVICES'] = '3'
model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                   base_window=opt.base_window,
                   freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                   align_corners=opt.align_corners)

opt.test = True
opt.weights = 'exp/coco_person/model_last.pth'
model = load_model(model, opt.weights)
model.eval()
model.cuda()
val_loader = torch.utils.data.DataLoader(
    COCO(cfg=opt, split='val', augment=False),
    batch_size=1,
    shuffle=False,
    num_workers=1,
    pin_memory=True
)
# Per-level stride and mask-window side length (window = base_window * stride).
strides = np.array([opt.base_stride * 2 ** i for i in range(opt.k + 1)])
windows = np.array([opt.base_window * lamda for lamda in strides], np.int32)

# (width, height) of every level's output grid and the cumulative number of
# grid cells, used to map a flat detection index back to (level, y, x).
output_size = np.array(list(zip(opt.input_w // strides, opt.input_h // strides)))
num_det = [output_w * output_h for output_w, output_h in output_size]
det_offset = np.cumsum(num_det)
for batch in val_loader:
    image = batch['img'].numpy()[0]
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(batch['input'].cuda())

    scores, cls = torch.max(output['cls'].sigmoid_(), dim=-1)
    scores = scores.detach().cpu().numpy()
    cls = cls.detach().cpu().numpy()
    box = output['box'].detach().cpu().numpy()
    seg = [output['%d' % i].sigmoid_().detach().cpu().numpy() for i in range(opt.k + 1)]
    topk_inds = np.where(scores > 0.4)

    for det_num in topk_inds[1]:
        p = scores[0, det_num]
        b = box[0, det_num, :]
        # First level whose cumulative cell count exceeds the flat index.
        for level, num in enumerate(det_offset):
            if num > det_num:
                break
        offset = det_num - det_offset[level - 1] if level > 0 else det_num
        width, _ = output_size[level]
        stride = strides[level]
        win = int(windows[level])  # plain int for the OpenCV / slicing calls below

        ### cell coordinates on the feature grid
        y, x = divmod(int(offset), int(width))

        window_seg = seg[level][0, y, x, :, :]

        ### cell centre in input-image pixels
        x, y = int((x + 0.5) * stride), int((y + 0.5) * stride)
        ### show box: the four values are used as left/top/right/bottom
        ### distances from the centre, measured in strides
        b[0:2] = x - b[0] * stride, y - b[1] * stride
        b[2:4] = x + b[2] * stride, y + b[3] * stride
        # `np.int` was only an alias of the builtin and is gone from recent NumPy.
        b = b.astype(int)
        cv2.rectangle(image, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 0, 0), 2)

        ### show mask
        img_h, img_w = image.shape[:2]
        half = win // 2
        paste_x, paste_y, paste_x1, paste_y1 = x - half, y - half, x + half, y + half

        # Part of the window that stays inside the image ...
        window_x, window_y = max(-paste_x, 0), max(-paste_y, 0)
        window_x1 = win - max(0, paste_x1 - img_w)
        window_y1 = win - max(0, paste_y1 - img_h)

        # ... and the matching image region, clipped to the borders.
        paste_x, paste_y = max(paste_x, 0), max(paste_y, 0)
        paste_x1, paste_y1 = min(paste_x1, img_w), min(paste_y1, img_h)

        window_seg = cv2.resize(window_seg, (win, win))
        window_seg = (window_seg > 0.5)

        ### paste to img
        window_seg_paste = window_seg[window_y:window_y1, window_x:window_x1]
        color = np.array([[np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]])
        region = image[paste_y:paste_y1, paste_x:paste_x1]  # view: writes reach `image`
        region[window_seg_paste] = region[window_seg_paste] * 0.2 + color * 0.8

        ### show
        cv2.imshow('window', (window_seg).astype(np.uint8) * 255)
        cv2.imshow('', image)
        cv2.waitKey(0)
92 | 


--------------------------------------------------------------------------------
/models/tensormask.py:
--------------------------------------------------------------------------------
 1 | from models.ops.align2nat.functions.swap_align2nat import swap_align2nat
 2 | from models.res_fpn import resnet_fpn_backbone
 3 | import torch.nn.functional as F
 4 | import torch.nn as nn
 5 | import torch
 6 | import math
 7 | 
 8 | class Subnet(nn.Module):
 9 |     def __init__(self, in_channels = 256,mid_channels = 256 ,num_cls = -1):
10 |         super(Subnet, self).__init__()
11 | 
12 |         self.conv = nn.Sequential(nn.Conv2d(in_channels, mid_channels, 3, padding=1),
13 |                                   nn.ReLU(inplace=True),
14 |                                   nn.Conv2d(mid_channels, mid_channels, 3, padding=1),
15 |                                   nn.ReLU(inplace=True),
16 |                                   nn.Conv2d(mid_channels, mid_channels, 3, padding=1),
17 |                                   nn.ReLU(inplace=True),
18 |                                   nn.Conv2d(mid_channels, mid_channels, 3, padding=1),
19 |                                   nn.ReLU(inplace=True))
20 |         self.num_cls = num_cls
21 |         if num_cls > 0:
22 |             self.fc = nn.Conv2d(mid_channels, num_cls, 3, padding=1)
23 | 
24 |         for m in self.modules():
25 |             if isinstance(m, nn.Conv2d):
26 |                 nn.init.kaiming_uniform_(m.weight, a=1)
27 |                 nn.init.constant_(m.bias, 0)
28 | 
29 |     def forward(self, x):
30 |         x = self.conv(x)
31 |         if self.num_cls > 0:
32 |             x = self.fc(x)
33 |             x = x.permute(0, 2, 3, 1).contiguous().view(x.size(0), -1, self.num_cls)
34 |         return x
35 | 
class TensorMask(nn.Module):
    """ResNet-FPN backbone with classification, box and window-mask heads.

    ``forward`` returns a dict with:
      * ``'cls'`` and ``'box'``: per-level head outputs concatenated along
        dim 1 (each head flattens a level to ``(batch, H * W, channels)``);
      * ``'0'``, ``'1'``, ...: one mask tensor per pyramid level, permuted so
        the two ``base_window`` axes come last.
    """

    def __init__(self, backbone='resnet50', num_cls=80, base_window=12,
                 freezeBN=True, freezeLayers=False, align_corners=True):
        super(TensorMask, self).__init__()
        self.align_corners = align_corners
        self.base_fpn = resnet_fpn_backbone(
            backbone,
            pretrained=True,
            freezeBN=freezeBN,
            freezeLayers=freezeLayers,
            align_corners=align_corners,
        )

        self.cls_subnet = Subnet(in_channels=256, mid_channels=256, num_cls=num_cls)
        self.box_subnet = Subnet(in_channels=256, mid_channels=128, num_cls=4)
        self.mask_subnet = Subnet(in_channels=256, mid_channels=128)

        self.mask_fuse = nn.Sequential(
            nn.Conv2d(128, 128, 3, padding=1),
            nn.ReLU(inplace=True),
        )
        # One output channel per cell of the base_window x base_window mask.
        self.mask_head = nn.Conv2d(128, base_window ** 2, kernel_size=1, padding=0)

        self.base_window = base_window

        # Box offsets start at 1 so training begins from a small box rather
        # than from a degenerate point.
        nn.init.constant_(self.box_subnet.fc.bias, 1)
        fuse_conv = self.mask_fuse[0]
        nn.init.kaiming_uniform_(fuse_conv.weight, a=1)
        nn.init.constant_(fuse_conv.bias, 0)
        nn.init.kaiming_uniform_(self.mask_head.weight, a=1)
        nn.init.constant_(self.mask_head.bias, 0)

        # Bias chosen so that sigmoid(bias) == 0.01 for every class logit.
        prior = 0.01
        nn.init.constant_(self.cls_subnet.fc.bias, -math.log((1 - prior) / prior))

    def forward(self, x):
        """Compute class scores, box offsets and per-level window masks."""
        feats = self.base_fpn(x)
        cls_branch = torch.cat([self.cls_subnet(f) for f in feats], dim=1)
        box_branch = torch.cat([self.box_subnet(f) for f in feats], dim=1)
        mask_feats = [self.mask_subnet(f) for f in feats]

        ret = {'cls': cls_branch, 'box': box_branch}

        # Tensor bipyramid: every level is upsampled to the finest level's
        # resolution and fused with the finest feature map.
        finest = mask_feats[0]
        for level, feat in enumerate(mask_feats):
            if level > 0:
                feat = F.interpolate(feat, scale_factor=2 ** level, mode="bilinear",
                                     align_corners=self.align_corners)
            logits = self.mask_head(self.mask_fuse(feat + finest))
            windows = logits.view(logits.size(0), self.base_window, self.base_window,
                                  logits.size(2), logits.size(3))
            # NOTE(review): swap_align2nat is a custom op defined elsewhere;
            # the meaning of the (1, 2**level, -6.) arguments is not visible
            # here -- confirm against the op's implementation.
            aligned = swap_align2nat(windows, 1, 2 ** level, -6., self.align_corners)
            ret['%d' % level] = aligned.permute(0, 3, 4, 1, 2).contiguous()

        return ret
84 | 
if __name__ == '__main__':
    # Smoke test: one forward pass of a single-class model on a blank image.
    import os
    # Only picks GPU 1 when the caller has not chosen a device already.
    os.environ.setdefault('CUDA_VISIBLE_DEVICES', '1')
    model = TensorMask(num_cls=1, base_window=10)
    model.cuda()
    dummy = torch.zeros([1, 3, 512, 512]).cuda()
    out = model(dummy)
94 | 


--------------------------------------------------------------------------------
/lib/trainer.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import torch
  3 | import torch.nn as nn
  4 | from .utils import AverageMeter
  5 | from progress.bar import Bar
  6 | from models.losses import TensorMaskLoss
  7 | 
  8 | class ModleWithLoss(nn.Module):
  9 |     def __init__(self, model, loss):
 10 |         super(ModleWithLoss, self).__init__()
 11 |         self.model = model
 12 |         self.loss = loss
 13 | 
 14 |     def forward(self, batch):
 15 |         outputs = self.model(batch['input'])
 16 |         loss, loss_stats = self.loss(outputs, batch)
 17 |         return loss, loss_stats
 18 | 
 19 | 
 20 | class Trainer(object):
 21 |     def __init__(self, opt, model, optimizer=None):
 22 |         self.opt = opt
 23 |         self.optimizer = optimizer
 24 |         self.loss_stats, self.loss = self._get_losses(opt)
 25 |         self.model_with_loss = ModleWithLoss(model, self.loss)
 26 | 
 27 |     def set_device(self, gpus, device):
 28 |         if len(gpus) > 1:
 29 |             self.model_with_loss = nn.DataParallel(
 30 |                 self.model_with_loss, device_ids=gpus).to(device)
 31 |         else:
 32 |             self.model_with_loss = self.model_with_loss.to(device)
 33 | 
 34 |         for state in self.optimizer.state.values():
 35 |             for k, v in state.items():
 36 |                 if isinstance(v, torch.Tensor):
 37 |                     state[k] = v.to(device=device, non_blocking=True)
 38 | 
 39 |     def run_epoch(self, phase, epoch, data_loader):
 40 |         model_with_loss = self.model_with_loss
 41 |         if phase == 'train':
 42 |             model_with_loss.train()
 43 |         else:
 44 |             if len(self.opt.gpus) > 1:
 45 |                 model_with_loss = self.model_with_loss.module
 46 |             model_with_loss.eval()
 47 |             torch.cuda.empty_cache()
 48 | 
 49 |         results = {}
 50 |         data_time, batch_time = AverageMeter(), AverageMeter()
 51 |         avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
 52 |         num_iters = len(data_loader)
 53 |         bar = Bar('{}'.format('tensormask'), max=num_iters)
 54 |         end = time.time()
 55 |         for iter_id, batch in enumerate(data_loader):
 56 |             if iter_id >= num_iters:
 57 |                 break
 58 |             data_time.update(time.time() - end)
 59 | 
 60 |             for k in batch:
 61 |                 if k != 'meta':
 62 |                     batch[k] = batch[k].to(device=self.opt.device, non_blocking=True)
 63 |             loss, loss_stats = model_with_loss(batch)
 64 |             loss = loss.mean()
 65 |             if phase == 'train':
 66 |                 self.optimizer.zero_grad()
 67 |                 loss.backward()
 68 |                 self.optimizer.step()
 69 |             batch_time.update(time.time() - end)
 70 |             end = time.time()
 71 | 
 72 |             Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
 73 |                 epoch, iter_id, num_iters, phase=phase,
 74 |                 total=bar.elapsed_td, eta=bar.eta_td)
 75 |             for l in avg_loss_stats:
 76 |                 avg_loss_stats[l].update(
 77 |                     loss_stats[l].mean().item(), batch['input'].size(0))
 78 |                 Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
 79 | 
 80 |             Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
 81 |                                           '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
 82 |             if self.opt.print_iter > 0:
 83 |                 if iter_id % self.opt.print_iter == 0:
 84 |                     print('{}| {}'.format('tensormask', Bar.suffix))
 85 |             else:
 86 |                 bar.next()
 87 | 
 88 |             del loss, loss_stats
 89 | 
 90 |         bar.finish()
 91 |         ret = {k: v.avg for k, v in avg_loss_stats.items()}
 92 |         ret['time'] = bar.elapsed_td.total_seconds() / 60.
 93 |         return ret, results
 94 | 
 95 | 
 96 |     def _get_losses(self,opt):
 97 |         loss_stats = ['loss','cls_loss','diou_loss','mask_loss']
 98 |         loss = TensorMaskLoss(opt)
 99 |         return loss_stats,loss
100 | 
101 |     def val(self, epoch, data_loader):
102 |         return self.run_epoch('val', epoch, data_loader)
103 | 
104 |     def train(self, epoch, data_loader):
105 |         return self.run_epoch('train', epoch, data_loader)


--------------------------------------------------------------------------------
/models/losses.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from .ops.sigmoid_focal_loss.modules.sigmoid_focal_loss import SigmoidFocalLoss
  5 | def _sigmoid(x):
  6 |   y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
  7 |   return y
  8 | 
  9 | def diou(bboxes1, bboxes2):
 10 |     w1 = bboxes1[..., 2] - bboxes1[..., 0]
 11 |     h1 = bboxes1[..., 3] - bboxes1[..., 1]
 12 |     w2 = bboxes2[..., 2] - bboxes2[..., 0]
 13 |     h2 = bboxes2[..., 3] - bboxes2[..., 1]
 14 | 
 15 |     area1 = w1 * h1
 16 |     area2 = w2 * h2
 17 | 
 18 |     center_x1 = (bboxes1[..., 2] + bboxes1[..., 0]) / 2
 19 |     center_y1 = (bboxes1[..., 3] + bboxes1[..., 1]) / 2
 20 |     center_x2 = (bboxes2[..., 2] + bboxes2[..., 0]) / 2
 21 |     center_y2 = (bboxes2[..., 3] + bboxes2[..., 1]) / 2
 22 | 
 23 |     inter_max_xy = torch.min(bboxes1[..., 2:],bboxes2[..., 2:])
 24 |     inter_min_xy = torch.max(bboxes1[..., :2],bboxes2[..., :2])
 25 |     out_max_xy = torch.max(bboxes1[..., 2:],bboxes2[..., 2:])
 26 |     out_min_xy = torch.min(bboxes1[..., :2],bboxes2[..., :2])
 27 | 
 28 |     inter = torch.clamp((inter_max_xy - inter_min_xy), min=0)
 29 |     inter_area = inter[..., 0] * inter[..., 1]
 30 |     inter_diag = (center_x2 - center_x1)**2 + (center_y2 - center_y1)**2
 31 |     outer = torch.clamp((out_max_xy - out_min_xy), min=0)
 32 |     outer_diag = (outer[..., 0] ** 2) + (outer[..., 1] ** 2)
 33 |     union = area1+area2-inter_area
 34 |     u = (inter_diag) / (outer_diag + 1e-7 )
 35 |     iou = inter_area / (union + 1e-7)
 36 |     dious = iou - u
 37 |     return dious
 38 | 
 39 | def _gather_feat(feat, ind, mask=None):
 40 |     dim  = feat.size(2)
 41 |     ind  = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
 42 |     feat = feat.gather(1, ind)
 43 |     if mask is not None:
 44 |         mask = mask.unsqueeze(2).expand_as(feat)
 45 |         feat = feat[mask]
 46 |         feat = feat.view(-1, dim)
 47 |     return feat
 48 | 
 49 | def _tranpose_and_gather_feat(feat, ind):
 50 |     feat = _gather_feat(feat, ind)
 51 |     return feat
 52 | 
 53 | class BOXLoss(nn.Module):
 54 |     def __init__(self):
 55 |         super(BOXLoss, self).__init__()
 56 | 
 57 |     def forward(self, output, mask, ind, target):
 58 |         mask = mask.float()
 59 |         pred = _tranpose_and_gather_feat(output, ind)
 60 |         right_offset,left_offset = torch.split(pred,[2,2],dim=-1)
 61 |         x1y1x2y2,ct,stride = torch.split(target,[4,2,1],dim=-1)
 62 |         stride = stride.expand_as(right_offset).float()
 63 |         predx1y1 = (ct + 0.5 - right_offset)*stride
 64 |         predx2y2 = (ct + 0.5 + left_offset )*stride
 65 |         predx1y1x2y2 = torch.cat([predx1y1,predx2y2],dim = -1)
 66 |         diou_loss = (1. - diou(predx1y1x2y2,x1y1x2y2)) * mask
 67 |         loss = diou_loss.sum() / ( mask.sum() + 1e-4)
 68 | 
 69 |         return loss
 70 | 
 71 | class MaskBCELoss(nn.Module):
 72 |     def __init__(self):
 73 |         super(MaskBCELoss, self).__init__()
 74 |         self.register_buffer('pos_weight',torch.tensor(1.5,dtype=torch.float32))
 75 | 
 76 |     def forward(self, output, mask, ind, target):
 77 |         B,N,window=target.size(0),target.size(1),target.size(-1)
 78 |         output = output.view(B,-1,window*window)
 79 |         pred = _tranpose_and_gather_feat(output, ind).view(B,N,window,window)
 80 |         mask = mask.unsqueeze(2).unsqueeze(2).expand_as(pred).float()
 81 |         bce_loss =F.binary_cross_entropy_with_logits(pred,target,
 82 |                                                      pos_weight=self.pos_weight,
 83 |                                                      reduction='none')
 84 |         num_smaple = mask.sum()
 85 |         loss = (bce_loss*mask).sum()
 86 |         if num_smaple > 0:
 87 |             loss /= num_smaple
 88 |         return loss
 89 | 
 90 | class TensorMaskLoss(nn.Module):
 91 |     def __init__(self,opt):
 92 |         super(TensorMaskLoss,self).__init__()
 93 |         self.cls_loss = SigmoidFocalLoss(gamma=3,alpha=0.3)
 94 |         self.box_loss = BOXLoss()
 95 |         self.mask_loss = MaskBCELoss()
 96 |         self.opt = opt
 97 | 
 98 |     def forward(self, ouput,batch):
 99 |         opt = self.opt
100 |         mask_loss = 0
101 |         num_sample = batch['reg_mask'].sum()
102 |         cls_loss = self.cls_loss(ouput['cls'].view([-1,opt.num_class]),batch['cls'].view([-1]))
103 |         box_loss = self.box_loss(ouput['box'],batch['reg_mask'],batch['ind'],batch['xywh'])
104 |         for i in range(6):
105 |             mask_loss += self.mask_loss(ouput['%d'%i],batch['seg_mask_%d'%i],batch['seg_ind_%d'%i],batch['seg_%d'%i])
106 |         mask_loss /= 6
107 |         if num_sample > 0:
108 |             cls_loss /= num_sample
109 |         loss = opt.cls_weights * cls_loss + opt.xywh_weights * box_loss + opt.mask_weights * mask_loss
110 |         loss_stats = {'loss': loss, 'cls_loss': cls_loss,
111 |                       'diou_loss': box_loss, 'mask_loss': mask_loss}
112 |         return loss,loss_stats
113 | 


--------------------------------------------------------------------------------
/models/res_fpn.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | import torch
  3 | from torch import nn
  4 | import torch.nn.functional as F
  5 | from torchvision.models import resnet
  6 | from collections import OrderedDict
  7 | 
  8 | class FrozenBatchNorm2d(nn.Module):
  9 |     """
 10 |     BatchNorm2d where the batch statistics and the affine parameters
 11 |     are fixed
 12 |     """
 13 |     def __init__(self, n):
 14 |         super(FrozenBatchNorm2d, self).__init__()
 15 |         self.register_buffer("weight", torch.ones(n))
 16 |         self.register_buffer("bias", torch.zeros(n))
 17 |         self.register_buffer("running_mean", torch.zeros(n))
 18 |         self.register_buffer("running_var", torch.ones(n))
 19 | 
 20 |     def forward(self, x):
 21 |         # move reshapes to the beginning
 22 |         # to make it fuser-friendly
 23 |         w = self.weight.reshape(1, -1, 1, 1)
 24 |         b = self.bias.reshape(1, -1, 1, 1)
 25 |         rv = self.running_var.reshape(1, -1, 1, 1)
 26 |         rm = self.running_mean.reshape(1, -1, 1, 1)
 27 |         scale = w * rv.rsqrt()
 28 |         bias = b - rm * scale
 29 |         return x * scale + bias
 30 | 
 31 | class IntermediateLayerGetter(nn.ModuleDict):
 32 |     def __init__(self, model, return_layers):
 33 |         if not set(return_layers).issubset([name for name, _ in model.named_children()]):
 34 |             raise ValueError("return_layers are not present in model")
 35 |         orig_return_layers = return_layers
 36 |         return_layers = {str(k): str(v) for k, v in return_layers.items()}
 37 |         layers = OrderedDict()
 38 |         for name, module in model.named_children():
 39 |             layers[name] = module
 40 |             if name in return_layers:
 41 |                 del return_layers[name]
 42 |             if not return_layers:
 43 |                 break
 44 | 
 45 |         super(IntermediateLayerGetter, self).__init__(layers)
 46 |         self.return_layers = orig_return_layers
 47 | 
 48 |     def forward(self, x):
 49 |         outs = []
 50 |         for name, module in self.items():
 51 |             x = module(x)
 52 |             if name in self.return_layers:
 53 |                 outs.append(x)
 54 | 
 55 |         return tuple(outs)
 56 | 
 57 | class LastLevelP6P7(nn.Module):
 58 |     """
 59 |     This module is used in RetinaNet to generate extra layers, P6 and P7.
 60 |     """
 61 |     def __init__(self, in_channels, out_channels):
 62 |         super(LastLevelP6P7, self).__init__()
 63 |         self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)
 64 |         self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)
 65 |         for module in [self.p6, self.p7]:
 66 |             nn.init.kaiming_uniform_(module.weight, a=1)
 67 |             nn.init.constant_(module.bias, 0)
 68 |         self.use_P5 = in_channels == out_channels
 69 | 
 70 |     def forward(self, p, c):
 71 |         p5, c5 = p[-1], c[-1]
 72 |         x = p5 if self.use_P5 else c5
 73 |         p6 = self.p6(x)
 74 |         p7 = self.p7(F.relu(p6))
 75 |         p.extend([p6, p7])
 76 |         return p
 77 | 
 78 | class FeaturePyramidNetwork(nn.Module):
 79 | 
 80 |     def __init__(self, in_channels_list, out_channels, extra_blocks=None ,align_corners=True):
 81 |         super(FeaturePyramidNetwork, self).__init__()
 82 |         self.align_corners = align_corners
 83 |         self.inner_blocks = nn.ModuleList()
 84 |         self.layer_blocks = nn.ModuleList()
 85 |         for in_channels in in_channels_list:
 86 |             if in_channels == 0:
 87 |                 continue
 88 |             inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
 89 |             layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
 90 |             self.inner_blocks.append(inner_block_module)
 91 |             self.layer_blocks.append(layer_block_module)
 92 | 
 93 |         # initialize parameters now to avoid modifying the initialization of top_blocks
 94 |         for m in self.children():
 95 |             if isinstance(m, nn.Conv2d):
 96 |                 nn.init.kaiming_uniform_(m.weight, a=1)
 97 |                 nn.init.constant_(m.bias, 0)
 98 | 
 99 |         self.extra_blocks = extra_blocks
100 | 
101 |     def forward(self, x):
102 |         last_inner = self.inner_blocks[-1](x[-1])
103 |         results = []
104 |         results.append(self.layer_blocks[-1](last_inner))
105 |         for feature, inner_block, layer_block in zip(
106 |             x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1]
107 |         ):
108 |             if not inner_block:
109 |                 continue
110 |             inner_lateral = inner_block(feature)
111 |             feat_shape = inner_lateral.shape[-2:]
112 |             inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="bilinear",align_corners = self.align_corners)
113 |             last_inner = inner_lateral + inner_top_down
114 |             results.insert(0, layer_block(last_inner))
115 | 
116 |         if self.extra_blocks is not None:
117 |             results = self.extra_blocks(results, x)
118 | 
119 |         return tuple(results)
120 | 
class BackboneWithFPN(nn.Module):
    """Backbone feature extractor followed by an FPN with P6/P7 on top.

    ``return_layers`` selects which backbone children feed the pyramid.
    ``in_channels_list`` gives their channel counts and ``out_channels``
    the channel count of every pyramid level.
    """

    def __init__(self, backbone, return_layers, in_channels_list, out_channels ,align_corners):
        super(BackboneWithFPN, self).__init__()
        self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        top_blocks = LastLevelP6P7(in_channels_list[-1], out_channels)
        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=top_blocks,
            align_corners=align_corners,
        )
        self.out_channels = out_channels

    def forward(self, x):
        """Return the pyramid feature maps computed from the image batch ``x``."""
        return self.fpn(self.body(x))
138 | 
139 | 
def resnet_fpn_backbone(backbone_name, pretrained,freezeBN = False , freezeLayers = False , align_corners = True ):
    """Build a torchvision ResNet wrapped in a 256-channel FPN.

    Args:
        backbone_name: name of a constructor in ``torchvision.models.resnet``.
        pretrained: forwarded to that constructor.
        freezeBN: use ``FrozenBatchNorm2d`` as the normalisation layer.
        freezeLayers: stop gradients for every backbone parameter whose name
            does not contain ``layer2``, ``layer3`` or ``layer4``.
        align_corners: forwarded to the FPN's bilinear upsampling.
    """
    norm_layer = FrozenBatchNorm2d if freezeBN else None
    builder = resnet.__dict__[backbone_name]
    backbone = builder(pretrained=pretrained, norm_layer=norm_layer)

    # freeze layers
    if freezeLayers:
        trainable_stages = ('layer2', 'layer3', 'layer4')
        for name, parameter in backbone.named_parameters():
            print(name)
            if not any(stage in name for stage in trainable_stages):
                parameter.requires_grad_(False)

    return_layers = {'layer1': 'p2', 'layer2': 'p3', 'layer3': 'p4', 'layer4': 'p5'}

    # Channel counts of layer1..layer4, derived from the final width.
    stage2_channels = backbone.inplanes // 8
    in_channels_list = [stage2_channels * factor for factor in (1, 2, 4, 8)]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels , align_corners)
162 | 
if __name__ == '__main__':
    # Smoke test: push an all-ones image through a randomly initialised
    # ResNet-50 FPN (no pretrained weights).
    dummy = torch.ones([1, 3, 512, 512])
    model = resnet_fpn_backbone('resnet50', False)
    out = model(dummy)


--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu:
--------------------------------------------------------------------------------
  1 | // modify from
  2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu
  3 | 
  4 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  5 | // This file is modified from
  6 | // https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu
  7 | // Cheng-Yang Fu
  8 | // cyfu@cs.unc.edu
  9 | #include <ATen/ATen.h>
 10 | #include <ATen/cuda/CUDAContext.h>
 11 | 
 12 | #include <THC/THC.h>
 13 | #include <THC/THCAtomics.cuh>
 14 | #include <THC/THCDeviceUtils.cuh>
 15 | 
 16 | #include <cfloat>
 17 | 
// TODO make it in a common file
// Grid-stride loop over [0, n): each thread starts at its global index and
// advances by the total number of launched threads, so any grid size covers
// all n elements.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)
 22 | 
 23 | template <typename scalar_t>
 24 | __global__ void SigmoidFocalLossForward(const int nthreads,
 25 |                                         const scalar_t *logits,
 26 |                                         const long *targets,
 27 |                                         const int num_classes,
 28 |                                         const float gamma, const float alpha,
 29 |                                         const int num, scalar_t *losses) {
 30 |   CUDA_1D_KERNEL_LOOP(i, nthreads) {
 31 |     int n = i / num_classes;
 32 |     int d = i % num_classes;  // current class[0~79];
 33 |     int t = targets[n];       // target class [1~80];
 34 | 
 35 |     // Decide it is positive or negative case.
 36 |     scalar_t c1 = (t == (d + 1));
 37 |     scalar_t c2 = (t >= 0 & t != (d + 1));
 38 | 
 39 |     scalar_t zn = (1.0 - alpha);
 40 |     scalar_t zp = (alpha);
 41 | 
 42 |     // p = 1. / 1. + expf(-x); p = sigmoid(x)
 43 |     scalar_t p = 1. / (1. + expf(-logits[i]));
 44 | 
 45 |     // (1-p)**gamma * log(p) where
 46 |     scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));
 47 | 
 48 |     // p**gamma * log(1-p)
 49 |     scalar_t term2 =
 50 |         powf(p, gamma) *
 51 |         (-1. * logits[i] * (logits[i] >= 0) -
 52 |          logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0))));
 53 | 
 54 |     losses[i] = 0.0;
 55 |     losses[i] += -c1 * term1 * zp;
 56 |     losses[i] += -c2 * term2 * zn;
 57 | 
 58 |   }  // CUDA_1D_KERNEL_LOOP
 59 | }  // SigmoidFocalLossForward
 60 | 
 61 | template <typename scalar_t>
 62 | __global__ void SigmoidFocalLossBackward(
 63 |     const int nthreads, const scalar_t *logits, const long *targets,
 64 |     const scalar_t *d_losses, const int num_classes, const float gamma,
 65 |     const float alpha, const int num, scalar_t *d_logits) {
 66 |   CUDA_1D_KERNEL_LOOP(i, nthreads) {
 67 |     int n = i / num_classes;
 68 |     int d = i % num_classes;  // current class[0~79];
 69 |     int t = targets[n];       // target class [1~80], 0 is background;
 70 | 
 71 |     // Decide it is positive or negative case.
 72 |     scalar_t c1 = (t == (d + 1));
 73 |     scalar_t c2 = (t >= 0 & t != (d + 1));
 74 | 
 75 |     scalar_t zn = (1.0 - alpha);
 76 |     scalar_t zp = (alpha);
 77 |     // p = 1. / 1. + expf(-x); p = sigmoid(x)
 78 |     scalar_t p = 1. / (1. + expf(-logits[i]));
 79 | 
 80 |     // (1-p)**g * (1 - p - g*p*log(p)
 81 |     scalar_t term1 =
 82 |         powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN))));
 83 | 
 84 |     // (p**g) * (g*(1-p)*log(1-p) - p)
 85 |     scalar_t term2 =
 86 |         powf(p, gamma) *
 87 |         ((-1. * logits[i] * (logits[i] >= 0) -
 88 |           logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) *
 89 |              (1. - p) * gamma -
 90 |          p);
 91 |     d_logits[i] = 0.0;
 92 |     d_logits[i] += -c1 * term1 * zp;
 93 |     d_logits[i] += -c2 * term2 * zn;
 94 |     d_logits[i] = d_logits[i] * d_losses[i];
 95 | 
 96 |   }  // CUDA_1D_KERNEL_LOOP
 97 | }  // SigmoidFocalLossBackward
 98 | 
 99 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
100 |                                          const at::Tensor &targets,
101 |                                          const int num_classes,
102 |                                          const float gamma, const float alpha) {
103 |   AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
104 |   AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
105 |   AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
106 | 
107 |   const int num_samples = logits.size(0);
108 | 
109 |   auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
110 |   auto losses_size = num_samples * logits.size(1);
111 | 
112 |   dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L));
113 |   dim3 block(512);
114 | 
115 |   if (losses.numel() == 0) {
116 |     THCudaCheck(cudaGetLastError());
117 |     return losses;
118 |   }
119 | 
120 |   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
121 |       logits.type(), "SigmoidFocalLoss_forward", [&] {
122 |         SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
123 |             losses_size, logits.contiguous().data<scalar_t>(),
124 |             targets.contiguous().data<long>(), num_classes, gamma, alpha,
125 |             num_samples, losses.data<scalar_t>());
126 |       });
127 |   THCudaCheck(cudaGetLastError());
128 |   return losses;
129 | }
130 | 
// Host wrapper for the backward focal-loss kernel.
//
// logits   : CUDA tensor of shape [num_samples, num_classes]
// targets  : CUDA tensor read as `long`, one label per sample
// d_losses : CUDA tensor with the upstream gradient, one value per logit
// Returns d_logits with shape [num_samples, num_classes]. An empty input
// returns an empty tensor without launching.
at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const at::Tensor &d_losses,
                                          const int num_classes,
                                          const float gamma,
                                          const float alpha) {
  AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
  AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
  AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor");

  AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");

  const int num_samples = logits.size(0);
  AT_ASSERTM(logits.size(1) == num_classes,
             "logits.size(1) should be num_classes");

  auto d_logits = at::zeros({num_samples, num_classes}, logits.options());
  auto d_logits_size = num_samples * logits.size(1);

  if (d_logits.numel() == 0) {
    THCudaCheck(cudaGetLastError());
    return d_logits;
  }

  dim3 grid(std::min(THCCeilDiv(d_logits_size, 512L), 4096L));
  dim3 block(512);

  // Keep the contiguous views alive in named tensors for the whole launch.
  const at::Tensor logits_contig = logits.contiguous();
  const at::Tensor targets_contig = targets.contiguous();
  const at::Tensor d_losses_contig = d_losses.contiguous();
  // Launch on PyTorch's current stream rather than the legacy default stream
  // so the kernel is ordered with the surrounding tensor operations.
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      logits.type(), "SigmoidFocalLoss_backward", [&] {
        SigmoidFocalLossBackward<scalar_t><<<grid, block, 0, stream>>>(
            d_logits_size, logits_contig.data<scalar_t>(),
            targets_contig.data<long>(),
            d_losses_contig.data<scalar_t>(), num_classes, gamma, alpha,
            num_samples, d_logits.data<scalar_t>());
      });

  THCudaCheck(cudaGetLastError());
  return d_logits;
}
170 | 


--------------------------------------------------------------------------------
/lib/optimer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.optim.optimizer import Optimizer, required
  3 | import math
  4 | 
  5 | 
  6 | class SGD(Optimizer):
  7 | 
  8 |     def __init__(self, params, lr=required, momentum=0, dampening=0,
  9 |                  weight_decay=0, nesterov=False ,warm_up = 1000 ):
 10 |         if lr is not required and lr < 0.0:
 11 |             raise ValueError("Invalid learning rate: {}".format(lr))
 12 |         if momentum < 0.0:
 13 |             raise ValueError("Invalid momentum value: {}".format(momentum))
 14 |         if weight_decay < 0.0:
 15 |             raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
 16 | 
 17 |         defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
 18 |                         weight_decay=weight_decay, nesterov=nesterov)
 19 |         if nesterov and (momentum <= 0 or dampening != 0):
 20 |             raise ValueError("Nesterov momentum requires a momentum and zero dampening")
 21 |         self.setp_num = 0
 22 |         self.warm_up = warm_up
 23 |         self.warm_up_end = False
 24 |         super(SGD, self).__init__(params, defaults)
 25 | 
 26 |     def __setstate__(self, state):
 27 |         super(SGD, self).__setstate__(state)
 28 |         for group in self.param_groups:
 29 |             group.setdefault('nesterov', False)
 30 | 
 31 |     def step(self, closure=None):
 32 |         self.setp_num += 1
 33 |         loss = None
 34 |         if closure is not None:
 35 |             loss = closure()
 36 | 
 37 |         for group in self.param_groups:
 38 |             if self.setp_num <= self.warm_up and not self.warm_up_end :
 39 |                 lr = group['lr']*pow(self.setp_num/self.warm_up,1)
 40 |             else:
 41 |                 lr = group['lr']
 42 |                 self.warm_up_end = True
 43 | 
 44 |             weight_decay = group['weight_decay']
 45 |             momentum = group['momentum']
 46 |             dampening = group['dampening']
 47 |             nesterov = group['nesterov']
 48 | 
 49 |             for p in group['params']:
 50 |                 if p.grad is None:
 51 |                     continue
 52 |                 d_p = p.grad.data
 53 |                 if weight_decay != 0:
 54 |                     d_p.add_(weight_decay, p.data)
 55 |                 if momentum != 0:
 56 |                     param_state = self.state[p]
 57 |                     if 'momentum_buffer' not in param_state:
 58 |                         buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
 59 |                         buf.mul_(momentum).add_(d_p)
 60 |                     else:
 61 |                         buf = param_state['momentum_buffer']
 62 |                         buf.mul_(momentum).add_(1 - dampening, d_p)
 63 |                     if nesterov:
 64 |                         d_p = d_p.add(momentum, buf)
 65 |                     else:
 66 |                         d_p = buf
 67 | 
 68 |                 p.data.add_(-lr, d_p)
 69 |         return loss
 70 | 
 71 | class Adam(Optimizer):
 72 | 
 73 |     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
 74 |                  weight_decay=0, amsgrad=False , warm_up = 1000 ):
 75 |         if not 0.0 <= lr:
 76 |             raise ValueError("Invalid learning rate: {}".format(lr))
 77 |         if not 0.0 <= eps:
 78 |             raise ValueError("Invalid epsilon value: {}".format(eps))
 79 |         if not 0.0 <= betas[0] < 1.0:
 80 |             raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
 81 |         if not 0.0 <= betas[1] < 1.0:
 82 |             raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
 83 |         defaults = dict(lr=lr, betas=betas, eps=eps,
 84 |                         weight_decay=weight_decay, amsgrad=amsgrad)
 85 |         self.setp_num = 0
 86 |         self.warm_up = warm_up
 87 |         self.warm_up_end = False
 88 |         super(Adam, self).__init__(params, defaults)
 89 | 
 90 |     def __setstate__(self, state):
 91 |         super(Adam, self).__setstate__(state)
 92 |         for group in self.param_groups:
 93 |             group.setdefault('amsgrad', False)
 94 | 
 95 |     def step(self, closure=None):
 96 |         """Performs a single optimization step.
 97 | 
 98 |         Arguments:
 99 |             closure (callable, optional): A closure that reevaluates the model
100 |                 and returns the loss.
101 |         """
102 |         self.setp_num += 1
103 |         loss = None
104 |         if closure is not None:
105 |             loss = closure()
106 | 
107 |         for group in self.param_groups:
108 | 
109 |             if self.setp_num <= self.warm_up and not self.warm_up_end :
110 |                 lr = group['lr']*pow(self.setp_num/self.warm_up,1)
111 |             else:
112 |                 lr = group['lr']
113 |                 self.warm_up_end = True
114 | 
115 |             for p in group['params']:
116 | 
117 |                 if p.grad is None:
118 |                     continue
119 |                 grad = p.grad.data
120 |                 if grad.is_sparse:
121 |                     raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
122 |                 amsgrad = group['amsgrad']
123 | 
124 |                 state = self.state[p]
125 | 
126 |                 # State initialization
127 |                 if len(state) == 0:
128 |                     state['step'] = 0
129 |                     # Exponential moving average of gradient values
130 |                     state['exp_avg'] = torch.zeros_like(p.data)
131 |                     # Exponential moving average of squared gradient values
132 |                     state['exp_avg_sq'] = torch.zeros_like(p.data)
133 |                     if amsgrad:
134 |                         # Maintains max of all exp. moving avg. of sq. grad. values
135 |                         state['max_exp_avg_sq'] = torch.zeros_like(p.data)
136 | 
137 |                 exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
138 |                 if amsgrad:
139 |                     max_exp_avg_sq = state['max_exp_avg_sq']
140 |                 beta1, beta2 = group['betas']
141 | 
142 |                 state['step'] += 1
143 | 
144 |                 if group['weight_decay'] != 0:
145 |                     grad = grad.add(group['weight_decay'], p.data)
146 | 
147 |                 # Decay the first and second moment running average coefficient
148 |                 exp_avg.mul_(beta1).add_(1 - beta1, grad)
149 |                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
150 |                 if amsgrad:
151 |                     # Maintains the maximum of all 2nd moment running avg. till now
152 |                     torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
153 |                     # Use the max. for normalizing running avg. of gradient
154 |                     denom = max_exp_avg_sq.sqrt().add_(group['eps'])
155 |                 else:
156 |                     denom = exp_avg_sq.sqrt().add_(group['eps'])
157 | 
158 |                 bias_correction1 = 1 - beta1 ** state['step']
159 |                 bias_correction2 = 1 - beta2 ** state['step']
160 |                 step_size = lr * math.sqrt(bias_correction2) / bias_correction1
161 | 
162 |                 p.data.addcdiv_(-step_size, exp_avg, denom)
163 | 
164 |         return loss


--------------------------------------------------------------------------------
/models/detector.py:
--------------------------------------------------------------------------------
  1 | from models.tensormask import TensorMask
  2 | from config import cfg as opt
  3 | from lib.utils import load_model,save_model
  4 | from lib.coco import COCO
  5 | import numpy as np
  6 | import torch
  7 | import os
  8 | import cv2
  9 | 
 10 | def affine_transform(pt, t):
 11 |     new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
 12 |     new_pt = np.dot(t, new_pt)
 13 |     return new_pt[:2]
 14 | 
 15 | def cal_iou_np(boxes1, boxes2):
 16 |     boxes1 = np.array(boxes1)
 17 |     boxes2 = np.array(boxes2)
 18 |     boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
 19 |     boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
 20 |     left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
 21 |     right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
 22 |     inter_section = np.maximum(right_down - left_up, 0.0)
 23 |     inter_area = inter_section[..., 0] * inter_section[..., 1]
 24 |     union_area = boxes1_area + boxes2_area - inter_area
 25 |     IOU = 1.0 * inter_area / union_area
 26 |     return IOU
 27 | 
 28 | class Detector():
 29 |     def __init__(self,opt):
 30 |         self.model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
 31 |                            base_window=opt.base_window,
 32 |                            freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
 33 |                            align_corners=opt.align_corners)
 34 |         self.model = load_model(self.model, opt.weights)
 35 |         self.model.eval()
 36 |         self.model.cuda()
 37 |         self.mean = COCO.mean
 38 |         self.std = COCO.std
 39 |         self.opt = opt
 40 | 
 41 |         self.strides = np.array([self.opt.base_stride * 2 ** i for i in range(self.opt.k + 1)])
 42 |         self.windows = np.array([self.opt.base_window * lamda for lamda in self.strides], np.int32)
 43 | 
 44 |         self.output_size = np.array(list(zip(self.opt.input_w // self.strides, self.opt.input_h // self.strides)))
 45 |         self.num_det = [output_w * output_h for output_w, output_h in self.output_size]
 46 |         self.det_offset = np.cumsum(self.num_det)
 47 | 
 48 |     def run(self,image,vis=True):
 49 |         if isinstance(image,str):
 50 |             image = cv2.imread(image)
 51 |         show = image.copy()
 52 |         image,trans_output = self.prepare_image(image)
 53 |         input = torch.from_numpy(image).cuda()
 54 |         output = self.model(input)
 55 |         box,mask = self.decode(output,show.shape[:2],trans_output)
 56 |         if vis:
 57 |             self.show_img(show,box,mask)
 58 |         return box,mask
 59 | 
 60 |     def prepare_image(self,image):
 61 |         height, width = image.shape[0], image.shape[1]
 62 |         ar = width/height
 63 |         new_h,new_w = (self.opt.input_h,ar*self.opt.input_h) if ar < 1 else (self.opt.input_w/ar,self.opt.input_w)
 64 |         dx, dy = (self.opt.input_w - new_w) / 2, (self.opt.input_h - new_h) / 2
 65 |         src = np.array([[0, 0], [0, height], [width, 0]], dtype=np.float32)
 66 |         dst = np.array([[dx, dy], [dx, new_h + dy], [new_w + dx, dy]], dtype=np.float32)
 67 |         trans_input = cv2.getAffineTransform(src, dst)
 68 |         trans_output = cv2.getAffineTransform(dst, src)
 69 |         image = cv2.warpAffine(image, trans_input, (self.opt.input_w, self.opt.input_h),
 70 |                                flags=cv2.INTER_LINEAR, borderValue=(0, 0, 0))
 71 |         image = (image.astype(np.float32) / 255.)
 72 |         image = (image- self.mean) / self.std
 73 |         image = image.transpose(2, 0, 1)
 74 |         image = np.expand_dims(image,0).astype(np.float32)
 75 |         return image,trans_output
 76 | 
 77 |     def decode(self,output,img_hw,trans_ouput,method = 'nms',iou_threshold=0.45,sigma=0.3):
 78 |         socres,cls = torch.max(output['cls'].sigmoid_(),dim=-1)
 79 |         socres = socres.detach().cpu().numpy()
 80 |         cls = cls.detach().cpu().numpy()
 81 |         box = output['box'].detach().cpu().numpy()
 82 |         seg = [output['%d' % i].sigmoid_().detach().cpu().numpy() for i in range(self.opt.k + 1)]
 83 |         topk_inds = np.where(socres > self.opt.vis_thresh)
 84 |         result = []
 85 |         for det_num in topk_inds[1]:
 86 |             p = socres[0, det_num]
 87 |             cls_index = cls[0,det_num]
 88 |             b = box[0, det_num, :]
 89 |             for id, num in enumerate(self.det_offset):
 90 |                 if num > det_num:
 91 |                     break
 92 |             offset = det_num - self.det_offset[id - 1] if id > 0 else det_num
 93 |             width, hight = self.output_size[id]
 94 | 
 95 |             ### ct_int_feat
 96 |             y = int(offset / width)
 97 |             x = int(offset % width)
 98 | 
 99 |             b[0:2] = (x + 0.5 - b[0] )* self.strides[id],( y + 0.5 - b[1] )* self.strides[id]
100 |             b[2:4] = (x + 0.5 + b[2] )* self.strides[id],( y + 0.5 + b[3] )* self.strides[id]
101 |             b[0:2] = affine_transform(b[0:2],trans_ouput).astype(int)
102 |             b[2:4] = affine_transform(b[2:4], trans_ouput).astype(int)
103 |             result.append([*b,p,cls_index,x,y,id])
104 | 
105 |         result = np.array(result) ## x1 y1 x2 y2 p cls ct_feat_x ct_feat_y feat_id
106 | 
107 |         ### use box to nms
108 |         class_index = result[:,5] if len(result) > 0 else []
109 |         classes_in_img = list(set(class_index))
110 |         best_bboxes = []
111 |         for cls in classes_in_img:
112 |             cls_mask = (class_index == cls)
113 |             cls_bboxes = result[cls_mask]
114 |             while len(cls_bboxes) > 0:
115 |                 max_ind = np.argmax(cls_bboxes[:, 4])
116 |                 best_bbox = cls_bboxes[max_ind]
117 |                 best_bboxes.append(best_bbox)
118 |                 cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
119 |                 iou = cal_iou_np(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
120 |                 assert method in ['nms', 'soft-nms']
121 |                 weight = np.ones((len(iou),), dtype=np.float32)
122 |                 if method == 'nms':
123 |                     iou_mask = iou > iou_threshold
124 |                     weight[iou_mask] = 0.0
125 |                 if method == 'soft-nms':
126 |                     weight = np.exp(-(1.0 * iou ** 2 / sigma))
127 |                 cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
128 |                 score_mask = cls_bboxes[:, 4] >  self.opt.vis_thresh
129 |                 cls_bboxes = cls_bboxes[score_mask]
130 |         mask_res= []
131 |         for det in best_bboxes:
132 |             mask = np.zeros([self.opt.input_h,self.opt.input_w],np.uint8)
133 |             ct_feat_x,ct_feat_y,feat_id = int(det[-3]),int(det[-2]),int(det[-1])
134 |             x, y = int((ct_feat_x + 0.5) * self.strides[feat_id]), int((ct_feat_y + 0.5) * self.strides[feat_id])
135 |             window_seg = seg[feat_id][0, ct_feat_y, ct_feat_x, :, :]
136 |             paste_x, paste_y, paste_x1, paste_y1 = x - self.windows[feat_id] // 2,\
137 |                                                    y - self.windows[feat_id] // 2, \
138 |                                                    x + self.windows[feat_id] // 2,\
139 |                                                    y + self.windows[feat_id] // 2
140 | 
141 |             window_x, window_y, window_x1, window_y1 = max(-paste_x, 0), max(-paste_y, 0), \
142 |                                                        self.windows[feat_id] - max(0, paste_x1 - self.opt.input_w), \
143 |                                                        self.windows[feat_id] - max(0, paste_y1 - self.opt.input_h)
144 |             paste_x, paste_y, paste_x1, paste_y1 = max(paste_x, 0), max(paste_y, 0), \
145 |                                                    min(paste_x1, self.opt.input_w), \
146 |                                                    min(paste_y1,self.opt.input_h)
147 |             window_seg = cv2.resize(window_seg, (self.windows[feat_id],self. windows[feat_id]))
148 |             window_seg = (window_seg > 0.5).astype(np.uint8)
149 |             mask[paste_y:paste_y1, paste_x:paste_x1] = window_seg[window_y:window_y1, window_x:window_x1]
150 |             mask = cv2.warpAffine(mask, trans_ouput,
151 |                                      (img_hw[1], img_hw[0]),
152 |                                      flags=cv2.INTER_LINEAR)
153 | 
154 |             mask_res.append(mask)
155 |         return best_bboxes,mask_res
156 | 
157 |     def show_img(self,img,box,mask):
158 |         for i in range(len(box)):
159 |             det = box[i].astype(np.int)
160 |             if self.opt.show_box:
161 |                 cv2.rectangle(img, (det[0], det[1]), (det[2], det[3]), (255, 0, 0), 2)
162 |             color = np.array([[np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]])
163 |             seg = mask[i]==1
164 |             img[seg] = img[seg] * 0.2 + color * 0.8
165 | 
166 |         cv2.imshow('result',img)
167 | 
168 | 
if __name__ == '__main__':
    # Demo: pin the process to GPU 3, point the config at a trained
    # checkpoint and run the detector on one sample image with a score
    # threshold of 0.5.
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    opt.weights = '/data/yoloCao/pycharmProjects/tensormask/exp/coco_person/model_last.pth'
    opt.vis_thresh = 0.5
    sample_image = '/data/yoloCao/DataSet/VOC2007/JPEGImages/2007_000027.jpg'
    Detector(opt).run(sample_image)
176 | 
177 | 


--------------------------------------------------------------------------------
/lib/coco.py:
--------------------------------------------------------------------------------
  1 | import pycocotools.coco as coco
  2 | import pycocotools.cocoeval
  3 | import numpy as np
  4 | import os
  5 | import cv2
  6 | from torch.utils.data import Dataset
  7 | 
  8 | 
  9 | def affine_transform(pt, t):
 10 |     new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
 11 |     new_pt = np.dot(t, new_pt)
 12 |     return new_pt[:2]
 13 | 
 14 | class COCO(Dataset):
 15 |     mean = np.array([0.40789654, 0.44719302, 0.47026115],
 16 |                     dtype=np.float32).reshape(1, 1, 3)
 17 |     std = np.array([0.28863828, 0.27408164, 0.27809835],
 18 |                    dtype=np.float32).reshape(1, 1, 3)
 19 |     def __init__(self, cfg, split = 'train',augment = True):
 20 |         super(COCO, self).__init__()
 21 |         self.data_dir = cfg.data_dir
 22 |         self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
 23 |         self.annot_path = os.path.join(
 24 |             self.data_dir, 'annotations',
 25 |             'instances_{}2017.json').format(split)
 26 |         self.split = split
 27 |         print('==> initializing coco 2017 {} data.'.format(split))
 28 |         self.coco = coco.COCO(self.annot_path)
 29 |         self.images = self.coco.getImgIds()
 30 |         self.num_samples = len(self.images)
 31 | 
 32 |         self.class_name = [
 33 |             '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
 34 |             'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
 35 |             'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
 36 |             'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
 37 |             'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
 38 |             'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
 39 |             'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
 40 |             'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
 41 |             'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
 42 |             'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
 43 |             'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
 44 |             'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
 45 |             'scissors', 'teddy bear', 'hair drier', 'toothbrush']
 46 |         self._valid_ids = [
 47 |             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
 48 |             14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
 49 |             24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
 50 |             37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
 51 |             48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
 52 |             58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
 53 |             72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
 54 |             82, 84, 85, 86, 87, 88, 89, 90]
 55 |         if cfg.class_name != '*' :
 56 |             self._valid_ids = [self.class_name.index(cfg.class_name)]
 57 |             self.class_name = [cfg.class_name]
 58 |             catIds = self.coco.getCatIds(self.class_name[-1])
 59 |             assert catIds == self._valid_ids
 60 |             self.images = self.coco.getImgIds(self.images, catIds)
 61 |             self.num_samples = len(self.images)
 62 | 
 63 |         self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
 64 |         self.input_w = cfg.input_w
 65 |         self.input_h = cfg.input_h
 66 |         self.base_stride = cfg.base_stride
 67 |         self.base_window = cfg.base_window
 68 |         self.k = cfg.k
 69 |         self.num_class = len(self.class_name)
 70 | 
 71 |         self.augment=augment
 72 |         self.max_objs = cfg.max_objs
 73 |         self.jitter = cfg.jitter
 74 |         self.cfg = cfg
 75 |         if not self.augment:
 76 |             self.jitter = 0
 77 |         print('Loaded {} {} samples'.format(split, self.num_samples))
 78 | 
 79 |     def __len__(self):
 80 |         return self.num_samples
 81 | 
 82 | 
 83 |     def _coco_box_to_bbox(self, box):
 84 |         bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],dtype=np.float32)
 85 |         return bbox
 86 | 
 87 |     def get_image_name(self,img_id):
 88 |         return os.path.join(self.img_dir,self.coco.loadImgs(ids=[self.images[img_id]])[0]['file_name']).strip()
 89 | 
 90 |     def _to_float(self, x):
 91 |         return float("{:.2f}".format(x))
 92 | 
 93 |     def __getitem__(self, index):
 94 |         img_id = self.images[index]
 95 |         file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
 96 |         img_path = os.path.join(self.img_dir, file_name)
 97 |         ann_ids = self.coco.getAnnIds(imgIds=[img_id])
 98 |         anns = self.coco.loadAnns(ids=ann_ids)
 99 |         anns = list(filter(lambda x: x['category_id'] in self._valid_ids and x['iscrowd'] != 1, anns))
100 |         image = cv2.imread(img_path)
101 | 
102 |         ## augment
103 |         height, width = image.shape[0], image.shape[1]
104 |         dw, dh = self.jitter * width, self.jitter * height
105 |         new_ar = (width + np.random.uniform(-dw, dw)) / (height + np.random.uniform(-dh, dh))
106 |         sclae = 1
107 |         if new_ar < 1:
108 |             new_h = sclae * self.input_h
109 |             new_w = new_ar * new_h
110 |         else:
111 |             new_w = sclae * self.input_w
112 |             new_h = new_w / new_ar
113 | 
114 |         dx, dy = (np.random.uniform(0, self.input_w - new_w), np.random.uniform(0, self.input_h - new_h)) \
115 |             if self.augment else ((self.input_w - new_w) / 2, (self.input_h - new_h) / 2)
116 | 
117 |         flipped = False
118 |         if np.random.random() < 0.5  and self.augment:
119 |             image = np.copy(image[:, ::-1, :])
120 |             flipped = True
121 | 
122 |         src = np.array([[0, 0], [0, height], [width, 0]], dtype=np.float32)
123 |         dst = np.array([[dx, dy], [dx, new_h + dy], [new_w + dx, dy]], dtype=np.float32)
124 |         trans_input = cv2.getAffineTransform(src, dst)
125 |         image = cv2.warpAffine(image, trans_input, (self.input_w, self.input_h),
126 |                                flags=cv2.INTER_LINEAR, borderValue=(0, 0, 0))
127 |         show = image.copy()
128 |         image = (image.astype(np.float32) / 255.)
129 |         image = (image- self.mean) / self.std
130 |         image = image.transpose(2, 0, 1)
131 | 
132 |         strides = np.array([self.base_stride*2**i for i in range(self.k+1)])
133 |         windows = np.array([self.base_window*lamda for lamda in strides],np.int32)
134 | 
135 |         output_size = np.array(list(zip(self.input_w // strides, self.input_h // strides)))
136 |         num_det = [output_w*output_h for output_w, output_h in output_size]
137 |         det_offset = np.cumsum(num_det)
138 |         label_conf = np.zeros((sum(num_det)),dtype=np.int64)
139 |         xywh = np.zeros((self.max_objs, 7), dtype=np.float32) # x1 y1 x2 y2 ct_x ct_y stride
140 |         ind = np.zeros((self.max_objs), dtype=np.int64)
141 |         reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
142 | 
143 |         seg = [np.zeros((self.max_objs,window//self.base_stride,window//self.base_stride),dtype=np.float32) for window in windows]
144 |         seg_ind = [np.zeros((self.max_objs),dtype=np.int64) for _ in windows]
145 |         seg_mask = [np.zeros((self.max_objs),dtype=np.uint8) for _ in windows]
146 |         num_objs = min(len(anns),self.max_objs)
147 | 
148 |         if num_objs > 0 :
149 |             np.random.shuffle(anns)
150 |         for k in range(num_objs):
151 |             ann = anns[k]
152 |             bbox = self._coco_box_to_bbox(ann['bbox'])
153 |             cls_id = int(self.cat_ids[ann['category_id']])
154 |             segment = self.coco.annToMask(ann)
155 |             if flipped:
156 |                 bbox[[0, 2]] = width - bbox[[2, 0]] - 1
157 |                 segment = segment[:, ::-1]
158 |             bbox[:2] = affine_transform(bbox[:2], trans_input)
159 |             bbox[2:] = affine_transform(bbox[2:], trans_input)
160 |             bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.input_w - 1)
161 |             bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.input_h - 1)
162 | 
163 |             w, h = bbox[2:] - bbox[:2]
164 |             max_edge = max(w, h)
165 |             min_edge = min(w, h)
166 |             ratio = max_edge / windows
167 |             window_mask = (ratio >= 0.5) * (ratio <= 1.)  ## window > max(w,h) > window/2
168 |             best_window = windows[window_mask]
169 |             if len(best_window) == 0 and \
170 |                     min_edge > 0 and \
171 |                     min_edge < windows[0]:  ### for small guys
172 |                 best_window = [windows[0]]
173 |                 window_mask[0] = True
174 | 
175 |             feat_stride = strides[window_mask]
176 |             feat_size = output_size[window_mask]
177 |             window_offset = det_offset[window_mask]
178 |             if len(best_window) > 0 and min_edge > 0:  ## min_edge must > 0
179 |                 segment = cv2.warpAffine(segment, trans_input,
180 |                                          (self.input_w, self.input_h),
181 |                                          flags=cv2.INTER_LINEAR)
182 |                 ct = np.array(
183 |                     [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
184 | 
185 |                 for window_id in range(len(best_window)):
186 |                     best_window_size = best_window[window_id]
187 |                     feat_w, feat_h = feat_size[window_id]
188 |                     stride = feat_stride[window_id]
189 | 
190 |                     xx, yy = np.arange(0, feat_w), np.arange(0, feat_h)
191 |                     xx, yy = (xx + 0.5) * stride, (yy + 0.5) * stride
192 |                     ct_feat_x, ct_feat_y = np.argmin(np.abs(ct[0] - xx)), np.argmin(
193 |                         np.abs(ct[1] - yy))  ## window ct close to box ct
194 |                     ct_img_x, ct_img_y = int(xx[ct_feat_x]), int(yy[ct_feat_y])
195 |                     paded_segmnet = np.pad(segment, ((best_window_size // 2, best_window_size // 2),
196 |                                                      (best_window_size // 2, best_window_size // 2)), 'constant',
197 |                                            constant_values=0)
198 |                     window_segment = paded_segmnet[ct_img_y: ct_img_y + best_window_size,
199 |                                      ct_img_x: ct_img_x + best_window_size]
200 | 
201 |                     feat_offset = window_offset[window_id] - feat_w * feat_h
202 |                     output_offset = ct_feat_y * feat_w + ct_feat_x
203 |                     label_conf[feat_offset + output_offset] = (cls_id + 1)
204 | 
205 |                     xywh[k, 0:4] = bbox[0:4]
206 |                     xywh[k, 4:6] = ct_feat_x, ct_feat_y
207 |                     xywh[k, 6] = stride
208 | 
209 |                     ind[k] = feat_offset + output_offset
210 |                     reg_mask[k] = 1
211 | 
212 |                     window_segment = cv2.resize(window_segment, (best_window_size // self.base_stride,
213 |                                                                  best_window_size // self.base_stride))
214 |                     window_index = windows.tolist().index(best_window_size)
215 |                     seg[window_index][k] = window_segment.astype(np.float32).copy()
216 | 
217 |                     seg_ind[window_index][k] = output_offset
218 |                     seg_mask[window_index][k] = 1
219 | 
220 |         ret = {'input':image ,'cls':label_conf,'ind': ind, 'xywh':xywh ,'reg_mask':reg_mask}
221 |         for i in range(len(windows)):
222 |             ret['seg_%d'%i] = seg[i]
223 |             ret['seg_ind_%d' % i] = seg_ind[i]
224 |             ret['seg_mask_%d' % i] = seg_mask[i]
225 | 
226 |         if self.cfg.test :
227 |             ret['img'] = show
228 | 
229 |         return ret
230 | 
if __name__ == '__main__':
    # Smoke test: build the 'val' split with augmentation disabled and walk
    # through every sample once, printing its index as it is produced.
    from config import cfg
    import torch

    dataset = COCO(cfg, split='val', augment=False)
    for sample_idx, _sample in enumerate(dataset):
        print(sample_idx)
238 | 


--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import cv2
  4 | from torch.utils.data import Sampler,RandomSampler,SequentialSampler,DataLoader
  5 | import os
  6 | import time
  7 | import sys
  8 | import math
  9 | from torch.optim import Optimizer
 10 | 
 11 | class AverageMeter(object):
 12 |     """Computes and stores the average and current value"""
 13 |     def __init__(self):
 14 |         self.reset()
 15 | 
 16 |     def reset(self):
 17 |         self.val = 0
 18 |         self.avg = 0
 19 |         self.sum = 0
 20 |         self.count = 0
 21 | 
 22 |     def update(self, val, n=1):
 23 |         self.val = val
 24 |         self.sum += val * n
 25 |         self.count += n
 26 |         if self.count > 0:
 27 |           self.avg = self.sum / self.count
 28 | 
 29 | 
 30 | def load_model(model, model_path, optimizer=None, resume=False,
 31 |                lr=None, lr_step=None):
 32 |     start_epoch = 0
 33 |     checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
 34 |     print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
 35 |     state_dict_ = checkpoint['state_dict']
 36 |     state_dict = {}
 37 | 
 38 |     # convert data_parallal to model
 39 |     for k in state_dict_:
 40 |         if k.startswith('module') and not k.startswith('module_list'):
 41 |             state_dict[k[7:]] = state_dict_[k]
 42 |         else:
 43 |             state_dict[k] = state_dict_[k]
 44 |     model_state_dict = model.state_dict()
 45 | 
 46 |     # check loaded parameters and created model parameters
 47 |     msg = 'If you see this, your model does not fully load the ' + \
 48 |           'pre-trained weight. Please make sure ' + \
 49 |           'you have correctly specified --arch xxx ' + \
 50 |           'or set the correct --num_classes for your own dataset.'
 51 |     for k in state_dict:
 52 |         if k in model_state_dict:
 53 |             if state_dict[k].shape != model_state_dict[k].shape:
 54 |                 print('Skip loading parameter {}, required shape{}, ' \
 55 |                       'loaded shape{}. {}'.format(
 56 |                     k, model_state_dict[k].shape, state_dict[k].shape, msg))
 57 |                 state_dict[k] = model_state_dict[k]
 58 |         else:
 59 |             print('Drop parameter {}.'.format(k) + msg)
 60 |     for k in model_state_dict:
 61 |         if not (k in state_dict):
 62 |             print('No param {}.'.format(k) + msg)
 63 |             state_dict[k] = model_state_dict[k]
 64 |     model.load_state_dict(state_dict, strict=False)
 65 | 
 66 |     # resume optimizer parameters
 67 |     if optimizer is not None and resume:
 68 |         if 'optimizer' in checkpoint:
 69 |             optimizer.load_state_dict(checkpoint['optimizer'])
 70 |             start_epoch = checkpoint['epoch']
 71 |             start_lr = lr
 72 |             for step in lr_step:
 73 |                 if start_epoch >= step:
 74 |                     start_lr *= 0.1
 75 |             for param_group in optimizer.param_groups:
 76 |                 param_group['lr'] = start_lr
 77 |             print('Resumed optimizer with start lr', start_lr)
 78 |         else:
 79 |             print('No optimizer parameters in checkpoint.')
 80 |     if optimizer is not None:
 81 |         return model, optimizer, start_epoch
 82 |     else:
 83 |         return model
 84 | 
 85 | 
 86 | def save_model(path, epoch, model, optimizer=None):
 87 |     if isinstance(model, torch.nn.DataParallel):
 88 |         state_dict = model.module.state_dict()
 89 |     else:
 90 |         state_dict = model.state_dict()
 91 |     data = {'epoch': epoch,
 92 |             'state_dict': state_dict}
 93 |     if not (optimizer is None):
 94 |         data['optimizer'] = optimizer.state_dict()
 95 |     torch.save(data, path)
 96 | 
 97 | 
 98 | 
 99 | 
class BatchSampler(object):
    """Group sampler indices into batches, tagging each index with a size.

    Every yielded batch is a list of ``[index, size_0, size_1]`` entries.
    The size starts at ``[608, 608]``; when ``multiscale_step`` is set, a new
    size is drawn at random from ``img_sizes`` after every
    ``multiscale_step`` completed batches.

    Args:
        sampler: a ``torch.utils.data.Sampler`` providing the indices.
        batch_size: number of indices per batch.
        drop_last: drop the final batch when it is incomplete.
        multiscale_step: number of batches between size changes, or ``None``
            to keep the initial size.
        img_sizes: candidate sizes; required when ``multiscale_step`` is set.

    Raises:
        ValueError: on an invalid ``sampler``, ``drop_last``,
            ``multiscale_step`` or a missing ``img_sizes``.
    """

    def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None):
        if not isinstance(sampler, Sampler):
            raise ValueError("sampler should be an instance of "
                             "torch.utils.data.Sampler, but got sampler={}"
                             .format(sampler))
        if not isinstance(drop_last, bool):
            raise ValueError("drop_last should be a boolean value, but got "
                             "drop_last={}".format(drop_last))
        multiscale_enabled = multiscale_step is not None
        if multiscale_enabled and multiscale_step < 1:
            raise ValueError("multiscale_step should be > 0, but got "
                             "multiscale_step={}".format(multiscale_step))
        if multiscale_enabled and img_sizes is None:
            raise ValueError("img_sizes must a list, but got img_sizes={} ".format(img_sizes))

        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.multiscale_step = multiscale_step
        self.img_sizes = np.array(img_sizes)

    def __iter__(self):
        current_size = [608, 608]
        pending = []
        batches_done = 0
        for idx in self.sampler:
            pending.append([idx, *current_size])
            if len(pending) != self.batch_size:
                continue
            yield pending
            batches_done += 1
            pending = []
            # Draw the size for the following batches once enough batches
            # have been emitted since the last change.
            if self.multiscale_step and batches_done % self.multiscale_step == 0:
                choice = np.random.randint(0, len(self.img_sizes))
                current_size = self.img_sizes[choice]
        if pending and not self.drop_last:
            yield pending

    def __len__(self):
        total = len(self.sampler)
        if self.drop_last:
            return total // self.batch_size
        # Round up so the trailing partial batch is counted.
        return (total + self.batch_size - 1) // self.batch_size
141 | 
class AdaBound(Optimizer):
    """Implements AdaBound algorithm.
    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
    Raises:
        ValueError: if ``lr``, ``eps``, ``betas``, ``final_lr`` or ``gamma``
            is outside its valid range.
    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBound, self).__init__(params, defaults)

        # Learning rate each group started with; step() rescales final_lr by
        # the ratio between the current and this initial learning rate.
        self.base_lrs = [group['lr'] for group in self.param_groups]

    def __setstate__(self, state):
        super(AdaBound, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def add_param_group(self, param_group):
        """Register a parameter group and remember its initial learning rate.

        Without this, a group added after construction has no entry in
        ``base_lrs`` and is silently skipped by the ``zip`` in ``step``.
        """
        super(AdaBound, self).add_param_group(param_group)
        # The base-class constructor may call this before base_lrs exists;
        # __init__ fills the list for those initial groups afterwards.
        if hasattr(self, 'base_lrs'):
            self.base_lrs.append(self.param_groups[-1]['lr'])

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Returns:
            The value returned by ``closure``, or ``None`` without a closure.
        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBound does not support sparse gradients, please consider SparseAdam instead')
                amsbound = group['amsbound']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # The scalar multipliers are passed by keyword (alpha= /
                # value=); the positional-scalar overloads are deprecated.
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
                # Per-element step: lr / denom clamped into the bounds, then
                # multiplied by the first-moment estimate.
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                p.data.add_(-step_size)

        return loss
255 | 
256 | 
257 | 
# tensorboardX is an optional dependency: when it cannot be imported the
# Logger below still writes its text log and skips scalar summaries.
# `except Exception` (rather than a bare `except`) keeps any import-time
# failure of the package non-fatal without swallowing KeyboardInterrupt or
# SystemExit.
try:
    import tensorboardX
except Exception:
    USE_TENSORBOARD = False
else:
    USE_TENSORBOARD = True
    print('Using tensorboardX')
265 | 
266 | 
class Logger(object):
    """Text logger with optional tensorboardX scalar summaries.

    On construction the public attributes of ``opt`` are dumped to
    ``<save_dir>/opt.txt``, a time-stamped ``logs_<time>`` directory is
    created below ``save_dir``, ``opt.txt`` is copied into it and
    ``log.txt`` is opened there for writing.
    """

    def __init__(self, opt):
        """Create the log directory, the option dump and the log file.

        Args:
            opt: options object; ``opt.save_dir`` is the output directory and
                every attribute not starting with ``_`` is written to
                ``opt.txt``.
        """
        # Local import: only the constructor needs shutil.
        import shutil

        os.makedirs(opt.save_dir, exist_ok=True)

        time_str = time.strftime('%Y-%m-%d-%H-%M')

        args = dict((name, getattr(opt, name)) for name in dir(opt)
                    if not name.startswith('_'))
        file_name = os.path.join(opt.save_dir, 'opt.txt')
        with open(file_name, 'wt') as opt_file:
            opt_file.write('==> torch version: {}\n'.format(torch.__version__))
            opt_file.write('==> cudnn version: {}\n'.format(
                torch.backends.cudnn.version()))
            opt_file.write('==> Cmd:\n')
            opt_file.write(str(sys.argv))
            opt_file.write('\n==> Opt:\n')
            for k, v in sorted(args.items()):
                opt_file.write('  %s: %s\n' % (str(k), str(v)))

        log_dir = opt.save_dir + '/logs_{}'.format(time_str)
        if USE_TENSORBOARD:
            self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
        # Create the directory in both modes so opening log.txt below does
        # not depend on the summary writer having created it.
        os.makedirs(log_dir, exist_ok=True)
        self.log = open(log_dir + '/log.txt', 'w')
        # Keep a copy of the options next to the log.  The copy is
        # best-effort: a failure is reported but does not abort logging.
        try:
            shutil.copy(file_name, log_dir)
        except OSError as err:
            print('Could not copy {} to {}: {}'.format(file_name, log_dir, err))
        # True when the next write() starts a new line and needs a timestamp.
        self.start_line = True

    def write(self, txt):
        """Append ``txt`` to the log, time-stamping the start of each line.

        The file is flushed whenever ``txt`` contains a newline.
        """
        if self.start_line:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            self.log.write('{}: {}'.format(time_str, txt))
        else:
            self.log.write(txt)
        self.start_line = False
        if '\n' in txt:
            self.start_line = True
            self.log.flush()

    def close(self):
        """Close the text log and, when present, the summary writer."""
        self.log.close()
        writer = getattr(self, 'writer', None)
        if writer is not None:
            writer.close()

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        if USE_TENSORBOARD:
            self.writer.add_scalar(tag, value, step)


--------------------------------------------------------------------------------
/models/ops/align2nat/src/swap_align2nat_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include <ATen/ATen.h>
  2 | #include <THC/THCAtomics.cuh>
  3 | #include <ATen/AccumulateType.h>
  4 | 
  5 | 
  6 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
  7 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
  8 |        i += blockDim.x * gridDim.x)
  9 | 
 10 | #define THREADS_PER_BLOCK 1024
 11 | 
 12 | inline int GET_BLOCKS(const int N) {
 13 |   int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
 14 |   int max_block_num = 65000;
 15 |   return min(optimal_block_num, max_block_num);
 16 | }
 17 | 
 18 | template <typename accscalar_t>
 19 | __device__ __forceinline__ static accscalar_t area_pixel_compute_source_index(
 20 |     accscalar_t scale,
 21 |     int dst_index,
 22 |     bool align_corners) {
 23 |   if (align_corners) {
 24 |     return scale * dst_index;
 25 |   } else {
 26 |     accscalar_t src_idx = scale * (dst_index + static_cast<accscalar_t>(0.5)) -
 27 |         static_cast<accscalar_t>(0.5);
 28 |     // See Note[Follow Opencv resize logic]
 29 |     return (src_idx < static_cast<accscalar_t>(0))
 30 |         ? static_cast<accscalar_t>(0)
 31 |         : src_idx;
 32 |   }
 33 | }
 34 | 
 35 | __device__ __forceinline__ size_t
 36 | loaction(const size_t n,const size_t v,const size_t u,const size_t y,const size_t x,
 37 |     const size_t V,const size_t U,const size_t H,const size_t W) {
 38 |   return x + W*(y + H*(u + U*(v + V*n)));
 39 | }
 40 | 
 41 | template <typename scalar_t>
 42 | __device__ __forceinline__ scalar_t get_val(const scalar_t*data,
 43 | const size_t n,const size_t v,const size_t u,const size_t y,const size_t x,
 44 |                const size_t V,const size_t U,const size_t H,const size_t W,const scalar_t pad_val) {
 45 |     if (x <0 || x >= W || y < 0 || y >= H ){
 46 |         return pad_val;
 47 |     }else{
 48 |         return data[x + W*(y + H*(u + U*(v + V*n)))];
 49 |     }
 50 | }
 51 | 
// Forward kernel of the swap_align2nat op.
//
// Each loop iteration produces one element of top_data. The flat `index` is
// decomposed into (n, ov, ou, oh, ow) using the output extents
// (newV, newU, newH, newW), with ow varying fastest. bottom_data is read
// through get_val with extents (orgV, orgU, orgH, orgW); reads whose spatial
// position falls outside orgH x orgW yield pad_val.
//
// Two cases:
//   * output extents equal input extents: the value is copied from the same
//     (n, ov, ou) plane at the spatial position shifted by
//     alpha * (ov - newV/2) rows and alpha * (ou - newU/2) columns;
//   * otherwise: the value is interpolated. Source coordinates for h, w, v, u
//     come from area_pixel_compute_source_index; for each of the four spatial
//     taps (h1 / h1+h1p, w1 / w1+w1p), shifted by the same alpha offsets, the
//     four (v, u) neighbours are blended with the v/u weights, and the four
//     results are then blended with the h/w weights.
//
// NOTE(review): scaleV/scaleU/scaleH/scaleW are computed by the caller;
// presumably they are the input/output size ratios — confirm against the
// host-side launcher.
template <typename scalar_t,typename accscalar_t>
__global__ void SwapAlign2NatForward(const int nthreads, const scalar_t *bottom_data,scalar_t *top_data,
                                const accscalar_t scaleV,const accscalar_t scaleU,
                                const int newV,const int newU,
                                const int newH,const int newW,
                                const accscalar_t scaleH,const accscalar_t scaleW,
                                const int orgV,const int orgU,
                                const int orgH,const int orgW,
                                const int alpha,const bool align_corners,const scalar_t pad_val
                                ) {
    // -newV/2 and -newU/2 are integer divisions; the result is then stored as float.
    const float v_offset =  -newV/2;
    const float u_offset =  -newU/2;
    int n,ov,ou,oh,ow,bottom_h,bottom_w;
    CUDA_1D_KERNEL_LOOP(index,nthreads){
        // (n, ov, ou, oh, ow) is an element in the top_data
        ow = index % newW;
        oh = (index / newW) % newH;
        ou = (index / newW / newH) % newU;
        ov = (index / newW / newH / newU) % newV;
        n  =  index / newW / newH / newU / newV;
        if (newV==orgV && newU==orgU && newW==orgW && newH==orgH){
            // Same extents: plain shifted copy. The float expression is
            // converted back to int on assignment.
            bottom_h =  oh + alpha * (ov + v_offset);
            bottom_w =  ow + alpha * (ou + u_offset);
            top_data[index] = get_val(bottom_data,n,ov,ou,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val);

        } else {
            // h, w: lower source index (h1 / w1), step to the upper neighbour
            // (0 at the last row / column) and the two interpolation weights.

            const accscalar_t h1r = area_pixel_compute_source_index<accscalar_t>(
                scaleH, oh, align_corners);
            const int h1 = h1r;
            const int h1p = (h1 < orgH - 1) ? 1 : 0;
            const accscalar_t h1lambda = h1r - h1;
            const accscalar_t h0lambda = static_cast<accscalar_t>(1) - h1lambda;
            //
            const accscalar_t w1r = area_pixel_compute_source_index<accscalar_t>(
                scaleW, ow, align_corners);
            const int w1 = w1r;
            const int w1p = (w1 < orgW - 1) ? 1 : 0;
            const accscalar_t w1lambda = w1r - w1;
            const accscalar_t w0lambda = static_cast<accscalar_t>(1) - w1lambda;


            // v, u: same quantities for the two leading (non-spatial) axes.

            const accscalar_t v1r = area_pixel_compute_source_index<accscalar_t>(
                 scaleV, ov, align_corners);
            const int v1 = v1r;
            const int v1p = (v1 < orgV - 1) ? 1 : 0;
            const accscalar_t v1lambda = v1r - v1;
            const accscalar_t v0lambda = static_cast<accscalar_t>(1) - v1lambda;


            const accscalar_t u1r = area_pixel_compute_source_index<accscalar_t>(
                scaleU, ou, align_corners);
            const int u1 = u1r;
            const int u1p = (u1 < orgU - 1) ? 1 : 0;
            const accscalar_t u1lambda = u1r - u1;
            const accscalar_t u0lambda = static_cast<accscalar_t>(1) - u1lambda;

            // Value of each spatial tap after blending its four (v, u) neighbours.
            accscalar_t h0w0,h0w1,h1w0,h1w1;

            // Tap (h1, w1).
            bottom_h =  h1 + alpha * (ov + v_offset);
            bottom_w =  w1 + alpha * (ou + u_offset);
            h0w0 = v0lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+
                   v0lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));

            // Tap (h1, w1 + w1p).
            bottom_h =  h1 + alpha * (ov + v_offset);
            bottom_w =  w1 + w1p + alpha * (ou + u_offset);
            h0w1 = v0lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+
                   v0lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));

            // Tap (h1 + h1p, w1).
            bottom_h =  h1 + h1p + alpha * (ov + v_offset);
            bottom_w =  w1  + alpha * (ou + u_offset);
            h1w0 = v0lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+
                   v0lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));

            // Tap (h1 + h1p, w1 + w1p).
            bottom_h =  h1  + h1p  + alpha * (ov + v_offset);
            bottom_w =  w1  + w1p  + alpha * (ou + u_offset);
            h1w1 = v0lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val))+
                   v0lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u0lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val)) +
                   v1lambda * u1lambda * static_cast<accscalar_t>(get_val(bottom_data,n,v1+v1p,u1+u1p,bottom_h,bottom_w,orgV,orgU,orgH,orgW,pad_val));

            // Blend the four spatial taps with the h / w weights.
            const accscalar_t val = h0lambda * w0lambda * h0w0 +
                                    h0lambda * w1lambda * h0w1 +
                                    h1lambda * w0lambda * h1w0 +
                                    h1lambda * w1lambda * h1w1 ;

            top_data[index] = static_cast<scalar_t>(val);
        }

    }
}
152 | 
153 | 
154 | template <typename scalar_t,typename accscalar_t>
155 | __global__ void SwapAlign2NatBackward(const int nthreads, const scalar_t *bottom_data,scalar_t *top_data,
156 |                                 const accscalar_t scaleV,const accscalar_t scaleU,
157 |                                 const int newV,const int newU,
158 |                                 const int newH,const int newW,
159 |                                 const accscalar_t scaleH,const accscalar_t scaleW,
160 |                                 const int orgV,const int orgU,
161 |                                 const int orgH,const int orgW,
162 |                                 const int alpha,const bool align_corners
163 |                                 ) {
164 |     const int v_offset =  -newV/2;
165 |     const int u_offset =  -newU/2;
166 |     int n,ov,ou,oh,ow,top_h,top_w;
167 |     size_t top_offset ;
168 |     CUDA_1D_KERNEL_LOOP(index,nthreads){
169 |         // (n, ov, ou, oh, ow) is an element in the bottom_data
170 |         ow = index % newW;
171 |         oh = (index / newW) % newH;
172 |         ou = (index / newW / newH) % newU;
173 |         ov = (index / newW / newH / newU) % newV;
174 |         n  =  index / newW / newH / newU / newV;
175 |         if (newV==orgV && newU==orgU && newW==orgW && newH==orgH){
176 |             top_h =  oh + alpha * (ov + v_offset);
177 |             top_w =  ow + alpha * (ou + u_offset);
178 |             if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH))
179 |             {
180 |                 top_offset =  loaction(n,ov,ou,top_h,top_w,newV,newU,newH,newW);
181 |                 top_data[top_offset] = bottom_data[index];
182 |             }
183 |         } else {
184 |             //  h,w
185 | 
186 |             const accscalar_t h1r = area_pixel_compute_source_index<accscalar_t>(
187 |                 scaleH, oh, align_corners);
188 |             const int h1 = h1r;
189 |             const int h1p = (h1 < orgH - 1) ? 1 : 0;
190 |             const accscalar_t h1lambda = h1r - h1;
191 |             const accscalar_t h0lambda = static_cast<accscalar_t>(1) - h1lambda;
192 |             //
193 |             const accscalar_t w1r = area_pixel_compute_source_index<accscalar_t>(
194 |                 scaleW, ow, align_corners);
195 |             const int w1 = w1r;
196 |             const int w1p = (w1 < orgW - 1) ? 1 : 0;
197 |             const accscalar_t w1lambda = w1r - w1;
198 |             const accscalar_t w0lambda = static_cast<accscalar_t>(1) - w1lambda;
199 | 
200 | 
201 |             // v,u
202 | 
203 |             const accscalar_t v1r = area_pixel_compute_source_index<accscalar_t>(
204 |                  scaleV, ov, align_corners);
205 |             const int v1 = v1r;
206 |             const int v1p = (v1 < orgV - 1) ? 1 : 0;
207 |             const accscalar_t v1lambda = v1r - v1;
208 |             const accscalar_t v0lambda = static_cast<accscalar_t>(1) - v1lambda;
209 | 
210 | 
211 |             const accscalar_t u1r = area_pixel_compute_source_index<accscalar_t>(
212 |                 scaleU, ou, align_corners);
213 |             const int u1 = u1r;
214 |             const int u1p = (u1 < orgU - 1) ? 1 : 0;
215 |             const accscalar_t u1lambda = u1r - u1;
216 |             const accscalar_t u0lambda = static_cast<accscalar_t>(1) - u1lambda;
217 | 
218 |             const accscalar_t d2val = static_cast<accscalar_t>(bottom_data[index]);
219 | 
220 |             top_h =  h1 + alpha * (ov + v_offset);
221 |             top_w =  w1 + alpha * (ou + u_offset);
222 |             if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
223 |                top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
224 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w0lambda * v0lambda * u0lambda * d2val));
225 |                top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
226 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w0lambda * v0lambda * u1lambda * d2val));
227 |                top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
228 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w0lambda * v1lambda * u0lambda * d2val));
229 |                top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
230 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w0lambda * v1lambda * u1lambda * d2val));
231 |             }
232 | 
233 |             top_h =  h1 + alpha * (ov + v_offset);
234 |             top_w =  w1 + w1p + alpha * (ou + u_offset);
235 |             if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
236 |                top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
237 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w1lambda * v0lambda * u0lambda * d2val));
238 |                top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
239 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w1lambda * v0lambda * u1lambda * d2val));
240 |                top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
241 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w1lambda * v1lambda * u0lambda * d2val));
242 |                top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
243 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h0lambda * w1lambda * v1lambda * u1lambda * d2val));
244 |             }
245 | 
246 |             top_h =  h1 + h1p + alpha * (ov + v_offset);
247 |             top_w =  w1  + alpha * (ou + u_offset);
248 |             if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
249 |                top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
250 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w0lambda * v0lambda * u0lambda * d2val));
251 |                top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
252 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w0lambda * v0lambda * u1lambda * d2val));
253 |                top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
254 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w0lambda * v1lambda * u0lambda * d2val));
255 |                top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
256 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w0lambda * v1lambda * u1lambda * d2val));
257 |             }
258 | 
259 |             top_h =  h1  + h1p  + alpha * (ov + v_offset);
260 |             top_w =  w1  + w1p  + alpha * (ou + u_offset);
261 |             if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH) ){
262 |                top_offset = loaction(n,v1,u1,top_h,top_w,orgV,orgU,orgH,orgW);
263 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w1lambda * v0lambda * u0lambda * d2val));
264 |                top_offset = loaction(n,v1,u1 + u1p,top_h,top_w,orgV,orgU,orgH,orgW);
265 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w1lambda * v0lambda * u1lambda * d2val));
266 |                top_offset = loaction(n,v1+v1p,u1,top_h,top_w,orgV,orgU,orgH,orgW);
267 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w1lambda * v1lambda * u0lambda * d2val));
268 |                top_offset = loaction(n,v1+v1p,u1+u1p,top_h,top_w,orgV,orgU,orgH,orgW);
269 |                atomicAdd(top_data + top_offset, static_cast<scalar_t>(h1lambda * w1lambda * v1lambda * u1lambda * d2val));
270 |             }
271 |         }
272 | 
273 |     }
274 | }
275 | 
276 | 
// Returns the per-axis step that maps an output coordinate back to an input
// coordinate when resizing an axis from `input_size` to `output_size`.
//
//   align_corners == true  : (input_size - 1) / (output_size - 1)
//   align_corners == false : input_size / output_size
//
// When `output_size` is 1 or less the step is defined as 0.
template <typename scalar_t>
static inline scalar_t area_pixel_compute_scale(
    int64_t input_size,
    int64_t output_size,
    bool align_corners) {
  // Degenerate axis: nothing to step over (also avoids dividing by zero in the
  // align_corners formula).
  if (output_size <= 1) {
    return scalar_t(0);
  }

  if (align_corners) {
    return static_cast<scalar_t>(input_size - 1) / (output_size - 1);
  }

  return static_cast<scalar_t>(input_size) / output_size;
}
// Host-side launcher for the SwapAlign2NatForward CUDA kernel.
//
// Reads the batch size and the target (V, U, H, W) extents from dimensions
// 0-4 of `output`, and the source (V, U, H, W) extents from dimensions 1-4 of
// `input`. It then derives one interpolation scale per axis and launches the
// kernel with a grid sized by the number of output elements.
//
//   input          source tensor (read only)
//   output         destination tensor; its shape must already be final,
//                  because the target extents are taken from it
//   alpha          forwarded unchanged to the kernel
//   align_corners  selects the scale formula in area_pixel_compute_scale and
//                  is also forwarded to the kernel
//   pad_val        cast to the tensor's scalar type and forwarded to the kernel
//
// Always returns 1; a failed launch is reported through THCudaCheck.
int SwapAlign2NatForwardLaucher(const at::Tensor& input,at::Tensor& output,
                           const int alpha,const bool align_corners,const float pad_val){
  // Target extents come from the output tensor.
  const int B = output.size(0);
  const int newV = output.size(1);
  const int newU = output.size(2);
  const int newH = output.size(3);
  const int newW = output.size(4);
  // Source extents come from the input tensor.
  const int orgV = input.size(1);
  const int orgU = input.size(2);
  const int orgH = input.size(3);
  const int orgW = input.size(4);
  const int output_size = B*newV*newU*newH*newW;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.type(), "SwapAlign2NatForwardLaucher", ([&] {
        const scalar_t *bottom_data = input.data<scalar_t>();
        scalar_t *top_data = output.data<scalar_t>();
        using accscalar_t = at::acc_type<scalar_t, true>;
        // One scale per axis, each pairing the source extent with the target
        // extent of the SAME axis.
        const accscalar_t rV = area_pixel_compute_scale<accscalar_t>(
            orgV, newV, align_corners);
        // The U scale must be derived from newU; using newV here is only
        // correct when the V and U target extents happen to be equal.
        const accscalar_t rU = area_pixel_compute_scale<accscalar_t>(
            orgU, newU, align_corners);
        const accscalar_t rH = area_pixel_compute_scale<accscalar_t>(
            orgH, newH, align_corners);
        const accscalar_t rW = area_pixel_compute_scale<accscalar_t>(
            orgW, newW, align_corners);
        SwapAlign2NatForward<scalar_t,accscalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(output_size,bottom_data,top_data,
                            rV,rU,newV,newU,newH,newW,
                            rH,rW,orgV,orgU,orgH,orgW,
                            alpha,align_corners,static_cast<scalar_t>(pad_val)
                            );
      }));
  // Surface any launch-time error from the kernel launch above.
  THCudaCheck(cudaGetLastError());
  return 1;
}
326 | 
327 | 
// Host-side launcher for the SwapAlign2NatBackward CUDA kernel.
//
// Reads the batch size and the upsampled (V, U, H, W) extents from dimensions
// 0-4 of `grad_output`, and the original (V, U, H, W) extents from dimensions
// 1-4 of `grad_input`. It then derives one interpolation scale per axis and
// launches the kernel with a grid sized by the number of `grad_output`
// elements.
//
//   grad_output    incoming gradient (read only)
//   grad_input     gradient buffer written by the kernel; its shape must
//                  already be final, because the original extents are taken
//                  from it
//   alpha          forwarded unchanged to the kernel
//   align_corners  selects the scale formula in area_pixel_compute_scale and
//                  is also forwarded to the kernel
//
// NOTE(review): the kernel accumulates into `grad_input` with atomicAdd, so
// the caller presumably has to zero-initialize it beforehand -- confirm.
//
// Always returns 1; a failed launch is reported through THCudaCheck.
int SwapAlign2NatBackwardLaucher(const at::Tensor& grad_output,at::Tensor& grad_input,
                           const int alpha,const bool align_corners){
  // Upsampled extents come from the incoming gradient.
  const int B = grad_output.size(0);
  const int newV = grad_output.size(1);
  const int newU = grad_output.size(2);
  const int newH = grad_output.size(3);
  const int newW = grad_output.size(4);
  // Original extents come from the gradient buffer being filled.
  const int orgV = grad_input.size(1);
  const int orgU = grad_input.size(2);
  const int orgH = grad_input.size(3);
  const int orgW = grad_input.size(4);
  const int output_size = B*newV*newU*newH*newW;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output.type(), "SwapAlign2NatBackwardLaucher", ([&] {
        const scalar_t *bottom_data = grad_output.data<scalar_t>();
        scalar_t *top_data = grad_input.data<scalar_t>();
        using accscalar_t = at::acc_type<scalar_t, true>;
        // One scale per axis, each pairing the original extent with the
        // upsampled extent of the SAME axis.
        const accscalar_t rV = area_pixel_compute_scale<accscalar_t>(
            orgV, newV, align_corners);
        // The U scale must be derived from newU; using newV here is only
        // correct when the V and U upsampled extents happen to be equal.
        const accscalar_t rU = area_pixel_compute_scale<accscalar_t>(
            orgU, newU, align_corners);
        const accscalar_t rH = area_pixel_compute_scale<accscalar_t>(
            orgH, newH, align_corners);
        const accscalar_t rW = area_pixel_compute_scale<accscalar_t>(
            orgW, newW, align_corners);
        SwapAlign2NatBackward<scalar_t,accscalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(output_size,bottom_data,top_data,
                            rV,rU,newV,newU,newH,newW,
                            rH,rW,orgV,orgU,orgH,orgW,
                            alpha,align_corners
                            );
      }));
  // Surface any launch-time error from the kernel launch above.
  THCudaCheck(cudaGetLastError());
  return 1;
}


--------------------------------------------------------------------------------