├── models
├── ops
│ ├── align2nat
│ │ ├── __init__.py
│ │ ├── functions
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-37.pyc
│ │ │ │ └── swap_align2nat.cpython-37.pyc
│ │ │ └── swap_align2nat.py
│ │ ├── modules
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-37.pyc
│ │ │ │ └── roi_align.cpython-35.pyc
│ │ │ └── swap_align2nat.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── swap_align2nat_cuda.cpp
│ │ │ └── swap_align2nat_kernel.cu
│ └── sigmoid_focal_loss
│ │ ├── functions
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-37.pyc
│ │ │ └── sigmoid_focal_loss.cpython-37.pyc
│ │ └── sigmoid_focal_loss.py
│ │ ├── modules
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-37.pyc
│ │ │ └── sigmoid_focal_loss.cpython-37.pyc
│ │ └── sigmoid_focal_loss.py
│ │ ├── __init__.py
│ │ ├── setup.py
│ │ └── src
│ │ ├── sigmoid_focal_loss.cpp
│ │ └── sigmoid_focal_loss_cuda.cu
├── tensormask.py
├── losses.py
├── res_fpn.py
└── detector.py
├── img
├── test.png
└── test_2.png
├── README.md
├── config.py
├── demo.py
├── eval.py
├── exp
└── coco_person
│ ├── logs_2019-12-07-23-55
│ ├── log.txt
│ └── opt.txt
│ └── opt.txt
├── train.py
├── show_pred_window.py
└── lib
├── trainer.py
├── optimer.py
├── coco.py
└── utils.py
/models/ops/align2nat/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/models/ops/align2nat/functions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/ops/align2nat/modules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/img/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/img/test.png
--------------------------------------------------------------------------------
/img/test_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/img/test_2.png
--------------------------------------------------------------------------------
/models/ops/align2nat/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/modules/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/align2nat/modules/__pycache__/roi_align.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/modules/__pycache__/roi_align.cpython-35.pyc
--------------------------------------------------------------------------------
/models/ops/align2nat/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/functions/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
2 |
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
4 |
--------------------------------------------------------------------------------
/models/ops/align2nat/functions/__pycache__/swap_align2nat.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/align2nat/functions/__pycache__/swap_align2nat.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/functions/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/modules/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/__pycache__/sigmoid_focal_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/functions/__pycache__/sigmoid_focal_loss.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/__pycache__/sigmoid_focal_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaoWGG/TensorMask/HEAD/models/ops/sigmoid_focal_loss/modules/__pycache__/sigmoid_focal_loss.cpython-37.pyc
--------------------------------------------------------------------------------
/models/ops/align2nat/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='align2nat_cuda',
6 | ext_modules=[
7 | CUDAExtension('swap_align2nat_cuda', [
8 | 'src/swap_align2nat_cuda.cpp',
9 | 'src/swap_align2nat_kernel.cu',
10 | ]),
11 | ],
12 | cmdclass={'build_ext': BuildExtension})
13 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='SigmoidFocalLoss',
6 | ext_modules=[
7 | CUDAExtension('sigmoid_focal_loss_cuda', [
8 | 'src/sigmoid_focal_loss.cpp',
9 | 'src/sigmoid_focal_loss_cuda.cu',
10 | ]),
11 | ],
12 | cmdclass={'build_ext': BuildExtension})
13 |
--------------------------------------------------------------------------------
/models/ops/align2nat/modules/swap_align2nat.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.swap_align2nat import SwapAlign2NatFunction
3 |
4 |
class SwapAlign2Nat(Module):
    """Module wrapper around the ``SwapAlign2NatFunction`` autograd op.

    The constructor only stores its arguments; ``forward`` hands them,
    together with the input features, to ``SwapAlign2NatFunction.apply``.
    """

    def __init__(self, alpha=1, lamda=1, pad_val=-9.0, align_corners=True):
        super().__init__()
        self.pad_val = pad_val
        self.align_corners = align_corners
        self.lamda = lamda
        self.alpha = alpha

    def forward(self, features):
        """Apply the op to ``features`` using the stored settings."""
        # Argument order expected by SwapAlign2NatFunction.forward:
        # (features, alpha, lamda, pad_val, align_corners).
        op_args = (self.alpha, self.lamda, self.pad_val, self.align_corners)
        return SwapAlign2NatFunction.apply(features, *op_args)
15 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from ..functions.sigmoid_focal_loss import sigmoid_focal_loss
4 |
5 |
6 | # TODO: remove this module
class SigmoidFocalLoss(nn.Module):
    """Module form of ``sigmoid_focal_loss`` that returns the summed loss."""

    def __init__(self, gamma, alpha):
        super().__init__()
        self.gamma, self.alpha = gamma, alpha

    def forward(self, logits, targets):
        """Return ``sigmoid_focal_loss(logits, targets, gamma, alpha).sum()``."""
        # Only CUDA logits are accepted.
        assert logits.is_cuda
        return sigmoid_focal_loss(logits, targets, self.gamma, self.alpha).sum()

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f"{cls_name}(gamma={self.gamma!s}, alpha={self.alpha!s})"
25 |
--------------------------------------------------------------------------------
/models/ops/align2nat/functions/swap_align2nat.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 |
3 | from .. import swap_align2nat_cuda
4 |
class SwapAlign2NatFunction(Function):
    """Autograd function backed by the compiled ``swap_align2nat_cuda`` module."""

    @staticmethod
    def forward(ctx, features, alpha, lamda, pad_val, align_corners):
        """Run the CUDA forward op; non-CUDA inputs raise NotImplementedError."""
        # Stash what backward() reads later.
        ctx.feature_size = features.size()
        ctx.alpha = alpha
        ctx.lamda = lamda
        ctx.align_corners = align_corners
        if not features.is_cuda:
            raise NotImplementedError
        # The extension takes (align_corners, pad_val) in this order.
        return swap_align2nat_cuda.forward(features, alpha, lamda, align_corners, pad_val)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``features``; other inputs get ``None``."""
        saved = (ctx.feature_size, ctx.alpha, ctx.lamda, ctx.align_corners)
        size, a, lam, corners = saved
        assert (size is not None and grad_output.is_cuda)
        grad_features = swap_align2nat_cuda.backward(
            grad_output.contiguous(), a, lam, corners)
        # One slot per forward() argument after ctx.
        return (grad_features, None, None, None, None)


swap_align2nat = SwapAlign2NatFunction.apply
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TensorMask
2 | ### This is an unofficial implementation of [TensorMask: A Foundation for Dense Object Segmentation](https://arxiv.org/abs/1903.12174).
3 |
4 | * ![test](img/test.png)
5 | * ![test_2](img/test_2.png)
6 |
7 | input_size = (640x512), no multi-scale training, no multi-scale test
8 |
9 | |class|AP|AP50|AP75|APS|APM|APL|
10 | |---|---|---|---|---|---|---|
11 | |person box|0.481|0.752|0.503|0.256|0.559|0.704|
12 | |person mask|0.395|0.721|0.392|0.184|0.454|0.614|
13 |
14 | ### Requirements
15 | 1. python==3.7
16 | 2. pytorch==1.0.0
17 | 3. torchvision==0.4.2
18 | 4. opencv-python, pycocotools, progress, numpy, easydict, tqdm
19 |
20 | ### Installation
21 | ```bash
22 | git clone https://github.com/CaoWGG/TensorMask.git
23 | cd TensorMask/models/ops/align2nat
24 | python setup.py build_ext --inplace
25 | cd ../sigmoid_focal_loss
26 | python setup.py build_ext --inplace
27 | ```
28 |
29 | ### Training
30 | ```bash
31 | ## dataset coco2017
32 | ## modify config.py : cfg.class_name and cfg.num_class
33 | python train.py
34 | ```
35 |
36 | ### Show result
37 | ```bash
38 | python demo.py
39 | ## show window
40 | python show_pred_window.py
41 | ```
42 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 | from torch.autograd.function import once_differentiable
3 |
4 | from .. import sigmoid_focal_loss_cuda
5 |
6 |
class SigmoidFocalLossFunction(Function):
    """Autograd function backed by the compiled ``sigmoid_focal_loss_cuda`` module."""

    @staticmethod
    def forward(ctx, input, target, gamma=2.0, alpha=0.25):
        """Compute the loss with the CUDA extension.

        ``input`` and ``target`` are saved for the backward pass; the number
        of classes is taken from ``input.shape[1]``.
        """
        ctx.save_for_backward(input, target)
        num_classes = input.shape[1]
        ctx.num_classes = num_classes
        ctx.gamma = gamma
        ctx.alpha = alpha

        loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
                                               gamma, alpha)
        return loss

    @staticmethod
    @once_differentiable
    def backward(ctx, d_loss):
        """Return the gradient w.r.t. ``input``; the other inputs get ``None``."""
        input, target = ctx.saved_tensors
        num_classes = ctx.num_classes
        gamma = ctx.gamma
        alpha = ctx.alpha
        d_loss = d_loss.contiguous()
        d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,
                                                   num_classes, gamma, alpha)
        # forward() takes four inputs (input, target, gamma, alpha), so return
        # exactly four gradients; the original returned a surplus fifth None.
        return d_input, None, None, None


sigmoid_focal_loss = SigmoidFocalLossFunction.apply
35 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | from easydict import EasyDict
2 |
3 | cfg = EasyDict()
4 |
5 | cfg.backbone = 'resnet50'
6 | cfg.frezeBN = False
7 | cfg.frezeLayer = False
8 | cfg.align_corners = False ## ref torch.nn.functional.interpolate /// when align_corners==False : [Follow Opencv resize logic]
9 | cfg.weights = ''
10 | cfg.resume = True
11 | cfg.device = 'cuda'
12 |
13 | cfg.cls_weights = 1.
14 | cfg.xywh_weights = 1.
15 | cfg.mask_weights = 2. ## from paper
16 |
17 | cfg.data_dir = '/data0/cao/DataSet/coco'
18 | cfg.num_class = 1
19 | cfg.class_name = 'person' ## [person , *]
20 | cfg.input_h = 512 ## 512 % 128 = 0
21 | cfg.input_w = 640 ## 640 % 128 = 0
22 | cfg.base_window = 8 ## base_window%2==0 in this impl..|| max window = 12* base_stride * 2^5 / 2 = 768 > 640
23 | cfg.base_stride = 4 ## feat_2 --> strideHW=4
24 | cfg.k = 5 # 0 1 2 3 4 5
25 | cfg.max_objs = 45
26 | cfg.jitter = 0.3
27 |
28 |
29 | cfg.lr = 0.02 ## from paper
30 | cfg.num_epochs = 72 ## from paper
31 | cfg.lr_step = [64,70] ## from paper
32 | cfg.warm_up = 1000
33 | cfg.batch_size = 6
34 |
35 | cfg.gpus_str = '0,1,2,3'
36 |
37 | cfg.save_dir = 'exp'
38 | cfg.exp_id = 'coco_person_8'
39 | cfg.print_iter = 1
40 | cfg.test = False
41 | cfg.vis_thresh = 0.3
42 | cfg.show_box = True
43 | cfg.demo = ''
44 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | from config import cfg as opt
4 | from models.detector import Detector
# Select the GPU used by the demo.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
image_ext = ['jpg', 'jpeg', 'png', 'webp']
video_ext = ['mp4', 'mov', 'avi', 'mkv', 'h264']
time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']

# opt.demo may be 'webcam', a video file, an image directory, a .txt file
# listing image paths, or a single image path.
opt.demo = '/data/yoloCao/DataSet/coco/val2017'
opt.weights = 'exp/coco_person/model_last.pth'
# NOTE(review): config.py defines `vis_thresh`, but this sets `vis_trehs`.
# Confirm which attribute Detector reads before renaming either one.
opt.vis_trehs = 0.4
detector = Detector(opt)
cv2.namedWindow('result', cv2.WINDOW_NORMAL)
cv2.resizeWindow('result', 1024, 768)

is_video = opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext
if opt.demo == 'webcam' or is_video:
    cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
    try:
        while True:
            grabbed, img = cam.read()
            # read() reports failure (end of file, device unplugged) through
            # its first return value; stop instead of passing a None frame on.
            if not grabbed:
                break
            detector.run(img)
            if cv2.waitKey(1) == 27:  # Esc
                break
    finally:
        cam.release()
else:
    if os.path.isdir(opt.demo):
        image_names = []
        for file_name in sorted(os.listdir(opt.demo)):
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(opt.demo, file_name))
    elif opt.demo.endswith('.txt'):
        image_names = []
        with open(opt.demo) as f:
            lines = f.readlines()
        for file_name in sorted(lines):
            file_name = file_name.strip()
            # Lower-case the extension so the list file is filtered the same
            # way as a directory listing.
            if file_name.split('.')[-1].lower() in image_ext:
                image_names.append(file_name)
    else:
        image_names = [opt.demo]

    for image_name in image_names:
        detector.run(image_name)
        if cv2.waitKey(0) == 27:  # Esc
            break
48 |
49 |
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp:
--------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
3 | #include
4 |
5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
6 | const at::Tensor &targets,
7 | const int num_classes,
8 | const float gamma, const float alpha);
9 |
10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
11 | const at::Tensor &targets,
12 | const at::Tensor &d_losses,
13 | const int num_classes,
14 | const float gamma, const float alpha);
15 |
16 | // Interface for Python
17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
18 | const at::Tensor &targets,
19 | const int num_classes, const float gamma,
20 | const float alpha) {
21 | if (logits.type().is_cuda()) {
22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
23 | alpha);
24 | }
25 | }
26 |
27 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
28 | const at::Tensor &targets,
29 | const at::Tensor &d_losses,
30 | const int num_classes, const float gamma,
31 | const float alpha) {
32 | if (logits.type().is_cuda()) {
33 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
34 | num_classes, gamma, alpha);
35 | }
36 | }
37 |
38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
39 | m.def("forward", &SigmoidFocalLoss_forward,
40 | "SigmoidFocalLoss forward (CUDA)");
41 | m.def("backward", &SigmoidFocalLoss_backward,
42 | "SigmoidFocalLoss backward (CUDA)");
43 | }
44 |
--------------------------------------------------------------------------------
/models/ops/align2nat/src/swap_align2nat_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include
4 | #include
5 |
6 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
7 | #define CHECK_CONTIGUOUS(x) \
8 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
9 | #define CHECK_INPUT(x) \
10 | CHECK_CUDA(x); \
11 | CHECK_CONTIGUOUS(x)
12 |
13 | int SwapAlign2NatForwardLaucher(const at::Tensor& input,at::Tensor& output,
14 | const int alpha,const bool align_corners,const float pad_val);
15 | int SwapAlign2NatBackwardLaucher(const at::Tensor& grad_output,at::Tensor& grad_input,
16 | const int alpha,const bool align_corners);
17 |
18 | at::Tensor swap_align2nat_forward_cuda(const at::Tensor& input , const int alpha, const int lamda,const bool align_corners,const float pad_val) {
19 | CHECK_INPUT(input);
20 | int B = input.size(0);
21 | int V = input.size(1);
22 | int U = input.size(2);
23 | int H = input.size(3);
24 | int W = input.size(4);
25 | auto output = torch::zeros_like(input);
26 | output.resize_({B, lamda*V, lamda*U, H/lamda,W/lamda});
27 | output.contiguous();
28 | CHECK_INPUT(output);
29 | SwapAlign2NatForwardLaucher(input,output,alpha,align_corners,pad_val);
30 | return output;
31 | }
32 |
33 | at::Tensor swap_align2nat_backward_cuda(const at::Tensor& grad_output,const int alpha,const int lamda,const bool align_corners) {
34 | CHECK_INPUT(grad_output);
35 | int B = grad_output.size(0);
36 | int V = grad_output.size(1);
37 | int U = grad_output.size(2);
38 | int H = grad_output.size(3);
39 | int W = grad_output.size(4);
40 | auto grad_input = torch::zeros_like(grad_output);
41 | grad_input.resize_({B, V/lamda, U/lamda, H*lamda,W*lamda});
42 | grad_input.contiguous();
43 | CHECK_INPUT(grad_input);
44 | SwapAlign2NatBackwardLaucher(grad_output,grad_input,alpha,align_corners);
45 | return grad_input;
46 | }
47 |
48 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
49 | m.def("forward", &swap_align2nat_forward_cuda, "SwapAlign2Nat forward (CUDA)");
50 | m.def("backward", &swap_align2nat_backward_cuda, "SwapAlign2Nat backward (CUDA)");
51 | }
52 |
--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | from models.detector import Detector
3 | from pycocotools.cocoeval import COCOeval
4 | import pycocotools.coco as coco
5 | import pycocotools.mask as mask_util
6 | import numpy as np
7 | from tqdm import tqdm
8 | from config import cfg as opt
# Select the GPU used for evaluation.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
# COCO category ids, looked up by the class index the detector returns.
valid_ids = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
    14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
    37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
    72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
    82, 84, 85, 86, 87, 88, 89, 90]

## config recover weights
opt.weights = 'exp/coco_person/model_last.pth'
# NOTE(review): config.py defines `vis_thresh`, but this sets `vis_trehs`.
# Confirm which attribute Detector reads before renaming either one.
opt.vis_trehs = 0.01
split = 'val'

detector = Detector(opt)
ann_file = os.path.join(opt.data_dir, 'annotations',
                        'instances_{}2017.json'.format(split))
data = coco.COCO(ann_file)
image_dir = os.path.join(opt.data_dir, '{}2017'.format(split))

single_class = opt.class_name != '*'
if single_class:  ## for one class
    catIds = data.getCatIds(opt.class_name)
    imgIds = data.getImgIds(catIds=catIds)
    valid_ids = catIds
else:
    imgIds = data.getImgIds()

detections = []
# In single-class mode the evaluation below is restricted to `imgIds`, so
# running the detector on any other image would be wasted work.
for img_id in tqdm(imgIds):
    file_name = data.loadImgs(ids=[img_id])[0]['file_name']
    img_name = os.path.join(image_dir, file_name).strip()
    boxs, masks = detector.run(img_name, vis=False)
    for i, det in enumerate(boxs):
        _, _, _, _, conf, cls = det[:6]
        detections.append({
            "image_id": img_id,
            "category_id": int(valid_ids[int(cls)]),
            'segmentation': mask_util.encode(np.asfortranarray(masks[i])),
            "score": float("{:.2f}".format(conf)),
        })

# loadRes() indexes the first result, so an empty list would fail with an
# opaque IndexError; stop with a clear message instead.
if not detections:
    raise SystemExit('No detections were produced; nothing to evaluate.')

coco_dets = data.loadRes(detections)
coco_eval = COCOeval(data, coco_dets, "segm")

if single_class:  ## for one class
    coco_eval.params.imgIds = imgIds
    coco_eval.params.catIds = catIds

coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
60 |
--------------------------------------------------------------------------------
/exp/coco_person/logs_2019-12-07-23-55/log.txt:
--------------------------------------------------------------------------------
1 | 2019-12-08-01-03: epoch: 1 |loss 0.718341 | cls_loss 0.319530 | diou_loss 0.313822 | mask_loss 0.160178 | time 67.066667 | loss 0.547814 | cls_loss 0.235685 | diou_loss 0.224505 | mask_loss 0.128002 | time 2.916667 |
2 | 2019-12-08-02-12: epoch: 2 |loss 0.500938 | cls_loss 0.221124 | diou_loss 0.216653 | mask_loss 0.112826 | time 66.216667 | loss 0.502653 | cls_loss 0.216273 | diou_loss 0.202636 | mask_loss 0.117861 | time 2.833333 |
3 | 2019-12-08-03-21: epoch: 3 |loss 0.469828 | cls_loss 0.208693 | diou_loss 0.200540 | mask_loss 0.105500 | time 66.383333 | loss 0.477843 | cls_loss 0.207729 | diou_loss 0.195091 | mask_loss 0.110670 | time 2.850000 |
4 | 2019-12-08-04-30: epoch: 4 |loss 0.452117 | cls_loss 0.200818 | diou_loss 0.191730 | mask_loss 0.101684 | time 66.300000 | loss 0.470088 | cls_loss 0.204463 | diou_loss 0.187136 | mask_loss 0.109420 | time 2.866667 |
5 | 2019-12-08-05-39: epoch: 5 |loss 0.442711 | cls_loss 0.196554 | diou_loss 0.186292 | mask_loss 0.099792 | time 66.033333 | loss 0.469164 | cls_loss 0.204524 | diou_loss 0.183815 | mask_loss 0.109343 | time 2.883333 |
6 | 2019-12-08-06-48: epoch: 6 |loss 0.432938 | cls_loss 0.192415 | diou_loss 0.181634 | mask_loss 0.097557 | time 66.116667 | loss 0.455528 | cls_loss 0.197939 | diou_loss 0.179313 | mask_loss 0.106380 | time 2.866667 |
7 | 2019-12-08-07-57: epoch: 7 |loss 0.427711 | cls_loss 0.190262 | diou_loss 0.178843 | mask_loss 0.096369 | time 66.183333 | loss 0.449601 | cls_loss 0.197045 | diou_loss 0.175069 | mask_loss 0.104394 | time 2.833333 |
8 | 2019-12-08-09-06: epoch: 8 |loss 0.421918 | cls_loss 0.187740 | diou_loss 0.176018 | mask_loss 0.095087 | time 66.100000 | loss 0.450542 | cls_loss 0.200763 | diou_loss 0.171751 | mask_loss 0.103420 | time 2.916667 |
9 | 2019-12-08-10-16: epoch: 9 |loss 0.418336 | cls_loss 0.185566 | diou_loss 0.174558 | mask_loss 0.094565 | time 66.550000 | loss 0.450484 | cls_loss 0.201971 | diou_loss 0.172454 | mask_loss 0.102700 | time 2.866667 |
10 | 2019-12-08-11-25: epoch: 10 |loss 0.413934 | cls_loss 0.184827 | diou_loss 0.172182 | mask_loss 0.093031 | time 66.266667 | loss 0.438343 | cls_loss 0.193945 | diou_loss 0.170348 | mask_loss 0.100905 | time 2.950000 |
11 | 2019-12-08-12-35: epoch: 11 |loss 0.412394 | cls_loss 0.183310 | diou_loss 0.170863 | mask_loss 0.093184 | time 66.466667 | loss 0.450937 | cls_loss 0.203606 | diou_loss 0.170497 | mask_loss 0.102353 | time 2.916667 |
12 |
--------------------------------------------------------------------------------
/exp/coco_person/opt.txt:
--------------------------------------------------------------------------------
1 | ==> torch version: 1.0.0
2 | ==> cudnn version: 7401
3 | ==> Cmd:
4 | ['train.py']
5 | ==> Opt:
6 | align_corners: False
7 | backbone: resnet50
8 | base_stride: 4
9 | base_window: 12
10 | batch_size: 18
11 | class_name: person
12 | clear:
13 | cls_weights: 1.0
14 | copy:
15 | data_dir: /data/yoloCao/DataSet/coco
16 | device: cuda
17 | exp_id: coco_person
18 | frezeBN: False
19 | frezeLayer: False
20 | fromkeys:
21 | get:
22 | gpus: [0, 1, 2]
23 | gpus_str: 0,1,2
24 | input_h: 512
25 | input_w: 640
26 | items:
27 | jitter: 0.3
28 | k: 5
29 | keys:
30 | lr: 0.02
31 | lr_step: [64, 70]
32 | mask_weights: 2.0
33 | num_class: 1
34 | num_epochs: 72
35 | pop:
36 | popitem:
37 | print_iter: 1
38 | save_dir: exp/coco_person
39 | setdefault:
40 | test: False
41 | update:
42 | values:
43 | warm_up: 1000
44 | weights:
45 | xywh_weights: 0.25
46 |
--------------------------------------------------------------------------------
/exp/coco_person/logs_2019-12-07-23-55/opt.txt:
--------------------------------------------------------------------------------
1 | ==> torch version: 1.0.0
2 | ==> cudnn version: 7401
3 | ==> Cmd:
4 | ['train.py']
5 | ==> Opt:
6 | align_corners: False
7 | backbone: resnet50
8 | base_stride: 4
9 | base_window: 12
10 | batch_size: 18
11 | class_name: person
12 | clear:
13 | cls_weights: 1.0
14 | copy:
15 | data_dir: /data/yoloCao/DataSet/coco
16 | device: cuda
17 | exp_id: coco_person
18 | frezeBN: False
19 | frezeLayer: False
20 | fromkeys:
21 | get:
22 | gpus: [0, 1, 2]
23 | gpus_str: 0,1,2
24 | input_h: 512
25 | input_w: 640
26 | items:
27 | jitter: 0.3
28 | k: 5
29 | keys:
30 | lr: 0.02
31 | lr_step: [64, 70]
32 | mask_weights: 2.0
33 | num_class: 1
34 | num_epochs: 72
35 | pop:
36 | popitem:
37 | print_iter: 1
38 | save_dir: exp/coco_person
39 | setdefault:
40 | test: False
41 | update:
42 | values:
43 | warm_up: 1000
44 | weights:
45 | xywh_weights: 0.25
46 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from models.tensormask import TensorMask
2 | from lib.trainer import Trainer
3 | from lib.utils import load_model,save_model,Logger
4 | from lib.coco import COCO
5 | from lib import optimer
6 | from config import cfg as opt
7 | import torch
8 | import os
9 |
10 |
# NOTE(review): the original comment here said "input size is not fixed", yet
# cuDNN autotuning pays off mainly when input shapes repeat. Confirm intent.
torch.backends.cudnn.benchmark = True
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
opt.gpus = [int(i) for i in opt.gpus_str.split(',')]
# Visible devices are renumbered from 0, so address them as 0..n-1.
opt.gpus = list(range(len(opt.gpus)))
# cfg.batch_size is treated as per-GPU; scale it to the total batch size.
opt.batch_size = opt.batch_size * len(opt.gpus)
opt.save_dir = os.path.join(opt.save_dir, opt.exp_id)
logger = Logger(opt)


model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                   base_window=opt.base_window,
                   freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                   align_corners=opt.align_corners)

# 4-D parameters get weight decay; parameters with fewer than four dimensions
# form a second group without an explicit weight_decay.
optimizer = optimer.SGD(
    [{'params': filter(lambda x: len(x.size()) == 4, model.parameters()),
      'weight_decay': 0.0001},
     {'params': filter(lambda x: len(x.size()) < 4, model.parameters())}],
    # Take the warm-up length from the config instead of a hard-coded 1000.
    lr=opt.lr, warm_up=opt.warm_up, momentum=0.9, nesterov=True)
start_epoch = 0
if opt.weights != '':
    model, optimizer, start_epoch = load_model(
        model, opt.weights, optimizer, opt.resume, opt.lr, opt.lr_step)
trainer = Trainer(opt, model, optimizer)
trainer.set_device(opt.gpus, opt.device)

print('Setting up data...')
val_loader = torch.utils.data.DataLoader(
    COCO(cfg=opt, split='val', augment=False),
    batch_size=8,
    shuffle=False,
    num_workers=8,
    pin_memory=False
)
train_loader = torch.utils.data.DataLoader(
    COCO(cfg=opt, split='train', augment=True),
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=8,
    pin_memory=False
)

print('Starting training...')
best = 1e10
for epoch in range(start_epoch + 1, opt.num_epochs + 1):
    log_dict_train, _ = trainer.train(epoch, train_loader)
    logger.write('epoch: {} |'.format(epoch))
    for k, v in log_dict_train.items():
        logger.scalar_summary('train_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))
    with torch.no_grad():
        log_dict_val, preds = trainer.val(epoch, val_loader)
    for k, v in log_dict_val.items():
        logger.scalar_summary('val_{}'.format(k), v, epoch)
        logger.write('{} {:8f} | '.format(k, v))
    # Keep the checkpoint with the lowest validation loss seen so far.
    if log_dict_val['loss'] < best:
        best = log_dict_val['loss']
        save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                   epoch, model)
    save_model(os.path.join(opt.save_dir, 'model_last.pth'),
               epoch, model, optimizer)
    logger.write('\n')
    if epoch in opt.lr_step:
        save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                   epoch, model, optimizer)
        # Each milestone in lr_step divides the base learning rate by 10.
        lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
        print('Drop LR to', lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
78 |
--------------------------------------------------------------------------------
/show_pred_window.py:
--------------------------------------------------------------------------------
1 | from models.tensormask import TensorMask
2 | from config import cfg as opt
3 | from lib.utils import load_model,save_model
4 | from lib.coco import COCO
5 | import numpy as np
6 | import torch
7 | import os
8 | import cv2
9 |
# Select the GPU used for visualisation.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                   base_window=opt.base_window,
                   freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                   align_corners=opt.align_corners)

opt.test = True
opt.weights = 'exp/coco_person/model_last.pth'
model = load_model(model, opt.weights)
model.eval()
model.cuda()
val_loader = torch.utils.data.DataLoader(
    COCO(cfg=opt, split='val', augment=False),
    batch_size=1,
    shuffle=False,
    num_workers=1,
    pin_memory=True
)
# One stride and one window size per pyramid level 0..k.
strides = np.array([opt.base_stride * 2 ** i for i in range(opt.k + 1)])
windows = np.array([opt.base_window * lamda for lamda in strides], np.int32)

# (width, height) of each level's output grid, and the running total of grid
# cells used to map a flat detection index back to its level.
output_size = np.array(list(zip(opt.input_w // strides, opt.input_h // strides)))
num_det = [output_w * output_h for output_w, output_h in output_size]
det_offset = np.cumsum(num_det)

# NOTE(review): the loop nesting below was reconstructed; in particular the
# display calls are assumed to run once per detection. Confirm.
for batch in val_loader:
    image = batch['img'].numpy()[0]
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(batch['input'].cuda())

    scores, cls = torch.max(output['cls'].sigmoid_(), dim=-1)
    scores = scores.detach().cpu().numpy()
    cls = cls.detach().cpu().numpy()
    box = output['box'].detach().cpu().numpy()
    seg = [output['%d' % i].sigmoid_().detach().cpu().numpy()
           for i in range(opt.k + 1)]
    topk_inds = np.where(scores > 0.4)

    for det_num in topk_inds[1]:
        b = box[0, det_num, :]
        # First level whose cumulative cell count exceeds det_num.
        level = int(np.searchsorted(det_offset, det_num, side='right'))
        offset = det_num - det_offset[level - 1] if level > 0 else det_num
        width, _ = output_size[level]
        stride = strides[level]
        win = int(windows[level])
        half = win // 2

        ### ct_int_feat
        y = int(offset // width)
        x = int(offset % width)

        window_seg = seg[level][0, y, x, :, :]

        ### ct_int
        x, y = int((x + 0.5) * stride), int((y + 0.5) * stride)

        ### show box
        b[0:2] = x - b[0] * stride, y - b[1] * stride
        b[2:4] = x + b[2] * stride, y + b[3] * stride
        # np.int was removed in NumPy 1.24; it was an alias of builtin int.
        bx0, by0, bx1, by1 = (int(v) for v in b.astype(int))
        cv2.rectangle(image, (bx0, by0), (bx1, by1), (255, 0, 0), 2)

        ### show mask
        img_h, img_w = image.shape[:2]
        paste_x, paste_y = x - half, y - half
        paste_x1, paste_y1 = x + half, y + half

        # Part of the window that lies inside the image.
        window_x, window_y = max(-paste_x, 0), max(-paste_y, 0)
        window_x1 = win - max(0, paste_x1 - img_w)
        window_y1 = win - max(0, paste_y1 - img_h)

        paste_x, paste_y = max(paste_x, 0), max(paste_y, 0)
        paste_x1, paste_y1 = min(paste_x1, img_w), min(paste_y1, img_h)

        window_seg = cv2.resize(window_seg, (win, win))
        window_seg = (window_seg > 0.5)

        ### paste to img
        window_seg_paste = window_seg[window_y:window_y1, window_x:window_x1]
        color = np.array([[np.random.randint(0, 255),
                           np.random.randint(0, 255),
                           np.random.randint(0, 255)]])
        # Basic slicing returns a view, so this writes into `image`.
        region = image[paste_y:paste_y1, paste_x:paste_x1]
        region[window_seg_paste] = region[window_seg_paste] * 0.2 + color * 0.8

        ### show
        cv2.imshow('window', (window_seg).astype(np.uint8) * 255)
        cv2.imshow('', image)
        cv2.waitKey(0)
92 |
--------------------------------------------------------------------------------
/models/tensormask.py:
--------------------------------------------------------------------------------
1 | from models.ops.align2nat.functions.swap_align2nat import swap_align2nat
2 | from models.res_fpn import resnet_fpn_backbone
3 | import torch.nn.functional as F
4 | import torch.nn as nn
5 | import torch
6 | import math
7 |
class Subnet(nn.Module):
    """Tower of four 3x3 conv + ReLU layers with an optional prediction conv.

    Args:
        in_channels: channels of the input feature map.
        mid_channels: channels of every tower layer.
        num_cls: when > 0, a 3x3 conv with ``num_cls`` output channels is
            appended and the output is flattened to ``(B, H*W, num_cls)``;
            otherwise the tower feature map is returned unchanged.
    """

    def __init__(self, in_channels=256, mid_channels=256, num_cls=-1):
        super().__init__()

        tower = []
        channels = in_channels
        for _ in range(4):
            tower.append(nn.Conv2d(channels, mid_channels, 3, padding=1))
            tower.append(nn.ReLU(inplace=True))
            channels = mid_channels
        self.conv = nn.Sequential(*tower)

        self.num_cls = num_cls
        if num_cls > 0:
            self.fc = nn.Conv2d(mid_channels, num_cls, 3, padding=1)

        # Re-initialise every conv (tower and prediction layer alike).
        convs = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in convs:
            nn.init.kaiming_uniform_(conv.weight, a=1)
            nn.init.constant_(conv.bias, 0)

    def forward(self, x):
        feat = self.conv(x)
        if self.num_cls <= 0:
            return feat
        pred = self.fc(feat)
        # (B, C, H, W) -> (B, H, W, C) -> (B, H*W, C)
        return pred.permute(0, 2, 3, 1).contiguous().view(pred.size(0), -1, self.num_cls)
35 |
class TensorMask(nn.Module):
    """ResNet-FPN backbone with classification, box and window-mask heads.

    Args:
        backbone: backbone name forwarded to ``resnet_fpn_backbone``.
        num_cls: number of classification channels per location.
        base_window: the mask head predicts ``base_window ** 2`` channels per
            location, reshaped into a ``base_window x base_window`` window.
        freezeBN, freezeLayers: forwarded to ``resnet_fpn_backbone``.
        align_corners: forwarded to the backbone and used for the bilinear
            upsampling and ``swap_align2nat`` calls in ``forward``.
    """

    def __init__(self, backbone='resnet50', num_cls=80, base_window=12,
                 freezeBN=True, freezeLayers=False, align_corners=True):
        super().__init__()
        self.align_corners = align_corners
        self.base_fpn = resnet_fpn_backbone(backbone, pretrained=True,
                                            freezeBN=freezeBN,
                                            freezeLayers=freezeLayers,
                                            align_corners=align_corners)

        self.cls_subnet = Subnet(in_channels=256, mid_channels=256, num_cls=num_cls)
        self.box_subnet = Subnet(in_channels=256, mid_channels=128, num_cls=4)
        self.mask_subnet = Subnet(in_channels=256, mid_channels=128)

        self.mask_fuse = nn.Sequential(nn.Conv2d(128, 128, 3, padding=1),
                                       nn.ReLU(inplace=True))
        self.mask_head = nn.Conv2d(128, base_window ** 2, kernel_size=1, padding=0)

        self.base_window = base_window

        # Box regression starts from a small box rather than a point, which
        # is hard to train from.
        nn.init.constant_(self.box_subnet.fc.bias, 1)
        fuse_conv = self.mask_fuse[0]
        nn.init.kaiming_uniform_(fuse_conv.weight, a=1)
        nn.init.constant_(fuse_conv.bias, 0)
        nn.init.kaiming_uniform_(self.mask_head.weight, a=1)
        nn.init.constant_(self.mask_head.bias, 0)

        # Bias giving sigmoid(bias) == 0.01 at initialisation.
        prior = 0.01
        nn.init.constant_(self.cls_subnet.fc.bias, -math.log((1 - prior) / prior))

    def forward(self, x):
        """Return a dict with ``'cls'``, ``'box'`` and one mask tensor per
        pyramid level under the string keys ``'0'``, ``'1'``, ...
        """
        feats = self.base_fpn(x)
        ret = {
            'cls': torch.cat([self.cls_subnet(f) for f in feats], dim=1),
            'box': torch.cat([self.box_subnet(f) for f in feats], dim=1),
        }

        mask_feats = [self.mask_subnet(f) for f in feats]
        finest_feat = mask_feats[0]

        ## tensor bipyramid: coarser levels are upsampled to the finest
        ## resolution and fused with the finest feature map.
        for level, level_feat in enumerate(mask_feats):
            if level > 0:
                level_feat = F.interpolate(level_feat, scale_factor=2 ** level,
                                           mode="bilinear",
                                           align_corners=self.align_corners)
            fused = self.mask_fuse(level_feat + finest_feat)
            windows = self.mask_head(fused)
            # (B, win*win, H, W) -> (B, win, win, H, W)
            windows = windows.view(windows.size(0), self.base_window, self.base_window,
                                   windows.size(2), windows.size(3))
            # Custom op from models/ops/align2nat; its semantics are not
            # visible in this file.
            windows = swap_align2nat(windows, 1, 2 ** level, -6., self.align_corners)
            ret['%d' % level] = windows.permute(0, 3, 4, 1, 2).contiguous()

        return ret
84 |
if __name__ == '__main__':
    # Smoke test: build the model and push one all-zero 512x512 image
    # through it on the GPU.
    import os
    os.environ.setdefault('CUDA_VISIBLE_DEVICES', '1')
    import torch

    model = TensorMask(num_cls=1, base_window=10)
    model.cuda()
    dummy = torch.zeros([1, 3, 512, 512]).cuda()
    out = model(dummy)
94 |
--------------------------------------------------------------------------------
/lib/trainer.py:
--------------------------------------------------------------------------------
1 | import time
2 | import torch
3 | import torch.nn as nn
4 | from .utils import AverageMeter
5 | from progress.bar import Bar
6 | from models.losses import TensorMaskLoss
7 |
class ModleWithLoss(nn.Module):
    """Bundle a model with its loss so that one call maps a batch to a loss.

    Args:
        model: callable applied to ``batch['input']``.
        loss: callable ``loss(outputs, batch) -> (loss, loss_stats)``.
    """

    def __init__(self, model, loss):
        super().__init__()
        self.model = model
        self.loss = loss

    def forward(self, batch):
        """Run the model on ``batch['input']`` and return ``(loss, loss_stats)``."""
        predictions = self.model(batch['input'])
        total, stats = self.loss(predictions, batch)
        return total, stats
18 |
19 |
class Trainer(object):
    """Runs training and validation epochs for a model wrapped with its loss.

    Args:
        opt: configuration object; this class reads ``opt.gpus``,
            ``opt.device`` and ``opt.print_iter`` and passes ``opt`` to
            ``TensorMaskLoss``.
        model: the network to train.
        optimizer: optimizer stepped in the ``'train'`` phase; may be ``None``
            when only validation is run.
    """

    def __init__(self, opt, model, optimizer=None):
        self.opt = opt
        self.optimizer = optimizer
        self.loss_stats, self.loss = self._get_losses(opt)
        self.model_with_loss = ModleWithLoss(model, self.loss)

    def set_device(self, gpus, device):
        """Move the model (and any optimizer state tensors) to ``device``.

        With more than one entry in ``gpus`` the model is wrapped in
        ``nn.DataParallel`` first.
        """
        if len(gpus) > 1:
            self.model_with_loss = nn.DataParallel(
                self.model_with_loss, device_ids=gpus).to(device)
        else:
            self.model_with_loss = self.model_with_loss.to(device)

        # `optimizer` defaults to None in __init__; nothing to move then.
        if self.optimizer is None:
            return
        for state in self.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device=device, non_blocking=True)

    def run_epoch(self, phase, epoch, data_loader):
        """Run one pass over ``data_loader``.

        Args:
            phase: ``'train'`` to optimise; any other value runs evaluation.
            epoch: epoch index, used only for the progress text.
            data_loader: iterable of batch dicts; every entry except
                ``'meta'`` is moved to ``opt.device``.

        Returns:
            ``(ret, results)`` where ``ret`` maps each loss name to its
            running average plus ``'time'`` (elapsed minutes), and
            ``results`` is an always-empty dict.
        """
        is_train = phase == 'train'
        model_with_loss = self.model_with_loss
        if is_train:
            model_with_loss.train()
        else:
            if len(self.opt.gpus) > 1:
                # Evaluate on the wrapped module, bypassing DataParallel.
                model_with_loss = self.model_with_loss.module
            model_with_loss.eval()
            torch.cuda.empty_cache()

        results = {}
        data_time, batch_time = AverageMeter(), AverageMeter()
        avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
        num_iters = len(data_loader)
        bar = Bar('{}'.format('tensormask'), max=num_iters)
        end = time.time()
        for iter_id, batch in enumerate(data_loader):
            if iter_id >= num_iters:
                break
            data_time.update(time.time() - end)

            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].to(device=self.opt.device, non_blocking=True)

            # Only the training phase needs an autograd graph; evaluation
            # would otherwise keep activations alive for nothing.
            with torch.set_grad_enabled(is_train):
                loss, loss_stats = model_with_loss(batch)
                loss = loss.mean()
            if is_train:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
            batch_time.update(time.time() - end)
            end = time.time()

            Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
                epoch, iter_id, num_iters, phase=phase,
                total=bar.elapsed_td, eta=bar.eta_td)
            for l in avg_loss_stats:
                avg_loss_stats[l].update(
                    loss_stats[l].mean().item(), batch['input'].size(0))
                Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)

            Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
                                      '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
            if self.opt.print_iter > 0:
                if iter_id % self.opt.print_iter == 0:
                    print('{}| {}'.format('tensormask', Bar.suffix))
            else:
                bar.next()

            del loss, loss_stats

        bar.finish()
        ret = {k: v.avg for k, v in avg_loss_stats.items()}
        ret['time'] = bar.elapsed_td.total_seconds() / 60.
        return ret, results

    def _get_losses(self, opt):
        """Return the tracked loss names and the loss module."""
        loss_stats = ['loss', 'cls_loss', 'diou_loss', 'mask_loss']
        loss = TensorMaskLoss(opt)
        return loss_stats, loss

    def val(self, epoch, data_loader):
        """Run one validation epoch."""
        return self.run_epoch('val', epoch, data_loader)

    def train(self, epoch, data_loader):
        """Run one training epoch."""
        return self.run_epoch('train', epoch, data_loader)
--------------------------------------------------------------------------------
/models/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from .ops.sigmoid_focal_loss.modules.sigmoid_focal_loss import SigmoidFocalLoss
5 | def _sigmoid(x):
6 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
7 | return y
8 |
def diou(bboxes1, bboxes2):
    """Element-wise Distance-IoU between two box tensors.

    Both inputs hold ``(x1, y1, x2, y2)`` in their last dimension. The result
    is ``IoU - d^2 / c^2`` where ``d`` is the distance between the box centres
    and ``c`` the diagonal of the smallest enclosing box; the last dimension
    is dropped.
    """
    eps = 1e-7
    ax1, ay1, ax2, ay2 = (bboxes1[..., k] for k in range(4))
    bx1, by1, bx2, by2 = (bboxes2[..., k] for k in range(4))

    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)

    # Squared distance between the two box centres.
    centre_dist = ((bx2 + bx1) / 2 - (ax2 + ax1) / 2) ** 2 \
        + ((by2 + by1) / 2 - (ay2 + ay1) / 2) ** 2

    # Intersection rectangle, clamped so disjoint boxes give zero area.
    overlap = torch.clamp(
        torch.min(bboxes1[..., 2:], bboxes2[..., 2:])
        - torch.max(bboxes1[..., :2], bboxes2[..., :2]),
        min=0)
    inter_area = overlap[..., 0] * overlap[..., 1]

    # Smallest enclosing rectangle and its squared diagonal.
    hull = torch.clamp(
        torch.max(bboxes1[..., 2:], bboxes2[..., 2:])
        - torch.min(bboxes1[..., :2], bboxes2[..., :2]),
        min=0)
    hull_diag = (hull[..., 0] ** 2) + (hull[..., 1] ** 2)

    union = area_a + area_b - inter_area
    penalty = centre_dist / (hull_diag + eps)
    iou = inter_area / (union + eps)
    return iou - penalty
38 |
39 | def _gather_feat(feat, ind, mask=None):
40 | dim = feat.size(2)
41 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
42 | feat = feat.gather(1, ind)
43 | if mask is not None:
44 | mask = mask.unsqueeze(2).expand_as(feat)
45 | feat = feat[mask]
46 | feat = feat.view(-1, dim)
47 | return feat
48 |
def _tranpose_and_gather_feat(feat, ind):
    """Gather rows of ``feat`` (B, N, C) at indices ``ind`` (B, K) -> (B, K, C).

    Despite the name, no transpose is performed here; ``feat`` is expected to
    be in (B, N, C) layout already.
    """
    channels = feat.size(2)
    index = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), channels)
    return feat.gather(1, index)
52 |
class BOXLoss(nn.Module):
    """DIoU loss on boxes decoded from per-location offset predictions."""

    def __init__(self):
        super().__init__()

    def forward(self, output, mask, ind, target):
        """Compute the masked mean of ``1 - DIoU``.

        Args:
            output: box predictions; rows are gathered with ``ind`` and must
                have 4 channels.
            mask: marks which gathered entries contribute to the loss.
            ind: indices of the locations to gather.
            target: last dimension is split as 4 box coordinates, 2 centre
                coordinates and 1 stride.
        """
        weight = mask.float()
        pred = _tranpose_and_gather_feat(output, ind)
        # First two channels are subtracted from the centre, last two added.
        offset_lo, offset_hi = pred.split([2, 2], dim=-1)
        gt_box, centre, stride = target.split([4, 2, 1], dim=-1)
        stride = stride.expand_as(offset_lo).float()

        # Decode corners from the cell centre (centre + 0.5), scaled by stride.
        corner_lo = (centre + 0.5 - offset_lo) * stride
        corner_hi = (centre + 0.5 + offset_hi) * stride
        pred_box = torch.cat([corner_lo, corner_hi], dim=-1)

        per_box = (1. - diou(pred_box, gt_box)) * weight
        # The 1e-4 keeps the division finite when the mask is empty.
        return per_box.sum() / (weight.sum() + 1e-4)
70 |
class MaskBCELoss(nn.Module):
    """Masked binary cross-entropy (on logits) over predicted mask windows.

    Positive pixels are weighted by a fixed ``pos_weight`` of 1.5.
    """

    def __init__(self):
        super().__init__()
        self.register_buffer('pos_weight', torch.tensor(1.5, dtype=torch.float32))

    def forward(self, output, mask, ind, target):
        """Average the per-pixel BCE over the entries selected by ``mask``.

        Args:
            output: window logits, viewed as ``(B, -1, window * window)``.
            mask: ``(B, N)`` selector of valid targets.
            ind: ``(B, N)`` indices of the windows to gather.
            target: ``(B, N, window, window)`` ground-truth windows.
        """
        batch, count, window = target.size(0), target.size(1), target.size(-1)
        flat = output.view(batch, -1, window * window)
        pred = _tranpose_and_gather_feat(flat, ind).view(batch, count, window, window)
        weight = mask.unsqueeze(2).unsqueeze(2).expand_as(pred).float()
        per_pixel = F.binary_cross_entropy_with_logits(
            pred, target, pos_weight=self.pos_weight, reduction='none')

        total = (per_pixel * weight).sum()
        num_valid = weight.sum()
        # With no valid pixel the sum is returned without normalising.
        if num_valid > 0:
            return total / num_valid
        return total
89 |
class TensorMaskLoss(nn.Module):
    """Weighted sum of the classification, box and mask losses.

    ``opt`` supplies ``num_class`` and the weights ``cls_weights``,
    ``xywh_weights`` and ``mask_weights``.
    """

    def __init__(self, opt):
        super().__init__()
        self.cls_loss = SigmoidFocalLoss(gamma=3, alpha=0.3)
        self.box_loss = BOXLoss()
        self.mask_loss = MaskBCELoss()
        self.opt = opt

    def forward(self, ouput, batch):
        """Return ``(loss, loss_stats)`` for the model outputs and one batch.

        ``loss_stats`` holds ``'loss'``, ``'cls_loss'``, ``'diou_loss'`` and
        ``'mask_loss'``.
        """
        opt = self.opt
        num_levels = 6  # mask outputs are read from keys '0' .. '5'

        num_sample = batch['reg_mask'].sum()
        cls_loss = self.cls_loss(ouput['cls'].view([-1, opt.num_class]),
                                 batch['cls'].view([-1]))
        box_loss = self.box_loss(ouput['box'], batch['reg_mask'],
                                 batch['ind'], batch['xywh'])

        mask_loss = 0
        for level in range(num_levels):
            key = '%d' % level
            mask_loss += self.mask_loss(ouput[key],
                                        batch['seg_mask_' + key],
                                        batch['seg_ind_' + key],
                                        batch['seg_' + key])
        mask_loss /= num_levels

        # Normalise the classification loss by the number of positives.
        if num_sample > 0:
            cls_loss /= num_sample

        loss = opt.cls_weights * cls_loss + opt.xywh_weights * box_loss + opt.mask_weights * mask_loss
        loss_stats = {'loss': loss, 'cls_loss': cls_loss,
                      'diou_loss': box_loss, 'mask_loss': mask_loss}
        return loss, loss_stats
113 |
--------------------------------------------------------------------------------
/models/res_fpn.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import torch
3 | from torch import nn
4 | import torch.nn.functional as F
5 | from torchvision.models import resnet
6 | from collections import OrderedDict
7 |
class FrozenBatchNorm2d(nn.Module):
    """
    BatchNorm2d whose statistics and affine parameters are constants.

    All four tensors are registered as buffers, so they are saved and loaded
    with the state dict but are not trainable parameters.
    """

    def __init__(self, n):
        super().__init__()
        self.register_buffer("weight", torch.ones(n))
        self.register_buffer("bias", torch.zeros(n))
        self.register_buffer("running_mean", torch.zeros(n))
        self.register_buffer("running_var", torch.ones(n))

    def forward(self, x):
        # Fold the normalisation into one per-channel scale and shift.
        # The reshapes come first to keep the expression fuser-friendly.
        per_channel = (1, -1, 1, 1)
        gamma = self.weight.reshape(per_channel)
        beta = self.bias.reshape(per_channel)
        var = self.running_var.reshape(per_channel)
        mean = self.running_mean.reshape(per_channel)
        scale = gamma * var.rsqrt()
        shift = beta - mean * scale
        return x * scale + shift
30 |
class IntermediateLayerGetter(nn.ModuleDict):
    """Run a model's children in order and collect selected outputs.

    Only the children up to (and including) the last requested layer are
    kept. ``forward`` returns a tuple with the outputs of the layers named in
    ``return_layers``, in execution order.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of
            ``model``.
    """

    def __init__(self, model, return_layers):
        child_names = [name for name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")

        # Work on a copy so the caller's mapping stays intact.
        pending = {str(k): str(v) for k, v in return_layers.items()}
        kept = OrderedDict()
        for name, module in model.named_children():
            kept[name] = module
            pending.pop(name, None)
            if not pending:
                break

        super().__init__(kept)
        self.return_layers = return_layers

    def forward(self, x):
        collected = []
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                collected.append(x)
        return tuple(collected)
56 |
class LastLevelP6P7(nn.Module):
    """
    Extra FPN block producing two more levels, P6 and P7 (as in RetinaNet).

    P6 is a stride-2 3x3 conv of P5 when ``in_channels == out_channels`` and
    of C5 otherwise; P7 is a stride-2 3x3 conv of ``relu(P6)``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)
        self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)
        for conv in (self.p6, self.p7):
            nn.init.kaiming_uniform_(conv.weight, a=1)
            nn.init.constant_(conv.bias, 0)
        self.use_P5 = in_channels == out_channels

    def forward(self, p, c):
        """Append P6 and P7 to the list ``p`` in place and return it.

        ``p`` holds the FPN outputs and ``c`` the backbone features; only the
        last element of each is read.
        """
        source = p[-1] if self.use_P5 else c[-1]
        p6 = self.p6(source)
        p7 = self.p7(F.relu(p6))
        p.append(p6)
        p.append(p7)
        return p
77 |
class FeaturePyramidNetwork(nn.Module):
    """Top-down feature pyramid over a list of backbone feature maps.

    Args:
        in_channels_list: channel count of each input map, finest first;
            entries equal to 0 are skipped.
        out_channels: channel count of every output map.
        extra_blocks: optional module called as ``extra_blocks(results, x)``
            after the pyramid is built; its return value replaces the results.
        align_corners: passed to the bilinear upsampling of the top-down path.
    """

    def __init__(self, in_channels_list, out_channels, extra_blocks=None, align_corners=True):
        super(FeaturePyramidNetwork, self).__init__()
        self.align_corners = align_corners
        self.inner_blocks = nn.ModuleList()
        self.layer_blocks = nn.ModuleList()
        for in_channels in in_channels_list:
            if in_channels == 0:
                continue
            self.inner_blocks.append(nn.Conv2d(in_channels, out_channels, 1))
            self.layer_blocks.append(nn.Conv2d(out_channels, out_channels, 3, padding=1))

        # Initialise parameters now, before extra_blocks is attached, so its
        # own initialisation is left alone. modules() is required here:
        # children() only yields the two ModuleList containers, so the convs
        # inside them would never be matched.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = extra_blocks

    def forward(self, x):
        """Build the pyramid from the maps in ``x`` (finest first).

        Returns a tuple of output maps, finest first, followed by whatever
        ``extra_blocks`` adds.
        """
        last_inner = self.inner_blocks[-1](x[-1])
        results = [self.layer_blocks[-1](last_inner)]

        # Walk from the second-coarsest level down to the finest.
        inner_rest = list(self.inner_blocks)[:-1]
        layer_rest = list(self.layer_blocks)[:-1]
        for feature, inner_block, layer_block in zip(
                reversed(x[:-1]), reversed(inner_rest), reversed(layer_rest)):
            inner_lateral = inner_block(feature)
            feat_shape = inner_lateral.shape[-2:]
            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="bilinear",
                                           align_corners=self.align_corners)
            last_inner = inner_lateral + inner_top_down
            results.insert(0, layer_block(last_inner))

        if self.extra_blocks is not None:
            results = self.extra_blocks(results, x)

        return tuple(results)
120 |
class BackboneWithFPN(nn.Module):
    """Backbone feature extractor followed by an FPN with P6/P7 levels.

    Args:
        backbone: module whose named children are run in order.
        return_layers: maps the child names to tap; see
            ``IntermediateLayerGetter``.
        in_channels_list: channel counts of the tapped layers.
        out_channels: channel count of every pyramid level.
        align_corners: forwarded to ``FeaturePyramidNetwork``.
    """

    def __init__(self, backbone, return_layers, in_channels_list, out_channels, align_corners):
        super().__init__()
        self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        top_levels = LastLevelP6P7(in_channels_list[-1], out_channels)
        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=top_levels,
            align_corners=align_corners,
        )
        self.out_channels = out_channels

    def forward(self, x):
        """Return the tuple of pyramid feature maps for input ``x``."""
        return self.fpn(self.body(x))
138 |
139 |
def resnet_fpn_backbone(backbone_name, pretrained, freezeBN=False, freezeLayers=False, align_corners=True):
    """Build a torchvision ResNet wrapped in an FPN with 256 output channels.

    Args:
        backbone_name: name of a constructor in ``torchvision.models.resnet``.
        pretrained: forwarded to that constructor.
        freezeBN: use ``FrozenBatchNorm2d`` as the norm layer when true.
        freezeLayers: when true, disable gradients for every parameter whose
            name does not contain ``layer2``, ``layer3`` or ``layer4``.
        align_corners: forwarded to ``BackboneWithFPN``.
    """
    norm_layer = FrozenBatchNorm2d if freezeBN else None
    backbone = vars(resnet)[backbone_name](pretrained=pretrained, norm_layer=norm_layer)

    # freeze layers
    if freezeLayers:
        trainable_tags = ('layer2', 'layer3', 'layer4')
        for name, parameter in backbone.named_parameters():
            print(name)
            if not any(tag in name for tag in trainable_tags):
                parameter.requires_grad_(False)

    return_layers = {'layer1': 'p2', 'layer2': 'p3', 'layer3': 'p4', 'layer4': 'p5'}

    # Channel counts of layer1..layer4, derived from the final width.
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [in_channels_stage2 * factor for factor in (1, 2, 4, 8)]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, align_corners)
162 |
if __name__ == '__main__':
    # Smoke test: run an all-ones 512x512 image through an untrained
    # ResNet-50 FPN on the CPU.
    model = resnet_fpn_backbone('resnet50', False)
    dummy = torch.ones([1, 3, 512, 512])
    out = model(dummy)
--------------------------------------------------------------------------------
/models/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu:
--------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu
3 |
4 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
5 | // This file is modified from
6 | // https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu
7 | // Cheng-Yang Fu
8 | // cyfu@cs.unc.edu
9 | #include
10 | #include
11 |
12 | #include
13 | #include
14 | #include
15 |
16 | #include
17 |
18 | // TODO make it in a common file
// Grid-stride loop over [0, n): each thread starts at its global index and
// advances by the total number of launched threads. `n` is parenthesised so
// that expression arguments expand safely.
#define CUDA_1D_KERNEL_LOOP(i, n)                              \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)
22 |
// Element-wise sigmoid focal loss.
//   nthreads    : number of (sample, class) entries to process
//   logits      : raw scores, indexed as sample * num_classes + class
//   targets     : per-sample label; label t marks class index t - 1 as
//                 positive, 0 makes every class negative, and a negative
//                 label contributes no loss at all
//   gamma/alpha : focal-loss focusing and balancing parameters
//   num         : unused
//   losses      : output, same indexing as logits
template <typename scalar_t>
__global__ void SigmoidFocalLossForward(const int nthreads,
                                        const scalar_t *logits,
                                        const long *targets,
                                        const int num_classes,
                                        const float gamma, const float alpha,
                                        const int num, scalar_t *losses) {
  CUDA_1D_KERNEL_LOOP(i, nthreads) {
    int n = i / num_classes;
    int d = i % num_classes;  // current class index, 0-based
    int t = targets[n];       // target label, 1-based; 0 is background

    // Decide it is positive or negative case.
    scalar_t c1 = (t == (d + 1));
    scalar_t c2 = (t >= 0 && t != (d + 1));

    scalar_t zn = (1.0 - alpha);
    scalar_t zp = (alpha);

    // p = sigmoid(x) = 1 / (1 + exp(-x))
    scalar_t p = 1. / (1. + expf(-logits[i]));

    // (1-p)**gamma * log(p)
    scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));

    // p**gamma * log(1-p), with log(1-p) written in a form that never
    // exponentiates a positive number
    scalar_t term2 =
        powf(p, gamma) *
        (-1. * logits[i] * (logits[i] >= 0) -
         logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0))));

    losses[i] = 0.0;
    losses[i] += -c1 * term1 * zp;
    losses[i] += -c2 * term2 * zn;

  }  // CUDA_1D_KERNEL_LOOP
}  // SigmoidFocalLossForward
60 |
// Gradient of the sigmoid focal loss with respect to the logits.
//   logits / targets : as in SigmoidFocalLossForward
//   d_losses         : upstream gradient, same indexing as logits
//   num              : unused
//   d_logits         : output gradient, same indexing as logits
template <typename scalar_t>
__global__ void SigmoidFocalLossBackward(
    const int nthreads, const scalar_t *logits, const long *targets,
    const scalar_t *d_losses, const int num_classes, const float gamma,
    const float alpha, const int num, scalar_t *d_logits) {
  CUDA_1D_KERNEL_LOOP(i, nthreads) {
    int n = i / num_classes;
    int d = i % num_classes;  // current class index, 0-based
    int t = targets[n];       // target label, 1-based; 0 is background

    // Decide it is positive or negative case.
    scalar_t c1 = (t == (d + 1));
    scalar_t c2 = (t >= 0 && t != (d + 1));

    scalar_t zn = (1.0 - alpha);
    scalar_t zp = (alpha);
    // p = sigmoid(x) = 1 / (1 + exp(-x))
    scalar_t p = 1. / (1. + expf(-logits[i]));

    // (1-p)**g * (1 - p - g*p*log(p))
    scalar_t term1 =
        powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN))));

    // (p**g) * (g*(1-p)*log(1-p) - p)
    scalar_t term2 =
        powf(p, gamma) *
        ((-1. * logits[i] * (logits[i] >= 0) -
          logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) *
             (1. - p) * gamma -
         p);
    d_logits[i] = 0.0;
    d_logits[i] += -c1 * term1 * zp;
    d_logits[i] += -c2 * term2 * zn;
    d_logits[i] = d_logits[i] * d_losses[i];

  }  // CUDA_1D_KERNEL_LOOP
}  // SigmoidFocalLossBackward
98 |
// Host wrapper: returns an N x num_classes tensor of per-element focal losses
// for `logits` (N x num_classes, CUDA) and `targets` (CUDA).
at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                         const at::Tensor &targets,
                                         const int num_classes,
                                         const float gamma, const float alpha) {
  AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
  AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
  AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
  // The kernel derives the sample index from num_classes, so a mismatch
  // would index targets incorrectly (same check as the backward wrapper).
  AT_ASSERTM(logits.size(1) == num_classes,
             "logits.size(1) should be num_classes");

  const int num_samples = logits.size(0);

  auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
  auto losses_size = num_samples * logits.size(1);

  // 512 threads per block, at most 4096 blocks; the kernel's loop covers
  // whatever the grid does not.
  dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L));
  dim3 block(512);

  if (losses.numel() == 0) {
    THCudaCheck(cudaGetLastError());
    return losses;
  }

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      logits.type(), "SigmoidFocalLoss_forward", [&] {
        SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
            losses_size, logits.contiguous().data<scalar_t>(),
            targets.contiguous().data<long>(), num_classes, gamma, alpha,
            num_samples, losses.data<scalar_t>());
      });
  THCudaCheck(cudaGetLastError());
  return losses;
}
130 |
// Host wrapper: returns the N x num_classes gradient of the focal loss with
// respect to `logits`, given the upstream gradient `d_losses`.
at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const at::Tensor &d_losses,
                                          const int num_classes,
                                          const float gamma,
                                          const float alpha) {
  AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
  AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
  AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor");

  AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");

  const int num_samples = logits.size(0);
  AT_ASSERTM(logits.size(1) == num_classes,
             "logits.size(1) should be num_classes");

  auto d_logits = at::zeros({num_samples, num_classes}, logits.options());
  auto d_logits_size = num_samples * logits.size(1);

  // 512 threads per block, at most 4096 blocks; the kernel's loop covers
  // whatever the grid does not.
  dim3 grid(std::min(THCCeilDiv(d_logits_size, 512L), 4096L));
  dim3 block(512);

  if (d_logits.numel() == 0) {
    THCudaCheck(cudaGetLastError());
    return d_logits;
  }

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      logits.type(), "SigmoidFocalLoss_backward", [&] {
        SigmoidFocalLossBackward<scalar_t><<<grid, block>>>(
            d_logits_size, logits.contiguous().data<scalar_t>(),
            targets.contiguous().data<long>(),
            d_losses.contiguous().data<scalar_t>(), num_classes, gamma, alpha,
            num_samples, d_logits.data<scalar_t>());
      });

  THCudaCheck(cudaGetLastError());
  return d_logits;
}
170 |
--------------------------------------------------------------------------------
/lib/optimer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.optim.optimizer import Optimizer, required
3 | import math
4 |
5 |
class SGD(Optimizer):
    """SGD with momentum and a built-in linear learning-rate warm-up.

    During the first ``warm_up`` calls to ``step`` the effective learning
    rate is ``lr * step / warm_up``; afterwards each group's ``lr`` is used
    as is. The step counter lives on the optimizer instance and is not part
    of ``state_dict()``.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr: base learning rate (required).
        momentum, dampening, weight_decay, nesterov: as in ``torch.optim.SGD``.
        warm_up: number of warm-up steps.

    Raises:
        ValueError: on a negative ``lr``, ``momentum`` or ``weight_decay``,
            or when Nesterov is requested without momentum or with dampening.
    """

    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False, warm_up=1000):
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if weight_decay < 0.0:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))

        defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
                        weight_decay=weight_decay, nesterov=nesterov)
        if nesterov and (momentum <= 0 or dampening != 0):
            raise ValueError("Nesterov momentum requires a momentum and zero dampening")
        self.setp_num = 0
        self.warm_up = warm_up
        self.warm_up_end = False
        super(SGD, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(SGD, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('nesterov', False)

    def step(self, closure=None):
        """Perform one optimisation step.

        Args:
            closure: optional callable that re-evaluates the model and
                returns the loss.

        Returns:
            The closure's loss, or ``None`` when no closure is given.
        """
        self.setp_num += 1
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            if self.setp_num <= self.warm_up and not self.warm_up_end:
                lr = group['lr'] * pow(self.setp_num / self.warm_up, 1)
            else:
                lr = group['lr']
                self.warm_up_end = True

            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    # Out of place, so the stored gradient is not modified.
                    d_p = d_p.add(p.data, alpha=weight_decay)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                p.data.add_(d_p, alpha=-lr)
        return loss
70 |
class Adam(Optimizer):
    """Adam with a built-in linear learning-rate warm-up.

    During the first ``warm_up`` calls to ``step`` the effective learning
    rate is ``lr * step / warm_up``; afterwards each group's ``lr`` is used
    as is. The step counter lives on the optimizer instance and is not part
    of ``state_dict()``.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr, betas, eps, weight_decay, amsgrad: as in ``torch.optim.Adam``.
        warm_up: number of warm-up steps.

    Raises:
        ValueError: on a negative ``lr`` or ``eps``, or a beta outside
            ``[0, 1)``.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False, warm_up=1000):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay, amsgrad=amsgrad)
        self.setp_num = 0
        self.warm_up = warm_up
        self.warm_up_end = False
        super(Adam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(Adam, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The closure's loss, or ``None`` when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        self.setp_num += 1
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            if self.setp_num <= self.warm_up and not self.warm_up_end:
                lr = group['lr'] * pow(self.setp_num / self.warm_up, 1)
            else:
                lr = group['lr']
                self.warm_up_end = True

            amsgrad = group['amsgrad']
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = lr * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(exp_avg, denom, value=-step_size)

        return loss
--------------------------------------------------------------------------------
/models/detector.py:
--------------------------------------------------------------------------------
1 | from models.tensormask import TensorMask
2 | from config import cfg as opt
3 | from lib.utils import load_model,save_model
4 | from lib.coco import COCO
5 | import numpy as np
6 | import torch
7 | import os
8 | import cv2
9 |
def affine_transform(pt, t):
    """Apply the affine matrix ``t`` to the 2-D point ``pt``.

    ``pt`` is extended to homogeneous coordinates ``(x, y, 1)`` and the first
    two components of ``t @ (x, y, 1)`` are returned.
    """
    homogeneous = np.array([pt[0], pt[1], 1.], dtype=np.float32)
    return np.dot(t, homogeneous)[:2]
14 |
def cal_iou_np(boxes1, boxes2):
    """Intersection-over-union of ``(x1, y1, x2, y2)`` boxes.

    The inputs are converted to arrays and broadcast against each other; the
    result drops the last dimension. A zero union is not guarded against.
    """
    a = np.array(boxes1)
    b = np.array(boxes2)

    def _area(box):
        return (box[..., 2] - box[..., 0]) * (box[..., 3] - box[..., 1])

    # Overlap rectangle, clamped so disjoint boxes give zero area.
    top_left = np.maximum(a[..., :2], b[..., :2])
    bottom_right = np.minimum(a[..., 2:], b[..., 2:])
    overlap = np.maximum(bottom_right - top_left, 0.0)
    inter_area = overlap[..., 0] * overlap[..., 1]

    union_area = _area(a) + _area(b) - inter_area
    return 1.0 * inter_area / union_area
27 |
class Detector():
    """Single-image TensorMask inference: preprocess, forward, decode, display."""

    def __init__(self,opt):
        """Build the network, load ``opt.weights`` and move the model to the GPU.

        Also precomputes, for each of the ``opt.k + 1`` feature levels, the
        stride, the mask window size, the feature-map size and the cumulative
        number of prediction cells used to locate a flat prediction index.
        """
        self.model = TensorMask(backbone=opt.backbone, num_cls=opt.num_class,
                                base_window=opt.base_window,
                                freezeBN=opt.frezeBN, freezeLayers=opt.frezeLayer,
                                align_corners=opt.align_corners)
        self.model = load_model(self.model, opt.weights)
        self.model.eval()
        self.model.cuda()
        self.mean = COCO.mean
        self.std = COCO.std
        self.opt = opt

        self.strides = np.array([self.opt.base_stride * 2 ** i for i in range(self.opt.k + 1)])
        self.windows = np.array([self.opt.base_window * lamda for lamda in self.strides], np.int32)

        # Rows are (width, height) of each level's feature map.
        self.output_size = np.array(list(zip(self.opt.input_w // self.strides, self.opt.input_h // self.strides)))
        self.num_det = [output_w * output_h for output_w, output_h in self.output_size]
        # det_offset[i] is one past the last flat index that belongs to level i.
        self.det_offset = np.cumsum(self.num_det)

    def run(self,image,vis=True):
        """Detect objects in ``image`` (a path or an array as returned by cv2.imread).

        Returns:
            ``(boxes, masks)`` exactly as produced by :meth:`decode`.

        Raises:
            OSError: if ``image`` is a path that OpenCV cannot read.
        """
        if isinstance(image,str):
            path = image
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise OSError('Could not read image: {}'.format(path))
        show = image.copy()
        image,trans_output = self.prepare_image(image)
        net_input = torch.from_numpy(image).cuda()
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output = self.model(net_input)
        box,mask = self.decode(output,show.shape[:2],trans_output)
        if vis:
            self.show_img(show,box,mask)
        return box,mask

    def prepare_image(self,image):
        """Letterbox ``image`` into the network input and normalise it.

        The image is scaled to fit ``input_w`` x ``input_h`` with its aspect
        ratio preserved, centred on a black canvas, scaled to [0, 1],
        normalised with ``self.mean`` / ``self.std`` and returned as a float32
        array of shape (1, C, H, W).

        Returns:
            ``(network_input, trans_output)`` where ``trans_output`` is the
            affine matrix mapping network-input coordinates back to the
            original image.
        """
        height, width = image.shape[0], image.shape[1]
        ar = width/height
        new_h,new_w = (self.opt.input_h,ar*self.opt.input_h) if ar < 1 else (self.opt.input_w/ar,self.opt.input_w)
        dx, dy = (self.opt.input_w - new_w) / 2, (self.opt.input_h - new_h) / 2
        src = np.array([[0, 0], [0, height], [width, 0]], dtype=np.float32)
        dst = np.array([[dx, dy], [dx, new_h + dy], [new_w + dx, dy]], dtype=np.float32)
        trans_input = cv2.getAffineTransform(src, dst)
        trans_output = cv2.getAffineTransform(dst, src)
        image = cv2.warpAffine(image, trans_input, (self.opt.input_w, self.opt.input_h),
                               flags=cv2.INTER_LINEAR, borderValue=(0, 0, 0))
        image = (image.astype(np.float32) / 255.)
        image = (image- self.mean) / self.std
        image = image.transpose(2, 0, 1)
        image = np.expand_dims(image,0).astype(np.float32)
        return image,trans_output

    def decode(self,output,img_hw,trans_ouput,method = 'nms',iou_threshold=0.45,sigma=0.3):
        """Turn raw network output into boxes and full-image masks.

        Args:
            output: dict with 'cls', 'box' and one entry per level keyed
                '0' .. str(k); only batch element 0 is decoded.
            img_hw: (height, width) of the original image.
            trans_ouput: affine matrix from network-input to original-image
                coordinates.
            method: 'nms' or 'soft-nms'.
            iou_threshold: suppression threshold for 'nms'.
            sigma: Gaussian decay parameter for 'soft-nms'.

        Returns:
            ``(best_bboxes, mask_res)``: a list of rows
            ``x1 y1 x2 y2 p cls ct_feat_x ct_feat_y feat_id`` and, aligned
            with it, a list of uint8 masks of shape ``img_hw``.
        """
        scores,cls_ids = torch.max(output['cls'].sigmoid_(),dim=-1)
        scores = scores.detach().cpu().numpy()
        cls_ids = cls_ids.detach().cpu().numpy()
        box = output['box'].detach().cpu().numpy()
        seg = [output['%d' % i].sigmoid_().detach().cpu().numpy() for i in range(self.opt.k + 1)]
        topk_inds = np.where(scores > self.opt.vis_thresh)
        result = []
        for det_num in topk_inds[1]:
            p = scores[0, det_num]
            cls_index = cls_ids[0,det_num]
            b = box[0, det_num, :]
            # Find the feature level whose flat-index range contains det_num.
            for level, num in enumerate(self.det_offset):
                if num > det_num:
                    break
            offset = det_num - self.det_offset[level - 1] if level > 0 else det_num
            width = self.output_size[level][0]

            ### ct_int_feat
            y = int(offset / width)
            x = int(offset % width)

            # The four box values are distances from the cell centre, in
            # feature cells; convert them to input-pixel corner coordinates.
            b[0:2] = (x + 0.5 - b[0] )* self.strides[level],( y + 0.5 - b[1] )* self.strides[level]
            b[2:4] = (x + 0.5 + b[2] )* self.strides[level],( y + 0.5 + b[3] )* self.strides[level]
            b[0:2] = affine_transform(b[0:2],trans_ouput).astype(int)
            b[2:4] = affine_transform(b[2:4], trans_ouput).astype(int)
            result.append([*b,p,cls_index,x,y,level])

        result = np.array(result)  ## x1 y1 x2 y2 p cls ct_feat_x ct_feat_y feat_id

        ### use box to nms
        class_index = result[:,5] if len(result) > 0 else []
        classes_in_img = list(set(class_index))
        best_bboxes = []
        for cls in classes_in_img:
            cls_mask = (class_index == cls)
            cls_bboxes = result[cls_mask]
            while len(cls_bboxes) > 0:
                max_ind = np.argmax(cls_bboxes[:, 4])
                best_bbox = cls_bboxes[max_ind]
                best_bboxes.append(best_bbox)
                cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
                iou = cal_iou_np(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
                assert method in ['nms', 'soft-nms']
                weight = np.ones((len(iou),), dtype=np.float32)
                if method == 'nms':
                    iou_mask = iou > iou_threshold
                    weight[iou_mask] = 0.0
                if method == 'soft-nms':
                    weight = np.exp(-(1.0 * iou ** 2 / sigma))
                cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
                score_mask = cls_bboxes[:, 4] > self.opt.vis_thresh
                cls_bboxes = cls_bboxes[score_mask]
        mask_res= []
        for det in best_bboxes:
            mask = np.zeros([self.opt.input_h,self.opt.input_w],np.uint8)
            ct_feat_x,ct_feat_y,feat_id = int(det[-3]),int(det[-2]),int(det[-1])
            x, y = int((ct_feat_x + 0.5) * self.strides[feat_id]), int((ct_feat_y + 0.5) * self.strides[feat_id])
            window_seg = seg[feat_id][0, ct_feat_y, ct_feat_x, :, :]
            # Plain Python int so OpenCV accepts it as a size argument.
            window = int(self.windows[feat_id])
            half = window // 2
            paste_x, paste_y, paste_x1, paste_y1 = x - half, y - half, x + half, y + half

            # Crop the part of the window that falls outside the input canvas.
            window_x, window_y, window_x1, window_y1 = max(-paste_x, 0), max(-paste_y, 0), \
                                                       window - max(0, paste_x1 - self.opt.input_w), \
                                                       window - max(0, paste_y1 - self.opt.input_h)
            paste_x, paste_y, paste_x1, paste_y1 = max(paste_x, 0), max(paste_y, 0), \
                                                   min(paste_x1, self.opt.input_w), \
                                                   min(paste_y1,self.opt.input_h)
            window_seg = cv2.resize(window_seg, (window, window))
            window_seg = (window_seg > 0.5).astype(np.uint8)
            mask[paste_y:paste_y1, paste_x:paste_x1] = window_seg[window_y:window_y1, window_x:window_x1]
            mask = cv2.warpAffine(mask, trans_ouput,
                                  (img_hw[1], img_hw[0]),
                                  flags=cv2.INTER_LINEAR)

            mask_res.append(mask)
        return best_bboxes,mask_res

    def show_img(self,img,box,mask):
        """Overlay every mask (and box, if ``opt.show_box``) on ``img`` and display it.

        ``img`` is modified in place. The caller has to run the HighGUI event
        loop (e.g. ``cv2.waitKey``) for the window to actually be drawn.
        """
        for i in range(len(box)):
            # ``np.int`` was removed from NumPy; the builtin int is equivalent.
            det = box[i].astype(int)
            if self.opt.show_box:
                cv2.rectangle(img, (int(det[0]), int(det[1])), (int(det[2]), int(det[3])), (255, 0, 0), 2)
            color = np.array([[np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]])
            seg = mask[i]==1
            img[seg] = img[seg] * 0.2 + color * 0.8

        cv2.imshow('result',img)
167 |
168 |
if __name__ == '__main__':
    # Demo: run the detector on one hard-coded image and show the result.
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    opt.weights = '/data/yoloCao/pycharmProjects/tensormask/exp/coco_person/model_last.pth'
    detector = Detector(opt)
    img = '/data/yoloCao/DataSet/VOC2007/JPEGImages/2007_000027.jpg'
    opt.vis_thresh = 0.5
    detector.run(img)
    # cv2.imshow only paints once the HighGUI event loop runs; without this
    # the script exits before the window is ever displayed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
176 |
177 |
--------------------------------------------------------------------------------
/lib/coco.py:
--------------------------------------------------------------------------------
1 | import pycocotools.coco as coco
2 | import pycocotools.cocoeval
3 | import numpy as np
4 | import os
5 | import cv2
6 | from torch.utils.data import Dataset
7 |
8 |
def affine_transform(pt, t):
    """Return ``pt`` transformed by the affine matrix ``t``.

    ``pt`` supplies x and y; a constant 1 is appended so that the
    translation column of ``t`` takes part in the product. Only the first
    two components of the result are returned.
    """
    x, y = pt[0], pt[1]
    column = np.array([x, y, 1.], dtype=np.float32)
    return np.dot(t, column)[:2]
13 |
class COCO(Dataset):
    """COCO 2017 instance dataset that builds TensorMask training targets.

    Each sample is an image letterboxed to ``cfg.input_w`` x ``cfg.input_h``
    together with per-level classification labels, box targets and
    window-shaped mask targets.
    """
    # Per-channel statistics shaped (1, 1, 3) so they broadcast over H x W x C.
    mean = np.array([0.40789654, 0.44719302, 0.47026115],
                    dtype=np.float32).reshape(1, 1, 3)
    std = np.array([0.28863828, 0.27408164, 0.27809835],
                   dtype=np.float32).reshape(1, 1, 3)

    def __init__(self, cfg, split = 'train',augment = True):
        """Load the annotation index for ``split`` and cache the target settings.

        Args:
            cfg: configuration object; reads data_dir, class_name, input_w,
                input_h, base_stride, base_window, k, max_objs, jitter
                (and ``test`` in ``__getitem__``).
            split: name of the COCO 2017 split, e.g. 'train' or 'val'.
            augment: enable aspect-ratio jitter, random placement and
                horizontal flipping.
        """
        super(COCO, self).__init__()
        self.data_dir = cfg.data_dir
        self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
        # Format only the file name so braces in data_dir cannot break it.
        self.annot_path = os.path.join(
            self.data_dir, 'annotations',
            'instances_{}2017.json'.format(split))
        self.split = split
        print('==> initializing coco 2017 {} data.'.format(split))
        self.coco = coco.COCO(self.annot_path)
        self.images = self.coco.getImgIds()
        self.num_samples = len(self.images)

        self.class_name = [
            '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
            'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
            'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
            'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
            'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
            'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
            'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
            'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush']
        self._valid_ids = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
            14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
            72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
            82, 84, 85, 86, 87, 88, 89, 90]
        if cfg.class_name != '*' :
            # Restrict the dataset to one category and to images containing it.
            self._valid_ids = [self._category_id_for(
                self.class_name, self._valid_ids, cfg.class_name)]
            self.class_name = [cfg.class_name]
            # Pass a list: a bare string is iterable, so the name filter would
            # otherwise be applied to it as a container rather than one name.
            catIds = self.coco.getCatIds(catNms=self.class_name)
            assert catIds == self._valid_ids
            self.images = self.coco.getImgIds(self.images, catIds)
            self.num_samples = len(self.images)

        # COCO category id -> contiguous class index.
        self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
        self.input_w = cfg.input_w
        self.input_h = cfg.input_h
        self.base_stride = cfg.base_stride
        self.base_window = cfg.base_window
        self.k = cfg.k
        self.num_class = len(self.class_name)

        self.augment=augment
        self.max_objs = cfg.max_objs
        self.jitter = cfg.jitter
        self.cfg = cfg
        if not self.augment:
            self.jitter = 0
        print('Loaded {} {} samples'.format(split, self.num_samples))

    @staticmethod
    def _category_id_for(class_names, valid_ids, name):
        """Return the category id stored for ``name``.

        ``class_names`` starts with a '__background__' entry that has no id,
        so the id of the name at position ``i`` is ``valid_ids[i - 1]``. The
        ids are not contiguous, which is why the position itself cannot be
        used as the id.

        Raises:
            ValueError: if ``name`` is unknown or is the background entry.
        """
        position = class_names.index(name)
        if position == 0:
            raise ValueError('{!r} has no category id'.format(name))
        return valid_ids[position - 1]

    def __len__(self):
        """Number of images in the (possibly class-filtered) dataset."""
        return self.num_samples


    def _coco_box_to_bbox(self, box):
        """Convert ``[x, y, w, h]`` to a float32 ``[x1, y1, x2, y2]`` array."""
        bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],dtype=np.float32)
        return bbox

    def get_image_name(self,img_id):
        """Return the image path for dataset index ``img_id`` (an index into ``self.images``)."""
        return os.path.join(self.img_dir,self.coco.loadImgs(ids=[self.images[img_id]])[0]['file_name']).strip()

    def _to_float(self, x):
        """Round ``x`` to two decimals via string formatting."""
        return float("{:.2f}".format(x))

    def __getitem__(self, index):
        """Build the network input and all training targets for one image.

        Returns:
            dict with 'input' (normalised C x H x W image), 'cls' (flat
            per-cell class labels over all levels, 0 = background), 'ind',
            'xywh', 'reg_mask', and for every level ``i`` the entries
            'seg_i', 'seg_ind_i' and 'seg_mask_i'. When ``cfg.test`` is set
            the un-normalised letterboxed image is added under 'img'.

        Raises:
            OSError: if the image file cannot be read.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        anns = list(filter(lambda x: x['category_id'] in self._valid_ids and x['iscrowd'] != 1, anns))
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise OSError('Could not read image: {}'.format(img_path))

        ## augment
        height, width = image.shape[0], image.shape[1]
        dw, dh = self.jitter * width, self.jitter * height
        new_ar = (width + np.random.uniform(-dw, dw)) / (height + np.random.uniform(-dh, dh))
        scale = 1
        if new_ar < 1:
            new_h = scale * self.input_h
            new_w = new_ar * new_h
        else:
            new_w = scale * self.input_w
            new_h = new_w / new_ar

        # Random placement when augmenting, centred otherwise.
        dx, dy = (np.random.uniform(0, self.input_w - new_w), np.random.uniform(0, self.input_h - new_h)) \
            if self.augment else ((self.input_w - new_w) / 2, (self.input_h - new_h) / 2)

        flipped = False
        if np.random.random() < 0.5 and self.augment:
            image = np.copy(image[:, ::-1, :])
            flipped = True

        src = np.array([[0, 0], [0, height], [width, 0]], dtype=np.float32)
        dst = np.array([[dx, dy], [dx, new_h + dy], [new_w + dx, dy]], dtype=np.float32)
        trans_input = cv2.getAffineTransform(src, dst)
        image = cv2.warpAffine(image, trans_input, (self.input_w, self.input_h),
                               flags=cv2.INTER_LINEAR, borderValue=(0, 0, 0))
        show = image.copy()
        image = (image.astype(np.float32) / 255.)
        image = (image- self.mean) / self.std
        image = image.transpose(2, 0, 1)

        strides = np.array([self.base_stride*2**i for i in range(self.k+1)])
        windows = np.array([self.base_window*lamda for lamda in strides],np.int32)

        # Rows are (width, height) of each level's feature map.
        output_size = np.array(list(zip(self.input_w // strides, self.input_h // strides)))
        num_det = [output_w*output_h for output_w, output_h in output_size]
        # det_offset[i] is one past the last flat index of level i.
        det_offset = np.cumsum(num_det)
        label_conf = np.zeros((sum(num_det)),dtype=np.int64)
        xywh = np.zeros((self.max_objs, 7), dtype=np.float32)  # x1 y1 x2 y2 ct_x ct_y stride
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        seg = [np.zeros((self.max_objs,window//self.base_stride,window//self.base_stride),dtype=np.float32) for window in windows]
        seg_ind = [np.zeros((self.max_objs),dtype=np.int64) for _ in windows]
        seg_mask = [np.zeros((self.max_objs),dtype=np.uint8) for _ in windows]
        num_objs = min(len(anns),self.max_objs)

        if num_objs > 0 :
            # Shuffle so that truncation to max_objs drops random objects.
            np.random.shuffle(anns)
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            segment = self.coco.annToMask(ann)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                segment = segment[:, ::-1]
            bbox[:2] = affine_transform(bbox[:2], trans_input)
            bbox[2:] = affine_transform(bbox[2:], trans_input)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.input_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.input_h - 1)

            w, h = bbox[2:] - bbox[:2]
            max_edge = max(w, h)
            min_edge = min(w, h)
            ratio = max_edge / windows
            window_mask = (ratio >= 0.5) * (ratio <= 1.)  ## window > max(w,h) > window/2
            best_window = windows[window_mask]
            if len(best_window) == 0 and \
                    min_edge > 0 and \
                    min_edge < windows[0]:  ### for small guys
                best_window = [windows[0]]
                window_mask[0] = True

            feat_stride = strides[window_mask]
            feat_size = output_size[window_mask]
            window_offset = det_offset[window_mask]
            if len(best_window) > 0 and min_edge > 0:  ## min_edge must > 0
                segment = cv2.warpAffine(segment, trans_input,
                                         (self.input_w, self.input_h),
                                         flags=cv2.INTER_LINEAR)
                ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)

                for window_id in range(len(best_window)):
                    best_window_size = best_window[window_id]
                    feat_w, feat_h = feat_size[window_id]
                    stride = feat_stride[window_id]

                    xx, yy = np.arange(0, feat_w), np.arange(0, feat_h)
                    xx, yy = (xx + 0.5) * stride, (yy + 0.5) * stride
                    ct_feat_x, ct_feat_y = np.argmin(np.abs(ct[0] - xx)), np.argmin(
                        np.abs(ct[1] - yy))  ## window ct close to box ct
                    ct_img_x, ct_img_y = int(xx[ct_feat_x]), int(yy[ct_feat_y])
                    # Padding by half a window on every side makes the slice
                    # starting at (ct_img_y, ct_img_x) a window centred on
                    # that point in the unpadded mask.
                    paded_segmnet = np.pad(segment, ((best_window_size // 2, best_window_size // 2),
                                                     (best_window_size // 2, best_window_size // 2)), 'constant',
                                           constant_values=0)
                    window_segment = paded_segmnet[ct_img_y: ct_img_y + best_window_size,
                                                   ct_img_x: ct_img_x + best_window_size]

                    # Start of this level inside the flat label vector.
                    feat_offset = window_offset[window_id] - feat_w * feat_h
                    output_offset = ct_feat_y * feat_w + ct_feat_x
                    label_conf[feat_offset + output_offset] = (cls_id + 1)

                    xywh[k, 0:4] = bbox[0:4]
                    xywh[k, 4:6] = ct_feat_x, ct_feat_y
                    xywh[k, 6] = stride

                    ind[k] = feat_offset + output_offset
                    reg_mask[k] = 1

                    window_segment = cv2.resize(window_segment, (best_window_size // self.base_stride,
                                                                 best_window_size // self.base_stride))
                    window_index = windows.tolist().index(best_window_size)
                    seg[window_index][k] = window_segment.astype(np.float32).copy()

                    seg_ind[window_index][k] = output_offset
                    seg_mask[window_index][k] = 1

        ret = {'input':image ,'cls':label_conf,'ind': ind, 'xywh':xywh ,'reg_mask':reg_mask}
        for i in range(len(windows)):
            ret['seg_%d'%i] = seg[i]
            ret['seg_ind_%d' % i] = seg_ind[i]
            ret['seg_mask_%d' % i] = seg_mask[i]

        if self.cfg.test :
            ret['img'] = show

        return ret
230 |
if __name__ == '__main__':
    # Smoke test: walk the whole validation split once, printing each index.
    from config import cfg
    import torch

    data = COCO(cfg, split='val', augment=False)
    for i, _sample in enumerate(data):
        print(i)
238 |
--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import cv2
4 | from torch.utils.data import Sampler,RandomSampler,SequentialSampler,DataLoader
5 | import os
6 | import time
7 | import sys
8 | import math
9 | from torch.optim import Optimizer
10 |
class AverageMeter(object):
    """Keeps the most recent value and the running weighted mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        if not self.count > 0:
            # Nothing counted yet: leave the previous average untouched.
            return
        self.avg = self.sum / self.count
28 |
29 |
def load_model(model, model_path, optimizer=None, resume=False,
               lr=None, lr_step=None):
    """Load checkpoint weights into ``model`` and optionally resume training.

    Args:
        model: module that receives the weights.
        model_path: checkpoint file holding a dict with 'state_dict' and,
            optionally, 'epoch' and 'optimizer'.
        optimizer: optimizer to restore; only touched when ``resume`` is True.
        resume: restore the optimizer state and the epoch counter.
        lr: base learning rate. When given, it is multiplied by 0.1 for every
            entry of ``lr_step`` already reached and written into every
            parameter group; when None the restored learning rates are kept.
        lr_step: epochs at which the learning rate drops; None means none.

    Returns:
        ``model`` when ``optimizer`` is None, otherwise
        ``(model, optimizer, start_epoch)``.
    """
    start_epoch = 0
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    print('loaded {}, epoch {}'.format(model_path, checkpoint.get('epoch')))
    state_dict_ = checkpoint['state_dict']
    state_dict = {}

    # Convert a DataParallel checkpoint to a plain one. Only the exact
    # 'module.' prefix is removed, so names such as 'module_list.0' or
    # 'modules.x' are left alone.
    prefix = 'module.'
    for k in state_dict_:
        if k.startswith(prefix):
            state_dict[k[len(prefix):]] = state_dict_[k]
        else:
            state_dict[k] = state_dict_[k]
    model_state_dict = model.state_dict()

    # check loaded parameters and created model parameters
    msg = 'If you see this, your model does not fully load the ' + \
          'pre-trained weight. Please make sure ' + \
          'you have correctly specified --arch xxx ' + \
          'or set the correct --num_classes for your own dataset.'
    for k in state_dict:
        if k in model_state_dict:
            if state_dict[k].shape != model_state_dict[k].shape:
                print('Skip loading parameter {}, required shape{}, ' \
                      'loaded shape{}. {}'.format(
                          k, model_state_dict[k].shape, state_dict[k].shape, msg))
                # Keep the freshly initialised tensor for mismatching shapes.
                state_dict[k] = model_state_dict[k]
        else:
            print('Drop parameter {}.'.format(k) + msg)
    for k in model_state_dict:
        if not (k in state_dict):
            print('No param {}.'.format(k) + msg)
            state_dict[k] = model_state_dict[k]
    model.load_state_dict(state_dict, strict=False)

    # resume optimizer parameters
    if optimizer is not None and resume:
        if 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint.get('epoch', 0)
            if lr is not None:
                start_lr = lr
                # lr_step defaults to None, which means "no decay steps".
                for step in lr_step or ():
                    if start_epoch >= step:
                        start_lr *= 0.1
                for param_group in optimizer.param_groups:
                    param_group['lr'] = start_lr
            else:
                # No base lr supplied: keep what the checkpoint restored
                # instead of overwriting every group with None.
                start_lr = optimizer.param_groups[0]['lr']
            print('Resumed optimizer with start lr', start_lr)
        else:
            print('No optimizer parameters in checkpoint.')
    if optimizer is not None:
        return model, optimizer, start_epoch
    else:
        return model
84 |
85 |
def save_model(path, epoch, model, optimizer=None):
    """Write a checkpoint dict to ``path``.

    The dict holds 'epoch', the model's 'state_dict' (taken from
    ``model.module`` when the model is wrapped in DataParallel) and, when an
    optimizer is given, its state under 'optimizer'.
    """
    wrapped = isinstance(model, torch.nn.DataParallel)
    target = model.module if wrapped else model
    checkpoint = {'epoch': epoch, 'state_dict': target.state_dict()}
    if optimizer is not None:
        checkpoint['optimizer'] = optimizer.state_dict()
    torch.save(checkpoint, path)
96 |
97 |
98 |
99 |
class BatchSampler(object):
    """Groups sampler indices into batches of ``[index, size...]`` entries.

    Every entry carries the image size that is current when the index is
    drawn. The size starts at 608 x 608 and, when ``multiscale_step`` is
    set, a new one is drawn at random from ``img_sizes`` each time the
    number of yielded batches is a multiple of ``multiscale_step``.
    """

    def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None):
        if not isinstance(sampler, Sampler):
            raise ValueError("sampler should be an instance of "
                             "torch.utils.data.Sampler, but got sampler={}"
                             .format(sampler))
        if not isinstance(drop_last, bool):
            raise ValueError("drop_last should be a boolean value, but got "
                             "drop_last={}".format(drop_last))
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        multiscale = multiscale_step is not None
        if multiscale and multiscale_step < 1:
            raise ValueError("multiscale_step should be > 0, but got "
                             "multiscale_step={}".format(multiscale_step))
        if multiscale and img_sizes is None:
            raise ValueError("img_sizes must a list, but got img_sizes={} ".format(img_sizes))

        self.multiscale_step = multiscale_step
        self.img_sizes = np.array(img_sizes)

    def __iter__(self):
        yielded = 0
        pending = []
        size = [608,608]
        for idx in self.sampler:
            pending.append([idx, *size])
            if len(pending) != self.batch_size:
                continue
            yield pending
            yielded += 1
            pending = []
            # A new size is only ever picked on a batch boundary.
            if self.multiscale_step and yielded % self.multiscale_step == 0:
                choice = np.random.randint(0, len(self.img_sizes))
                size = self.img_sizes[choice]
        if not self.drop_last and len(pending) > 0:
            yield pending

    def __len__(self):
        total = len(self.sampler)
        if not self.drop_last:
            # Round up so the final short batch is counted.
            return (total + self.batch_size - 1) // self.batch_size
        return total // self.batch_size
141 |
142 | class AdaBound(Optimizer):
143 | """Implements AdaBound algorithm.
144 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
145 | Arguments:
146 | params (iterable): iterable of parameters to optimize or dicts defining
147 | parameter groups
148 | lr (float, optional): Adam learning rate (default: 1e-3)
149 | betas (Tuple[float, float], optional): coefficients used for computing
150 | running averages of gradient and its square (default: (0.9, 0.999))
151 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
152 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
153 | eps (float, optional): term added to the denominator to improve
154 | numerical stability (default: 1e-8)
155 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
156 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
157 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
158 | https://openreview.net/forum?id=Bkg3g2R9FX
159 | """
160 |
161 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
162 | eps=1e-8, weight_decay=0, amsbound=False):
163 | if not 0.0 <= lr:
164 | raise ValueError("Invalid learning rate: {}".format(lr))
165 | if not 0.0 <= eps:
166 | raise ValueError("Invalid epsilon value: {}".format(eps))
167 | if not 0.0 <= betas[0] < 1.0:
168 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
169 | if not 0.0 <= betas[1] < 1.0:
170 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
171 | if not 0.0 <= final_lr:
172 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
173 | if not 0.0 <= gamma < 1.0:
174 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
175 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
176 | weight_decay=weight_decay, amsbound=amsbound)
177 | super(AdaBound, self).__init__(params, defaults)
178 |
179 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
180 |
181 | def __setstate__(self, state):
182 | super(AdaBound, self).__setstate__(state)
183 | for group in self.param_groups:
184 | group.setdefault('amsbound', False)
185 |
186 | def step(self, closure=None):
187 | """Performs a single optimization step.
188 | Arguments:
189 | closure (callable, optional): A closure that reevaluates the model
190 | and returns the loss.
191 | """
192 | loss = None
193 | if closure is not None:
194 | loss = closure()
195 |
196 | for group, base_lr in zip(self.param_groups, self.base_lrs):
197 | for p in group['params']:
198 | if p.grad is None:
199 | continue
200 | grad = p.grad.data
201 | if grad.is_sparse:
202 | raise RuntimeError(
203 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
204 | amsbound = group['amsbound']
205 |
206 | state = self.state[p]
207 |
208 | # State initialization
209 | if len(state) == 0:
210 | state['step'] = 0
211 | # Exponential moving average of gradient values
212 | state['exp_avg'] = torch.zeros_like(p.data)
213 | # Exponential moving average of squared gradient values
214 | state['exp_avg_sq'] = torch.zeros_like(p.data)
215 | if amsbound:
216 | # Maintains max of all exp. moving avg. of sq. grad. values
217 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
218 |
219 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
220 | if amsbound:
221 | max_exp_avg_sq = state['max_exp_avg_sq']
222 | beta1, beta2 = group['betas']
223 |
224 | state['step'] += 1
225 |
226 | if group['weight_decay'] != 0:
227 | grad = grad.add(group['weight_decay'], p.data)
228 |
229 | # Decay the first and second moment running average coefficient
230 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
231 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
232 | if amsbound:
233 | # Maintains the maximum of all 2nd moment running avg. till now
234 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
235 | # Use the max. for normalizing running avg. of gradient
236 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
237 | else:
238 | denom = exp_avg_sq.sqrt().add_(group['eps'])
239 |
240 | bias_correction1 = 1 - beta1 ** state['step']
241 | bias_correction2 = 1 - beta2 ** state['step']
242 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
243 |
244 | # Applies bounds on actual learning rate
245 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
246 | final_lr = group['final_lr'] * group['lr'] / base_lr
247 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
248 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
249 | step_size = torch.full_like(denom, step_size)
250 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
251 |
252 | p.data.add_(-step_size)
253 |
254 | return loss
255 |
256 |
257 |
# tensorboardX is optional: when it cannot be imported, Logger still writes
# its text log and scalar_summary() does nothing.
USE_TENSORBOARD = True
try:
    import tensorboardX

    print('Using tensorboardX')
except Exception:
    # Any failure while importing falls back to text-only logging, but
    # KeyboardInterrupt / SystemExit are no longer swallowed as they were by
    # the bare ``except``.
    USE_TENSORBOARD = False
265 |
266 |
class Logger(object):
    """Writes run options, a timestamped text log and, optionally, scalar summaries."""

    def __init__(self, opt):
        """Create a summary writer logging to log_dir.

        Dumps every public attribute of ``opt`` to ``<save_dir>/opt.txt``,
        creates ``<save_dir>/logs_<timestamp>/``, opens ``log.txt`` inside it
        and copies ``opt.txt`` next to it.
        """
        # Local import: shutil is only needed here.
        import shutil

        os.makedirs(opt.save_dir, exist_ok=True)

        time_str = time.strftime('%Y-%m-%d-%H-%M')

        args = dict((name, getattr(opt, name)) for name in dir(opt)
                    if not name.startswith('_'))
        file_name = os.path.join(opt.save_dir, 'opt.txt')
        with open(file_name, 'wt') as opt_file:
            opt_file.write('==> torch version: {}\n'.format(torch.__version__))
            opt_file.write('==> cudnn version: {}\n'.format(
                torch.backends.cudnn.version()))
            opt_file.write('==> Cmd:\n')
            opt_file.write(str(sys.argv))
            opt_file.write('\n==> Opt:\n')
            for k, v in sorted(args.items()):
                opt_file.write('  %s: %s\n' % (str(k), str(v)))

        log_dir = opt.save_dir + '/logs_{}'.format(time_str)
        # Create the directory up front (no exists/mkdir race) so that
        # log.txt can be opened whether or not a summary writer is used.
        os.makedirs(log_dir, exist_ok=True)
        if USE_TENSORBOARD:
            self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
        self.log = open(log_dir + '/log.txt', 'w')
        try:
            # Portable replacement for the former ``cp`` shell command; the
            # copy stays best effort.
            shutil.copy(file_name, log_dir)
        except OSError:
            pass
        self.start_line = True

    def write(self, txt):
        """Append ``txt`` to the log, prefixing a timestamp at the start of each line.

        The file is flushed whenever ``txt`` contains a newline.
        """
        if self.start_line:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            self.log.write('{}: {}'.format(time_str, txt))
        else:
            self.log.write(txt)
        self.start_line = False
        if '\n' in txt:
            self.start_line = True
            self.log.flush()

    def close(self):
        """Close the text log and, when present, the summary writer."""
        self.log.close()
        writer = getattr(self, 'writer', None)
        if writer is not None:
            writer.close()

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        if USE_TENSORBOARD:
            self.writer.add_scalar(tag, value, step)
--------------------------------------------------------------------------------
/models/ops/align2nat/src/swap_align2nat_kernel.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 |
// Loop header for kernels working on a flat index range: each thread starts
// at blockIdx.x * blockDim.x + threadIdx.x and advances by
// blockDim.x * gridDim.x until it reaches n.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)

// Block size that GET_BLOCKS divides by when computing the number of blocks.
#define THREADS_PER_BLOCK 1024
11 |
// Number of blocks for N work items: N / THREADS_PER_BLOCK rounded up,
// limited to at most 65000.
inline int GET_BLOCKS(const int N) {
  const int block_limit = 65000;
  const int blocks_needed = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  return (blocks_needed < block_limit) ? blocks_needed : block_limit;
}
17 |
// Maps a destination index to its (fractional) source coordinate.
//   align_corners == true : scale * dst_index
//   align_corners == false: scale * (dst_index + 0.5) - 0.5, clamped below
//                           at 0.
// The template parameter list and the static_cast type arguments had been
// lost from this definition; they are restored here.
template <typename accscalar_t>
__device__ __forceinline__ static accscalar_t area_pixel_compute_source_index(
    accscalar_t scale,
    int dst_index,
    bool align_corners) {
  if (align_corners) {
    return scale * dst_index;
  } else {
    accscalar_t src_idx = scale * (dst_index + static_cast<accscalar_t>(0.5)) -
        static_cast<accscalar_t>(0.5);
    // See Note[Follow Opencv resize logic]
    return (src_idx < static_cast<accscalar_t>(0))
        ? static_cast<accscalar_t>(0)
        : src_idx;
  }
}
34 |
// Flat offset of element (n, v, u, y, x) in a contiguous array whose four
// innermost extents are V, U, H and W (x varies fastest).
__device__ __forceinline__ size_t
loaction(const size_t n,const size_t v,const size_t u,const size_t y,const size_t x,
         const size_t V,const size_t U,const size_t H,const size_t W) {
    size_t offset = n;
    offset = offset * V + v;
    offset = offset * U + u;
    offset = offset * H + y;
    offset = offset * W + x;
    return offset;
}
40 |
// Reads data[n][v][u][y][x] from a contiguous array with inner extents
// V, U, H, W, returning pad_val when (y, x) lies outside the H x W plane.
// The template parameter list had been lost from this definition and is
// restored here.
template <typename scalar_t>
__device__ __forceinline__ scalar_t get_val(const scalar_t*data,
       const size_t n,const size_t v,const size_t u,const size_t y,const size_t x,
       const size_t V,const size_t U,const size_t H,const size_t W,const scalar_t pad_val) {
    // x and y are unsigned, so the former "< 0" comparisons could never be
    // true and are dropped. A negative coordinate passed by a caller wraps
    // to a very large value on conversion and is rejected by the
    // upper-bound checks below.
    if (x >= W || y >= H) {
        return pad_val;
    }
    return data[x + W*(y + H*(u + U*(v + V*n)))];
}
51 |
// Bilinear blend over the four (v, u) neighbours of one spatial tap (y, x).
// Taps whose (y, x) falls outside the H x W plane contribute pad_val, as
// decided by get_val.
template <typename scalar_t, typename accscalar_t>
__device__ __forceinline__ accscalar_t swap_align2nat_blend_vu(
    const scalar_t *data, const int n,
    const int v1, const int v1p, const int u1, const int u1p,
    const int y, const int x,
    const int V, const int U, const int H, const int W,
    const accscalar_t v0lambda, const accscalar_t v1lambda,
    const accscalar_t u0lambda, const accscalar_t u1lambda,
    const scalar_t pad_val) {
  return v0lambda * u0lambda * static_cast<accscalar_t>(get_val(data,n,v1,u1,y,x,V,U,H,W,pad_val)) +
         v0lambda * u1lambda * static_cast<accscalar_t>(get_val(data,n,v1,u1+u1p,y,x,V,U,H,W,pad_val)) +
         v1lambda * u0lambda * static_cast<accscalar_t>(get_val(data,n,v1+v1p,u1,y,x,V,U,H,W,pad_val)) +
         v1lambda * u1lambda * static_cast<accscalar_t>(get_val(data,n,v1+v1p,u1+u1p,y,x,V,U,H,W,pad_val));
}

// Forward kernel: fills top_data, viewed as (N, newV, newU, newH, newW),
// from bottom_data, viewed as (N, orgV, orgU, orgH, orgW).
//
// Each loop iteration produces one output element (n, ov, ou, oh, ow).
// The spatial read position is shifted by alpha * (ov - newV/2) rows and
// alpha * (ou - newU/2) columns.
//  * When the new and org extents are all equal, the shifted element is
//    copied (pad_val when the shifted position is out of range).
//  * Otherwise the value is interpolated bilinearly along h, w, v and u;
//    source coordinates come from area_pixel_compute_source_index with the
//    per-axis scales scaleH/scaleW/scaleV/scaleU.
//
// nthreads is the number of output elements to produce.
template <typename scalar_t, typename accscalar_t>
__global__ void SwapAlign2NatForward(const int nthreads, const scalar_t *bottom_data,scalar_t *top_data,
                                    const accscalar_t scaleV,const accscalar_t scaleU,
                                    const int newV,const int newU,
                                    const int newH,const int newW,
                                    const accscalar_t scaleH,const accscalar_t scaleW,
                                    const int orgV,const int orgU,
                                    const int orgH,const int orgW,
                                    const int alpha,const bool align_corners,const scalar_t pad_val
                                    ) {
  // Integer offsets, matching SwapAlign2NatBackward. They were floats here,
  // but -newV/2 is already an integer division, so the float round-trip
  // only added conversions to what is pure index arithmetic.
  const int v_offset = -newV/2;
  const int u_offset = -newU/2;
  const bool same_shape = (newV==orgV && newU==orgU && newW==orgW && newH==orgH);
  CUDA_1D_KERNEL_LOOP(index,nthreads){
    // (n, ov, ou, oh, ow) is an element in the top_data
    int rest = index;
    const int ow = rest % newW;  rest /= newW;
    const int oh = rest % newH;  rest /= newH;
    const int ou = rest % newU;  rest /= newU;
    const int ov = rest % newV;
    const int n  = rest / newV;

    // Spatial displacement selected by the (v, u) position of this output.
    const int shift_h = alpha * (ov + v_offset);
    const int shift_w = alpha * (ou + u_offset);

    if (same_shape){
      top_data[index] = get_val(bottom_data,n,ov,ou,oh + shift_h,ow + shift_w,
                                orgV,orgU,orgH,orgW,pad_val);
    } else {
      // h, w: lower source index, step to the upper neighbour (0 at the
      // border) and the two interpolation weights.
      const accscalar_t h1r = area_pixel_compute_source_index<accscalar_t>(
          scaleH, oh, align_corners);
      const int h1 = h1r;
      const int h1p = (h1 < orgH - 1) ? 1 : 0;
      const accscalar_t h1lambda = h1r - h1;
      const accscalar_t h0lambda = static_cast<accscalar_t>(1) - h1lambda;

      const accscalar_t w1r = area_pixel_compute_source_index<accscalar_t>(
          scaleW, ow, align_corners);
      const int w1 = w1r;
      const int w1p = (w1 < orgW - 1) ? 1 : 0;
      const accscalar_t w1lambda = w1r - w1;
      const accscalar_t w0lambda = static_cast<accscalar_t>(1) - w1lambda;

      // v, u: same quantities for the two window axes.
      const accscalar_t v1r = area_pixel_compute_source_index<accscalar_t>(
          scaleV, ov, align_corners);
      const int v1 = v1r;
      const int v1p = (v1 < orgV - 1) ? 1 : 0;
      const accscalar_t v1lambda = v1r - v1;
      const accscalar_t v0lambda = static_cast<accscalar_t>(1) - v1lambda;

      const accscalar_t u1r = area_pixel_compute_source_index<accscalar_t>(
          scaleU, ou, align_corners);
      const int u1 = u1r;
      const int u1p = (u1 < orgU - 1) ? 1 : 0;
      const accscalar_t u1lambda = u1r - u1;
      const accscalar_t u0lambda = static_cast<accscalar_t>(1) - u1lambda;

      // Shifted position of the lower-left spatial tap.
      const int base_h = h1 + shift_h;
      const int base_w = w1 + shift_w;

      // One (v, u) blend per spatial tap.
      const accscalar_t h0w0 = swap_align2nat_blend_vu(
          bottom_data,n,v1,v1p,u1,u1p,base_h,base_w,
          orgV,orgU,orgH,orgW,v0lambda,v1lambda,u0lambda,u1lambda,pad_val);
      const accscalar_t h0w1 = swap_align2nat_blend_vu(
          bottom_data,n,v1,v1p,u1,u1p,base_h,base_w + w1p,
          orgV,orgU,orgH,orgW,v0lambda,v1lambda,u0lambda,u1lambda,pad_val);
      const accscalar_t h1w0 = swap_align2nat_blend_vu(
          bottom_data,n,v1,v1p,u1,u1p,base_h + h1p,base_w,
          orgV,orgU,orgH,orgW,v0lambda,v1lambda,u0lambda,u1lambda,pad_val);
      const accscalar_t h1w1 = swap_align2nat_blend_vu(
          bottom_data,n,v1,v1p,u1,u1p,base_h + h1p,base_w + w1p,
          orgV,orgU,orgH,orgW,v0lambda,v1lambda,u0lambda,u1lambda,pad_val);

      const accscalar_t val = h0lambda * w0lambda * h0w0 +
                              h0lambda * w1lambda * h0w1 +
                              h1lambda * w0lambda * h1w0 +
                              h1lambda * w1lambda * h1w1 ;

      top_data[index] = static_cast<scalar_t>(val);
    }
  }
}
152 |
153 |
// Scatter one spatial tap of the backward pass: atomically adds
// hw_weight * {v,u weight} * d2val into the four (v, u) neighbours at
// spatial position (y, x). Does nothing when (y, x) is outside the H x W
// plane.
template <typename scalar_t, typename accscalar_t>
__device__ __forceinline__ void swap_align2nat_scatter_vu(
    scalar_t *grad, const int n,
    const int v1, const int v1p, const int u1, const int u1p,
    const int y, const int x,
    const int V, const int U, const int H, const int W,
    const accscalar_t hw_weight,
    const accscalar_t v0lambda, const accscalar_t v1lambda,
    const accscalar_t u0lambda, const accscalar_t u1lambda,
    const accscalar_t d2val) {
  if (x < 0 || x >= W || y < 0 || y >= H) {
    return;
  }
  atomicAdd(grad + loaction(n,v1,u1,y,x,V,U,H,W),
            static_cast<scalar_t>(hw_weight * v0lambda * u0lambda * d2val));
  atomicAdd(grad + loaction(n,v1,u1 + u1p,y,x,V,U,H,W),
            static_cast<scalar_t>(hw_weight * v0lambda * u1lambda * d2val));
  atomicAdd(grad + loaction(n,v1+v1p,u1,y,x,V,U,H,W),
            static_cast<scalar_t>(hw_weight * v1lambda * u0lambda * d2val));
  atomicAdd(grad + loaction(n,v1+v1p,u1+u1p,y,x,V,U,H,W),
            static_cast<scalar_t>(hw_weight * v1lambda * u1lambda * d2val));
}

// Backward kernel. Note the naming: bottom_data is the buffer that is READ,
// viewed as (N, newV, newU, newH, newW), and top_data is the buffer that is
// WRITTEN, viewed as (N, orgV, orgU, orgH, orgW).
//
// Each loop iteration takes one element (n, ov, ou, oh, ow) of bottom_data
// and sends it to the positions the forward kernel read from:
//  * equal extents: a plain store to the shifted position, skipped when that
//    position is out of range;
//  * otherwise: the value is distributed with the same h/w/v/u bilinear
//    weights as the forward pass, using atomicAdd because several source
//    elements can land on the same destination.
//
// Positions that are never hit are left untouched, so top_data is
// presumably expected to be zero-filled by the caller -- confirm.
template <typename scalar_t, typename accscalar_t>
__global__ void SwapAlign2NatBackward(const int nthreads, const scalar_t *bottom_data,scalar_t *top_data,
                                    const accscalar_t scaleV,const accscalar_t scaleU,
                                    const int newV,const int newU,
                                    const int newH,const int newW,
                                    const accscalar_t scaleH,const accscalar_t scaleW,
                                    const int orgV,const int orgU,
                                    const int orgH,const int orgW,
                                    const int alpha,const bool align_corners
                                    ) {
  const int v_offset = -newV/2;
  const int u_offset = -newU/2;
  const bool same_shape = (newV==orgV && newU==orgU && newW==orgW && newH==orgH);
  CUDA_1D_KERNEL_LOOP(index,nthreads){
    // (n, ov, ou, oh, ow) is an element in the bottom_data
    int rest = index;
    const int ow = rest % newW;  rest /= newW;
    const int oh = rest % newH;  rest /= newH;
    const int ou = rest % newU;  rest /= newU;
    const int ov = rest % newV;
    const int n  = rest / newV;

    // Spatial displacement selected by the (v, u) position of this element.
    const int shift_h = alpha * (ov + v_offset);
    const int shift_w = alpha * (ou + u_offset);

    if (same_shape){
      const int top_h = oh + shift_h;
      const int top_w = ow + shift_w;
      if (!(top_w <0 || top_w >= orgW || top_h < 0 || top_h >= orgH))
      {
        // new* and org* extents are equal in this branch; org* is used so
        // the destination is indexed the same way as in the branch below.
        const size_t top_offset = loaction(n,ov,ou,top_h,top_w,orgV,orgU,orgH,orgW);
        top_data[top_offset] = bottom_data[index];
      }
    } else {
      // h, w: lower source index, step to the upper neighbour (0 at the
      // border) and the two interpolation weights.
      const accscalar_t h1r = area_pixel_compute_source_index<accscalar_t>(
          scaleH, oh, align_corners);
      const int h1 = h1r;
      const int h1p = (h1 < orgH - 1) ? 1 : 0;
      const accscalar_t h1lambda = h1r - h1;
      const accscalar_t h0lambda = static_cast<accscalar_t>(1) - h1lambda;

      const accscalar_t w1r = area_pixel_compute_source_index<accscalar_t>(
          scaleW, ow, align_corners);
      const int w1 = w1r;
      const int w1p = (w1 < orgW - 1) ? 1 : 0;
      const accscalar_t w1lambda = w1r - w1;
      const accscalar_t w0lambda = static_cast<accscalar_t>(1) - w1lambda;

      // v, u: same quantities for the two window axes.
      const accscalar_t v1r = area_pixel_compute_source_index<accscalar_t>(
          scaleV, ov, align_corners);
      const int v1 = v1r;
      const int v1p = (v1 < orgV - 1) ? 1 : 0;
      const accscalar_t v1lambda = v1r - v1;
      const accscalar_t v0lambda = static_cast<accscalar_t>(1) - v1lambda;

      const accscalar_t u1r = area_pixel_compute_source_index<accscalar_t>(
          scaleU, ou, align_corners);
      const int u1 = u1r;
      const int u1p = (u1 < orgU - 1) ? 1 : 0;
      const accscalar_t u1lambda = u1r - u1;
      const accscalar_t u0lambda = static_cast<accscalar_t>(1) - u1lambda;

      const accscalar_t d2val = static_cast<accscalar_t>(bottom_data[index]);

      // Shifted position of the lower-left spatial tap.
      const int base_h = h1 + shift_h;
      const int base_w = w1 + shift_w;

      // One scatter per spatial tap, each carrying its h*w weight.
      swap_align2nat_scatter_vu(top_data,n,v1,v1p,u1,u1p,base_h,base_w,
          orgV,orgU,orgH,orgW,h0lambda * w0lambda,
          v0lambda,v1lambda,u0lambda,u1lambda,d2val);
      swap_align2nat_scatter_vu(top_data,n,v1,v1p,u1,u1p,base_h,base_w + w1p,
          orgV,orgU,orgH,orgW,h0lambda * w1lambda,
          v0lambda,v1lambda,u0lambda,u1lambda,d2val);
      swap_align2nat_scatter_vu(top_data,n,v1,v1p,u1,u1p,base_h + h1p,base_w,
          orgV,orgU,orgH,orgW,h1lambda * w0lambda,
          v0lambda,v1lambda,u0lambda,u1lambda,d2val);
      swap_align2nat_scatter_vu(top_data,n,v1,v1p,u1,u1p,base_h + h1p,base_w + w1p,
          orgV,orgU,orgH,orgW,h1lambda * w1lambda,
          v0lambda,v1lambda,u0lambda,u1lambda,d2val);
    }
  }
}
275 |
276 |
// Step, in input samples, between two consecutive output samples along one
// axis.
//
//   input_size    extent of the source axis
//   output_size   extent of the resized axis
//   align_corners true:  (input_size - 1) / (output_size - 1)
//                 false: input_size / output_size
//
// Returns 0 when output_size <= 1, which also avoids dividing by zero in
// the align_corners form.
template <typename scalar_t>
static inline scalar_t area_pixel_compute_scale(
    int64_t input_size,
    int64_t output_size,
    bool align_corners) {
  if (output_size <= 1) {
    return scalar_t(0);
  }
  if (align_corners) {
    return static_cast<scalar_t>(input_size - 1) / (output_size - 1);
  }
  return static_cast<scalar_t>(input_size) / output_size;
}
// Host-side launcher for SwapAlign2NatForward.
//
//   input         5-D tensor read as (N, orgV, orgU, orgH, orgW)
//   output        5-D tensor written as (N, newV, newU, newH, newW); its
//                 sizes define the resize target
//   alpha         spatial shift multiplier forwarded to the kernel
//   align_corners selects the scale formula in area_pixel_compute_scale
//   pad_val       value used for reads outside the H x W plane
//
// Returns 1; launch errors are reported through THCudaCheck.
int SwapAlign2NatForwardLaucher(const at::Tensor& input,at::Tensor& output,
                    const int alpha,const bool align_corners,const float pad_val){
    const int B = output.size(0);
    const int newV = output.size(1);
    const int newU = output.size(2);
    const int newH = output.size(3);
    const int newW = output.size(4);
    const int orgV = input.size(1);
    const int orgU = input.size(2);
    const int orgH = input.size(3);
    const int orgW = input.size(4);
    // One kernel work item per output element (the kernel counts in int).
    const int output_size = B*newV*newU*newH*newW;
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
    input.type(), "SwapAlign2NatForwardLaucher", ([&] {
        const scalar_t *bottom_data = input.data<scalar_t>();
        scalar_t *top_data = output.data<scalar_t>();
        using accscalar_t = at::acc_type<scalar_t, true>;
        const accscalar_t rV = area_pixel_compute_scale<accscalar_t>(
            orgV, newV, align_corners);
        // Fixed: the U scale was computed against newV, which is wrong
        // whenever the output's U and V extents differ.
        const accscalar_t rU = area_pixel_compute_scale<accscalar_t>(
            orgU, newU, align_corners);
        const accscalar_t rH = area_pixel_compute_scale<accscalar_t>(
            orgH, newH, align_corners);
        const accscalar_t rW = area_pixel_compute_scale<accscalar_t>(
            orgW, newW, align_corners);
        // NOTE(review): launch configuration follows the
        // GET_BLOCKS / THREADS_PER_BLOCK convention; confirm these are the
        // helpers defined at the top of this file.
        SwapAlign2NatForward<scalar_t, accscalar_t>
        <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(output_size,bottom_data,top_data,
            rV,rU,newV,newU,newH,newW,
            rH,rW,orgV,orgU,orgH,orgW,
            alpha,align_corners,static_cast<scalar_t>(pad_val)
        );
    }));
    THCudaCheck(cudaGetLastError());
    return 1;
}
326 |
327 |
// Host-side launcher for SwapAlign2NatBackward.
//
//   grad_output   5-D tensor read as (N, newV, newU, newH, newW)
//   grad_input    5-D tensor written as (N, orgV, orgU, orgH, orgW)
//   alpha         spatial shift multiplier forwarded to the kernel
//   align_corners selects the scale formula in area_pixel_compute_scale
//
// The kernel only stores or atomically adds into the positions it reaches,
// so grad_input is presumably expected to arrive zero-filled -- confirm
// against the caller.
//
// Returns 1; launch errors are reported through THCudaCheck.
int SwapAlign2NatBackwardLaucher(const at::Tensor& grad_output,at::Tensor& grad_input,
                    const int alpha,const bool align_corners){
    const int B = grad_output.size(0);
    const int newV = grad_output.size(1);
    const int newU = grad_output.size(2);
    const int newH = grad_output.size(3);
    const int newW = grad_output.size(4);
    const int orgV = grad_input.size(1);
    const int orgU = grad_input.size(2);
    const int orgH = grad_input.size(3);
    const int orgW = grad_input.size(4);
    // One kernel work item per grad_output element (the kernel counts in int).
    const int output_size = B*newV*newU*newH*newW;
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
    grad_output.type(), "SwapAlign2NatBackwardLaucher", ([&] {
        const scalar_t *bottom_data = grad_output.data<scalar_t>();
        scalar_t *top_data = grad_input.data<scalar_t>();
        using accscalar_t = at::acc_type<scalar_t, true>;
        const accscalar_t rV = area_pixel_compute_scale<accscalar_t>(
            orgV, newV, align_corners);
        // Fixed: the U scale was computed against newV, which is wrong
        // whenever the U and V extents of grad_output differ.
        const accscalar_t rU = area_pixel_compute_scale<accscalar_t>(
            orgU, newU, align_corners);
        const accscalar_t rH = area_pixel_compute_scale<accscalar_t>(
            orgH, newH, align_corners);
        const accscalar_t rW = area_pixel_compute_scale<accscalar_t>(
            orgW, newW, align_corners);
        // NOTE(review): launch configuration follows the
        // GET_BLOCKS / THREADS_PER_BLOCK convention; confirm these are the
        // helpers defined at the top of this file.
        SwapAlign2NatBackward<scalar_t, accscalar_t>
        <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(output_size,bottom_data,top_data,
            rV,rU,newV,newU,newH,newW,
            rH,rW,orgV,orgU,orgH,orgW,
            alpha,align_corners
        );
    }));
    THCudaCheck(cudaGetLastError());
    return 1;
}
--------------------------------------------------------------------------------