├── README.md
├── src
│   ├── ops
│   │   ├── _cpools
│   │   │   ├── .gitignore
│   │   │   ├── setup.py
│   │   │   ├── __init__.py
│   │   │   └── src
│   │   │       ├── bottom_pool.cpp
│   │   │       ├── right_pool.cpp
│   │   │       ├── left_pool.cpp
│   │   │       └── top_pool.cpp
│   │   └── __init__.py
│   ├── models
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   └── hourglass.py
│   │   ├── bbox_heads
│   │   │   ├── __init__.py
│   │   │   └── centripetal_mask.py
│   │   └── detectors
│   │       ├── __init__.py
│   │       ├── centripetal.py
│   │       └── test_mixins.py
│   ├── core
│   │   ├── __init__.py
│   │   └── corner
│   │       ├── __init__.py
│   │       ├── corner_target.py
│   │       └── kp_utils.py
│   └── datasets
│       ├── coco.py
│       ├── transforms.py
│       ├── extra_aug.py
│       └── custom.py
├── .gitmodules
├── init.sh
├── compile.sh
├── LICENSE
├── .gitignore
└── configs
    └── centripetalnet_mask_hg104.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CentripetalNet

--------------------------------------------------------------------------------
/src/ops/_cpools/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | cpools.egg-info/
3 | dist/

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "mmdetection"]
2 |     path = mmdetection
3 |     url = https://github.com/open-mmlab/mmdetection.git

--------------------------------------------------------------------------------
/src/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResNet
2 | from .resnext import ResNeXt
3 | from .ssd_vgg import SSDVGG
4 | from .hourglass import Hourglass
5 | 
6 | __all__ = ['ResNet', 'ResNeXt', 'SSDVGG', 'Hourglass']

--------------------------------------------------------------------------------
/src/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
3 | from .centripetal_mask import Centripetal_mask
4 | 
5 | __all__ = [
6 |     'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'Centripetal_mask'
7 | ]

--------------------------------------------------------------------------------
/src/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .corner import *
4 | from .mask import *
5 | from .loss import *  # noqa: F401, F403
6 | from .evaluation import *  # noqa: F401, F403
7 | from .post_processing import *  # noqa: F401, F403
8 | from .utils import *  # noqa: F401, F403

--------------------------------------------------------------------------------
/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | # copy files
6 | cp compile.sh mmdetection/
7 | cp -r src/core/* mmdetection/mmdet/core/
8 | cp -r src/datasets/* mmdetection/mmdet/datasets/
9 | cp -r src/models/* mmdetection/mmdet/models/
10 | cp -r src/ops/* mmdetection/mmdet/ops/
11 | 
12 | # compile and setup
13 | cd mmdetection
14 | ./compile.sh
15 | $PYTHON setup.py install --user
--------------------------------------------------------------------------------
/src/core/corner/__init__.py:
--------------------------------------------------------------------------------
1 | from .corner_target import corner_target
2 | from .kp_utils import _gather_feat, _nms, _tranpose_and_gather_feat, _topk, _neg_loss, _sigmoid, _ae_loss, _regr_loss, gaussian2D, draw_gaussian, gaussian_radius, _decode_center
3 | 
4 | __all__ = ['corner_target', '_gather_feat', '_nms', '_tranpose_and_gather_feat', '_topk', '_decode_center', '_neg_loss', '_sigmoid', '_ae_loss', '_regr_loss', 'gaussian2D', 'draw_gaussian', 'gaussian_radius']
5 | 
6 | 

--------------------------------------------------------------------------------
/src/ops/_cpools/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CppExtension
3 | 
4 | setup(
5 |     name="cpools",
6 |     ext_modules=[
7 |         CppExtension("top_pool", ["src/top_pool.cpp"]),
8 |         CppExtension("bottom_pool", ["src/bottom_pool.cpp"]),
9 |         CppExtension("left_pool", ["src/left_pool.cpp"]),
10 |         CppExtension("right_pool", ["src/right_pool.cpp"])
11 |     ],
12 |     cmdclass={
13 |         "build_ext": BuildExtension
14 |     }
15 | )

--------------------------------------------------------------------------------
/src/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDetector
2 | from .cascade_rcnn import CascadeRCNN
3 | from .centripetal import CentripetalNet
4 | from .fast_rcnn import FastRCNN
5 | from .faster_rcnn import FasterRCNN
6 | from .mask_rcnn import MaskRCNN
7 | from .retinanet import RetinaNet
8 | from .rpn import RPN
9 | from .single_stage import SingleStageDetector
10 | from .two_stage import TwoStageDetector
11 | 
12 | __all__ = [
13 |     'BaseDetector', 'CentripetalNet', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
14 |     'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet'
15 | ]

--------------------------------------------------------------------------------
/compile.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | echo "Building roi align op..."
6 | cd mmdet/ops/roi_align
7 | if [ -d "build" ]; then
8 |     rm -r build
9 | fi
10 | $PYTHON setup.py build_ext --inplace
11 | 
12 | echo "Building roi pool op..."
13 | cd ../roi_pool
14 | if [ -d "build" ]; then
15 |     rm -r build
16 | fi
17 | $PYTHON setup.py build_ext --inplace
18 | 
19 | echo "Building nms op..."
20 | cd ../nms
21 | make clean
22 | make PYTHON=${PYTHON}
23 | 
24 | echo "Building dcn..."
25 | cd ../dcn
26 | if [ -d "build" ]; then
27 |     rm -r build
28 | fi
29 | $PYTHON setup.py build_ext --inplace
30 | 
31 | echo "Building corner pooling..."
32 | cd ../_cpools
33 | if [ -d "build" ]; then
34 |     rm -r build
35 | fi
36 | $PYTHON setup.py build_ext --inplace
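As a side note on the build: the corner pooling sources can also be JIT-compiled, which avoids the per-op `setup.py` builds that compile.sh runs. A minimal sketch, assuming this repository layout (`torch.utils.cpp_extension.load` is standard PyTorch; the path below is an assumption based on the tree at the top):

```python
# JIT-compile one corner pooling op instead of running compile.sh / setup.py.
# The source path is an assumption based on this repository's layout.
from torch.utils.cpp_extension import load

top_pool = load(name='top_pool', sources=['src/ops/_cpools/src/top_pool.cpp'])
# The loaded module exposes the same pybind11 entry points as the setup.py
# build: top_pool.forward(x) and top_pool.backward(x, grad_output).
```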
--------------------------------------------------------------------------------
/src/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack,
2 |                   ModulatedDeformRoIPoolingPack, ModulatedDeformConv,
3 |                   ModulatedDeformConvPack, deform_conv, modulated_deform_conv,
4 |                   deform_roi_pooling)
5 | from .nms import nms, soft_nms
6 | from .roi_align import RoIAlign, roi_align
7 | from .roi_pool import RoIPool, roi_pool
8 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool
9 | 
10 | __all__ = [
11 |     'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',
12 |     'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack',
13 |     'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',
14 |     'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',
15 |     'deform_roi_pooling', 'TopPool', 'BottomPool', 'LeftPool', 'RightPool'
16 | ]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 KiveeDong
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | #  Usually these files are written by a python script from a template
30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # cython generated cpp
107 | mmdet/ops/nms/*.cpp
108 | mmdet/version.py
109 | data
110 | .vscode
111 | .idea
112 | 
113 | work_dirs/

--------------------------------------------------------------------------------
/src/ops/_cpools/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from torch import nn
4 | from torch.autograd import Function
5 | import sys
6 | import os
7 | sys.path.append(os.path.join(os.path.dirname(__file__), 'dist/cpools-0.0.0-py3.6-linux-x86_64.egg'))  # make the locally built egg importable
8 | import top_pool, bottom_pool, left_pool, right_pool
9 | 
10 | class TopPoolFunction(Function):
11 |     @staticmethod
12 |     def forward(ctx, input):
13 |         output = top_pool.forward(input)[0]
14 |         ctx.save_for_backward(input)
15 |         return output
16 | 
17 |     @staticmethod
18 |     def backward(ctx, grad_output):
19 |         input = ctx.saved_tensors[0]
20 |         output = top_pool.backward(input, grad_output)[0]
21 |         return output
22 | 
23 | class BottomPoolFunction(Function):
24 |     @staticmethod
25 |     def forward(ctx, input):
26 |         output = bottom_pool.forward(input)[0]
27 |         ctx.save_for_backward(input)
28 |         return output
29 | 
30 |     @staticmethod
31 |     def backward(ctx, grad_output):
32 |         input = ctx.saved_tensors[0]
33 |         output = bottom_pool.backward(input, grad_output)[0]
34 |         return output
35 | 
36 | class LeftPoolFunction(Function):
37 |     @staticmethod
38 |     def forward(ctx, input):
39 |         output = left_pool.forward(input)[0]
40 |         ctx.save_for_backward(input)
41 |         return output
42 | 
43 |     @staticmethod
44 |     def backward(ctx, grad_output):
45 |         input = ctx.saved_tensors[0]
46 |         output = left_pool.backward(input, grad_output)[0]
47 |         return output
48 | 
49 | class RightPoolFunction(Function):
50 |     @staticmethod
51 |     def forward(ctx, input):
52 |         output = right_pool.forward(input)[0]
53 |         ctx.save_for_backward(input)
54 |         return output
55 | 
56 |     @staticmethod
57 |     def backward(ctx, grad_output):
58 |         input = ctx.saved_tensors[0]
59 |         output = right_pool.backward(input, grad_output)[0]
60 |         return output
61 | 
62 | class TopPool(nn.Module):
63 |     def forward(self, x):
64 |         return TopPoolFunction.apply(x)
65 | 
66 | class BottomPool(nn.Module):
67 |     def forward(self, x):
68 |         return BottomPoolFunction.apply(x)
69 | 
70 | class LeftPool(nn.Module):
71 |     def forward(self, x):
72 |         return LeftPoolFunction.apply(x)
73 | 
74 | class RightPool(nn.Module):
75 |     def forward(self, x):
76 |         return RightPoolFunction.apply(x)
77 | 
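A quick smoke test for these wrappers, as a sketch only, assuming the cpools extension has been built (see setup.py above) and that a GPU is available, since the C++ backward allocates CUDA buffers:

```python
# Minimal sanity check for the pooling modules defined above.
import torch
from mmdet.ops import TopPool  # importable after init.sh copies _cpools into mmdet

x = torch.randn(2, 256, 64, 64, device='cuda', requires_grad=True)
y = TopPool()(x)           # out[:, :, i, j] = max over x[:, :, i:, j]
assert y.shape == x.shape  # corner pooling preserves the feature map shape
y.sum().backward()         # gradient is routed only to the argmax positions
print(x.grad.abs().sum())
```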
--------------------------------------------------------------------------------
/src/ops/_cpools/src/bottom_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get height
12 |     int64_t height = input.size(2);
13 | 
14 |     // Copy the first row
15 |     at::Tensor input_temp = input.select(2, 0);
16 |     at::Tensor output_temp = output.select(2, 0);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 0; ind < height - 1; ++ind) {
21 |         input_temp = input.select(2, ind + 1);
22 |         output_temp = output.select(2, ind);
23 |         max_temp = output.select(2, ind + 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 | 
47 |     auto input_temp = input.select(2, 0);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(0);
51 | 
52 |     auto output_temp = output.select(2, 0);
53 |     auto grad_output_temp = grad_output.select(2, 0);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(2);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 |     for (int32_t ind = 0; ind < height - 1; ++ind) {
60 |         input_temp = input.select(2, ind + 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, ind + 1);
66 | 
67 |         grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2);
68 |         output.scatter_add_(2, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Bottom Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Bottom Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }

--------------------------------------------------------------------------------
/src/ops/_cpools/src/right_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get width
12 |     int64_t width = input.size(3);
13 | 
14 |     // Copy the first column
15 |     at::Tensor input_temp = input.select(3, 0);
16 |     at::Tensor output_temp = output.select(3, 0);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 0; ind < width - 1; ++ind) {
21 |         input_temp = input.select(3, ind + 1);
22 |         output_temp = output.select(3, ind);
23 |         max_temp = output.select(3, ind + 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     at::Tensor output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, 0);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(0);
51 | 
52 |     auto output_temp = output.select(3, 0);
53 |     auto grad_output_temp = grad_output.select(3, 0);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 0; ind < width - 1; ++ind) {
60 |         input_temp = input.select(3, ind + 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, ind + 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Right Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Right Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }

--------------------------------------------------------------------------------
/src/ops/_cpools/src/left_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get width
12 |     int64_t width = input.size(3);
13 | 
14 |     // Copy the last column
15 |     at::Tensor input_temp = input.select(3, width - 1);
16 |     at::Tensor output_temp = output.select(3, width - 1);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 1; ind < width; ++ind) {
21 |         input_temp = input.select(3, width - ind - 1);
22 |         output_temp = output.select(3, width - ind);
23 |         max_temp = output.select(3, width - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, width - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(width - 1);
51 | 
52 |     auto output_temp = output.select(3, width - 1);
53 |     auto grad_output_temp = grad_output.select(3, width - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 1; ind < width; ++ind) {
60 |         input_temp = input.select(3, width - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, width - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Left Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Left Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }

--------------------------------------------------------------------------------
/src/ops/_cpools/src/top_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> top_pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get height
12 |     int64_t height = input.size(2);
13 | 
14 |     // Copy the last row
15 |     at::Tensor input_temp = input.select(2, height - 1);
16 |     at::Tensor output_temp = output.select(2, height - 1);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 1; ind < height; ++ind) {
21 |         input_temp = input.select(2, height - ind - 1);
22 |         output_temp = output.select(2, height - ind);
23 |         max_temp = output.select(2, height - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> top_pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 | 
47 |     auto input_temp = input.select(2, height - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(height - 1);
51 | 
52 |     auto output_temp = output.select(2, height - 1);
53 |     auto grad_output_temp = grad_output.select(2, height - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(2);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 |     for (int32_t ind = 1; ind < height; ++ind) {
60 |         input_temp = input.select(2, height - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, height - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
68 |         output.scatter_add_(2, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &top_pool_forward, "Top Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &top_pool_backward, "Top Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
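The four kernels above all compute a running maximum along one direction of the feature map. In newer PyTorch they can be reproduced in a few lines with `torch.cummax`, which is handy for checking the compiled ops; a verification sketch (`cummax` needs PyTorch >= 1.5, so this is a reference, not a drop-in replacement for the toolchain this repo targets):

```python
# Pure-PyTorch reference for the four corner pooling ops above.
import torch

def bottom_pool(x):  # running max down the height axis: out[i] = max(x[:i+1])
    return x.cummax(dim=2)[0]

def top_pool(x):     # running max up the height axis: out[i] = max(x[i:])
    return x.flip(2).cummax(dim=2)[0].flip(2)

def right_pool(x):   # running max along the width axis, left to right
    return x.cummax(dim=3)[0]

def left_pool(x):    # running max along the width axis, right to left
    return x.flip(3).cummax(dim=3)[0].flip(3)

x = torch.randn(1, 2, 5, 5)
# e.g. compare against the compiled extension:
# import top_pool as top_pool_ext
# assert torch.allclose(top_pool(x), top_pool_ext.forward(x)[0])
```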
--------------------------------------------------------------------------------
/configs/centripetalnet_mask_hg104.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='CentripetalNet',
4 |     backbone=dict(
5 |         type='Hourglass',
6 |         n=5,
7 |         nstack=2,
8 |         dims=[256, 256, 384, 384, 384, 512],
9 |         modules=[2, 2, 2, 2, 2, 4],
10 |         out_dim=80,),
11 |     neck=None,
12 |     bbox_head=dict(
13 |         type='Centripetal_mask',
14 |         num_classes=81,
15 |         in_channels=256,
16 |         with_mask=True,
17 |     ))
18 | # training and testing settings
19 | train_cfg = dict(
20 |     assigner=dict(
21 |         type='MaxIoUAssigner',
22 |         pos_iou_thr=0.5,
23 |         neg_iou_thr=0.4,
24 |         min_pos_iou=0,
25 |         ignore_iof_thr=-1),
26 |     smoothl1_beta=0.11,
27 |     gamma=2.0,
28 |     alpha=0.25,
29 |     allowed_border=-1,
30 |     pos_weight=-1,
31 |     debug=False)
32 | test_cfg = dict(
33 |     nms_pre=1000,
34 |     min_bbox_size=0,
35 |     score_thr=0.05,
36 |     nms=dict(type='nms', iou_thr=0.5),
37 |     max_per_img=100)
38 | # dataset settings
39 | dataset_type = 'CocoDataset'
40 | data_root = 'data/mscoco2017/'
41 | img_norm_cfg = dict(
42 |     mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False)
43 | 
44 | cornernet_mode = True
45 | 
46 | data = dict(
47 |     imgs_per_gpu=6,  # 3
48 |     workers_per_gpu=3,  # 3
49 |     train=dict(
50 |         type=dataset_type,
51 |         ann_file=data_root + 'annotations/instances_train2017.json',
52 |         img_prefix=data_root + 'train2017/',
53 |         img_scale=(511, 511),
54 |         img_norm_cfg=img_norm_cfg,
55 |         size_divisor=None,
56 |         flip_ratio=0.5,
57 |         with_mask=True,
58 |         with_crowd=False,
59 |         with_label=True,
60 |         resize_keep_ratio=False,
61 |         cornernet_mode=cornernet_mode,
62 |         extra_aug=dict(
63 |             photo_metric_distortion=dict(
64 |                 brightness_delta=32,
65 |                 contrast_range=(0.5, 1.5),
66 |                 saturation_range=(0.5, 1.5),
67 |                 hue_delta=18),
68 |             expand=dict(
69 |                 mean=img_norm_cfg['mean'],
70 |                 to_rgb=img_norm_cfg['to_rgb'],
71 |                 ratio_range=(1, 4)),
72 |             random_crop=dict(
73 |                 min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3))),
74 |     val=dict(
75 |         type=dataset_type,
76 |         ann_file=data_root + 'annotations/instances_val2017.json',
77 |         img_prefix=data_root + 'val2017/',
78 |         img_scale=(511, 511),
79 |         img_norm_cfg=img_norm_cfg,
80 |         size_divisor=None,
81 |         flip_ratio=1,
82 |         with_mask=False,
83 |         with_crowd=False,
84 |         with_label=True,
85 |         cornernet_mode=cornernet_mode,
86 |         resize_keep_ratio=False),
87 |     test=dict(
88 |         type=dataset_type,
89 |         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
90 |         img_prefix=data_root + 'test2017/',
91 |         img_scale=(511, 511),
92 |         img_norm_cfg=img_norm_cfg,
93 |         size_divisor=None,
94 |         flip_ratio=1,
95 |         with_mask=False,
96 |         with_crowd=False,
97 |         with_label=False,
98 |         test_mode=True,
99 |         cornernet_mode=cornernet_mode,
100 |         resize_keep_ratio=False))
101 | # optimizer
102 | optimizer = dict(type='Adam', lr=0.00005)
103 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
104 | 
105 | # learning policy
106 | # policy='fixed'
107 | lr_config = dict(
108 |     policy='step',
109 |     warmup='linear',
110 |     warmup_iters=500,
111 |     warmup_ratio=1.0 / 3,
112 |     step=[190])
113 | checkpoint_config = dict(interval=1)
114 | # yapf:disable
115 | log_config = dict(
116 |     interval=50,
117 |     hooks=[
118 |         dict(type='TextLoggerHook'),
119 |         # dict(type='TensorboardLoggerHook')
120 |     ])
121 | # yapf:enable
122 | # runtime settings
123 | total_epochs = 210
124 | device_ids = range(8)
125 | dist_params = dict(backend='nccl')
126 | log_level = 'INFO'
127 | work_dir = './work_dirs/centripetalnet_mask_hg104'
128 | resume_from = None
129 | load_from = None
130 | workflow = [('train', 1)]
131 | 
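For reference, this config is consumed through the usual mmdetection v1.x entry points; a hedged sketch (exact APIs depend on the pinned mmdetection commit):

```python
# Build the model described by the config above (mmdetection v1.x style).
from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/centripetalnet_mask_hg104.py')
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
print(type(model).__name__)  # CentripetalNet
```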
--------------------------------------------------------------------------------
/src/datasets/coco.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pycocotools.coco import COCO
3 | 
4 | from .custom import CustomDataset
5 | 
6 | 
7 | class CocoDataset(CustomDataset):
8 | 
9 |     CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
10 |                'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
11 |                'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
12 |                'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
13 |                'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
14 |                'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
15 |                'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
16 |                'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
17 |                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
18 |                'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
19 |                'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
20 |                'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
21 |                'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
22 |                'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')
23 | 
24 |     def load_annotations(self, ann_file):
25 |         self.coco = COCO(ann_file)
26 |         self.cat_ids = self.coco.getCatIds()
27 |         self.cat2label = {
28 |             cat_id: i + 1
29 |             for i, cat_id in enumerate(self.cat_ids)
30 |         }
31 |         self.img_ids = self.coco.getImgIds()
32 |         img_infos = []
33 |         for i in self.img_ids:
34 |             info = self.coco.loadImgs([i])[0]
35 |             info['filename'] = info['file_name']
36 |             img_infos.append(info)
37 |         return img_infos
38 | 
39 |     def get_ann_info(self, idx):
40 |         img_id = self.img_infos[idx]['id']
41 |         ann_ids = self.coco.getAnnIds(imgIds=[img_id])
42 |         ann_info = self.coco.loadAnns(ann_ids)
43 |         return self._parse_ann_info(ann_info, self.with_mask)
44 | 
45 |     def _filter_imgs(self, min_size=32):
46 |         """Filter images too small or without ground truths."""
47 |         valid_inds = []
48 |         ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
49 |         for i, img_info in enumerate(self.img_infos):
50 |             if self.img_ids[i] not in ids_with_ann:
51 |                 continue
52 |             if min(img_info['width'], img_info['height']) >= min_size:
53 |                 valid_inds.append(i)
54 |         return valid_inds
55 | 
56 |     def _parse_ann_info(self, ann_info, with_mask=True):
57 |         """Parse bbox and mask annotation.
58 | 
59 |         Args:
60 |             ann_info (list[dict]): Annotation info of an image.
61 |             with_mask (bool): Whether to parse mask annotations.
62 | 
63 |         Returns:
64 |             dict: A dict containing the following keys: bboxes, bboxes_ignore,
65 |                 labels, masks, mask_polys, poly_lens.
66 |         """
67 |         gt_bboxes = []
68 |         gt_labels = []
69 |         gt_bboxes_ignore = []
70 |         # Two formats are provided.
71 |         # 1. mask: a binary map of the same size of the image.
72 |         # 2. polys: each mask consists of one or several polys, each poly is a
73 |         #    list of float.
74 | if with_mask: 75 | gt_masks = [] 76 | gt_mask_polys = [] 77 | gt_poly_lens = [] 78 | for i, ann in enumerate(ann_info): 79 | if ann.get('ignore', False): 80 | continue 81 | x1, y1, w, h = ann['bbox'] 82 | if ann['area'] <= 0 or w < 1 or h < 1: 83 | continue 84 | bbox = [x1, y1, x1 + w - 1, y1 + h - 1] 85 | if ann['iscrowd']: 86 | gt_bboxes_ignore.append(bbox) 87 | else: 88 | gt_bboxes.append(bbox) 89 | gt_labels.append(self.cat2label[ann['category_id']]) 90 | if with_mask and not ann['iscrowd']: 91 | gt_masks.append(self.coco.annToMask(ann)) 92 | mask_polys = [ 93 | p for p in ann['segmentation'] if len(p) >= 6 94 | ] # valid polygons have >= 3 points (6 coordinates) 95 | poly_lens = [len(p) for p in mask_polys] 96 | gt_mask_polys.append(mask_polys) 97 | gt_poly_lens.extend(poly_lens) 98 | if gt_bboxes: 99 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 100 | gt_labels = np.array(gt_labels, dtype=np.int64) 101 | else: 102 | gt_bboxes = np.zeros((0, 4), dtype=np.float32) 103 | gt_labels = np.array([], dtype=np.int64) 104 | 105 | if gt_bboxes_ignore: 106 | gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) 107 | else: 108 | gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) 109 | 110 | ann = dict( 111 | bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) 112 | 113 | if with_mask: 114 | ann['masks'] = gt_masks 115 | # poly format is not used in the current implementation 116 | ann['mask_polys'] = gt_mask_polys 117 | ann['poly_lens'] = gt_poly_lens 118 | return ann 119 | -------------------------------------------------------------------------------- /src/core/corner/corner_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from random import randint 4 | from .kp_utils import gaussian_radius, draw_gaussian 5 | import math 6 | 7 | 8 | def corner_target(gt_bboxes, gt_labels, feats, imgscale, num_classes=80, direct=False, obj=False, scale=8.0, dcn=False): 9 | """ 10 | :param gt_bboxes: list of boxes (xmin, ymin, xmax, ymax) 11 | :param gt_labels: list of labels 12 | :param featsize: 13 | :return: 14 | """ 15 | b, _, h, w = feats.size() 16 | im_h, im_w = imgscale 17 | 18 | width_ratio = float(w / im_w) 19 | height_ratio = float(h / im_h) 20 | 21 | gt_tl_corner_heatmap = np.zeros([b, num_classes, h, w]) * 1.0 22 | gt_br_corner_heatmap = np.zeros([b, num_classes, h, w]) * 1.0 23 | 24 | gt_tl_obj = np.zeros([b, 1, h, w]) * 1.0 25 | gt_br_obj = np.zeros([b, 1, h, w]) * 1.0 26 | 27 | gt_tl_off_c = np.zeros([b, 2, h, w]) * 1.0 28 | gt_br_off_c = np.zeros([b, 2, h, w]) * 1.0 29 | 30 | gt_tl_off_c2 = np.zeros([b, 2, h, w]) * 1.0 31 | gt_br_off_c2 = np.zeros([b, 2, h, w]) * 1.0 32 | 33 | 34 | gt_tl_offsets = np.zeros([b, 2, h, w]) * 1.0 35 | gt_br_offsets = np.zeros([b, 2, h, w]) * 1.0 36 | 37 | 38 | for b_id in range(b): 39 | #match = [] 40 | for box_id in range(len(gt_labels[b_id])): 41 | tl_x, tl_y, br_x, br_y = gt_bboxes[b_id][box_id] 42 | c_x = (tl_x + br_x)/2.0 43 | c_y = (tl_y + br_y)/2.0 44 | 45 | label = gt_labels[b_id][box_id] # label is between(1,80) 46 | 47 | ftlx = float(tl_x * width_ratio) 48 | fbrx = float(br_x * width_ratio) 49 | ftly = float(tl_y * height_ratio) 50 | fbry = float(br_y * height_ratio) 51 | fcx = float(c_x * width_ratio) 52 | fcy = float(c_y * height_ratio) 53 | 54 | 55 | #tl_x_idx = int(min(ftlx, w - 1)) 56 | #br_x_idx = int(min(fbrx, w - 1)) 57 | #tl_y_idx = int(min(ftly, h - 1)) 58 | #br_y_idx = int(min(fbry, h - 1)) 59 | tl_x_idx = int(ftlx) 60 | 
br_x_idx = int(fbrx) 61 | tl_y_idx = int(ftly) 62 | br_y_idx = int(fbry) 63 | 64 | width = float(br_x - tl_x) 65 | height = float(br_y - tl_y) 66 | 67 | width = math.ceil(width * width_ratio) 68 | height = math.ceil(height * height_ratio) 69 | 70 | radius = gaussian_radius((height, width), min_overlap=0.3) 71 | radius = max(0, int(radius)) 72 | # radius = 10 73 | 74 | draw_gaussian(gt_tl_corner_heatmap[b_id, label.long() - 1], [tl_x_idx, tl_y_idx], radius)#, mode='tl') 75 | draw_gaussian(gt_br_corner_heatmap[b_id, label.long() - 1], [br_x_idx, br_y_idx], radius)#, mode='br') 76 | draw_gaussian(gt_tl_obj[b_id, 0], [tl_x_idx, tl_y_idx], radius) 77 | draw_gaussian(gt_br_obj[b_id, 0], [br_x_idx, br_y_idx], radius) 78 | 79 | # gt_tl_corner_heatmap[b_id, label.long()-1, tl_x_idx.long(), tl_y_idx.long()] += 1 80 | # gt_br_corner_heatmap[b_id, label.long()-1, br_x_idx.long(), br_y_idx.long()] += 1 81 | 82 | tl_x_offset = ftlx - tl_x_idx 83 | tl_y_offset = ftly - tl_y_idx 84 | br_x_offset = fbrx - br_x_idx 85 | br_y_offset = fbry - br_y_idx 86 | 87 | if direct: 88 | tl_x_off_c = (fcx - tl_x_idx)/scale 89 | tl_y_off_c = (fcy - tl_y_idx)/scale 90 | br_x_off_c = (br_x_idx - fcx)/scale 91 | br_y_off_c = (br_y_idx - fcy)/scale 92 | else: 93 | tl_x_off_c = np.log(fcx - ftlx) 94 | tl_y_off_c = np.log(fcy - ftly) 95 | br_x_off_c = np.log(fbrx - fcx) 96 | br_y_off_c = np.log(fbry - fcy) 97 | 98 | gt_tl_offsets[b_id, 0, tl_y_idx, tl_x_idx] = tl_x_offset 99 | gt_tl_offsets[b_id, 1, tl_y_idx, tl_x_idx] = tl_y_offset 100 | gt_br_offsets[b_id, 0, br_y_idx, br_x_idx] = br_x_offset 101 | gt_br_offsets[b_id, 1, br_y_idx, br_x_idx] = br_y_offset 102 | 103 | gt_tl_off_c[b_id, 0, tl_y_idx, tl_x_idx] = tl_x_off_c 104 | gt_tl_off_c[b_id, 1, tl_y_idx, tl_x_idx] = tl_y_off_c 105 | gt_br_off_c[b_id, 0, br_y_idx, br_x_idx] = br_x_off_c 106 | gt_br_off_c[b_id, 1, br_y_idx, br_x_idx] = br_y_off_c 107 | 108 | gt_tl_off_c2[b_id, 0, tl_y_idx, tl_x_idx] = np.log(fcx - ftlx) 109 | gt_tl_off_c2[b_id, 1, tl_y_idx, tl_x_idx] = np.log(fcy - ftly) 110 | gt_br_off_c2[b_id, 0, br_y_idx, br_x_idx] = np.log(fbrx - fcx) 111 | gt_br_off_c2[b_id, 1, br_y_idx, br_x_idx] = np.log(fbry - fcy) 112 | gt_tl_corner_heatmap = torch.from_numpy(gt_tl_corner_heatmap).type_as(feats) 113 | gt_br_corner_heatmap = torch.from_numpy(gt_br_corner_heatmap).type_as(feats) 114 | gt_tl_obj = torch.from_numpy(gt_tl_obj).type_as(feats) 115 | gt_br_obj = torch.from_numpy(gt_br_obj).type_as(feats) 116 | gt_tl_off_c = torch.from_numpy(gt_tl_off_c).type_as(feats) 117 | gt_br_off_c = torch.from_numpy(gt_br_off_c).type_as(feats) 118 | gt_tl_off_c2 = torch.from_numpy(gt_tl_off_c2).type_as(feats) 119 | gt_br_off_c2 = torch.from_numpy(gt_br_off_c2).type_as(feats) 120 | gt_tl_offsets = torch.from_numpy(gt_tl_offsets).type_as(feats) 121 | gt_br_offsets = torch.from_numpy(gt_br_offsets).type_as(feats) 122 | 123 | if obj: 124 | return gt_tl_obj, gt_br_obj, gt_tl_corner_heatmap, gt_br_corner_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c 125 | else: 126 | if not dcn: 127 | return gt_tl_corner_heatmap, gt_br_corner_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c 128 | else: 129 | return gt_tl_corner_heatmap, gt_br_corner_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c, gt_tl_off_c2, gt_br_off_c2 130 | 131 | -------------------------------------------------------------------------------- /src/models/detectors/centripetal.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 
import torch 3 | import mmcv 4 | import cv2 5 | from .base import BaseDetector 6 | from .. import builder 7 | from ..registry import DETECTORS 8 | from mmdet.core import bbox2result 9 | from collections import OrderedDict 10 | from mmcv.runner import get_dist_info 11 | import numpy as np 12 | import json 13 | from .test_mixins import MaskTestMixin_kpt 14 | from numpy.random import randint 15 | 16 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 17 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 18 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 19 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 20 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 21 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 22 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 23 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 24 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 25 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 26 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 27 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 28 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 29 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 30 | 31 | @DETECTORS.register_module 32 | class CentripetalNet(BaseDetector, MaskTestMixin_kpt): 33 | 34 | def __init__(self, 35 | backbone, 36 | neck=None, 37 | bbox_head=None, 38 | train_cfg=None, 39 | test_cfg=None, 40 | pretrained=None): 41 | super(CentripetalNet, self).__init__() 42 | self.backbone = builder.build_backbone(backbone) 43 | if neck is not None: 44 | self.neck = builder.build_neck(neck) 45 | self.bbox_head = builder.build_head(bbox_head) 46 | self.train_cfg = train_cfg 47 | self.test_cfg = test_cfg 48 | self.init_weights(pretrained=pretrained) 49 | if self.bbox_head.with_mask: 50 | self.mask_head = True 51 | 52 | def init_weights(self, pretrained=None): 53 | super(CentripetalNet, self).init_weights(pretrained) 54 | self.backbone.init_weights(pretrained=pretrained) 55 | if self.with_neck: 56 | if isinstance(self.neck, nn.Sequential): 57 | for m in self.neck: 58 | m.init_weights() 59 | else: 60 | self.neck.init_weights() 61 | self.bbox_head.init_weights() 62 | 63 | def extract_feat(self, img): 64 | x = self.backbone(img) 65 | if self.with_neck: 66 | x = self.neck(x) 67 | return x 68 | 69 | def forward_train(self, img, img_metas, gt_bboxes, gt_labels, gt_masks): 70 | """ 71 | :param img: 72 | :param img_metas: 73 | :param gt_bboxes: (xmin, ymin, xmax, ymax) 74 | :param gt_labels: 75 | :return: 76 | """ 77 | _,_,h,w = img.size() 78 | imgscale = (h,w) 79 | x = self.extract_feat(img) 80 | outs = self.bbox_head(x) 81 | loss_inputs = outs + (gt_bboxes, gt_labels, gt_masks, img_metas, self.train_cfg,imgscale) 82 | losses = self.bbox_head.loss(*loss_inputs) 83 | return losses 84 | 85 | def simple_test(self, img, img_meta, rescale=False): 86 | 87 | x = self.extract_feat(img) 88 | outs = self.bbox_head(x) 89 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 90 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 91 | bbox_results = [ 92 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes+1) 93 | for det_bboxes, det_labels in bbox_list 94 | ] 95 | return bbox_results[0] 96 | 97 | # def aug_test_old(self, imgs, img_meta, rescale=False): 98 | # imgs=torch.cat(imgs) 99 | # x = self.extract_feat(imgs) 100 | # outs = self.bbox_head(x) 101 | # bbox_inputs 
= outs + (img_meta, self.test_cfg, rescale)
102 |     #     bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
103 |     #     bbox_results = [
104 |     #         bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes+1)
105 |     #         for det_bboxes, det_labels in bbox_list
106 |     #     ]
107 |     #     return bbox_results[0]
108 | 
109 |     def aug_test(self, imgs_l, img_meta, rescale=False, gt_bboxes=None, gt_labels=None, gt_masks=None, idx=None):
110 | 
111 |         img = imgs_l[0][0]
112 |         img_n = img.squeeze().cpu().numpy()
113 |         img_n = np.transpose(img_n, [1, 2, 0])
114 |         img_n -= img_n.min()
115 |         img_n /= abs(img_n).max()
116 |         img_n *= 255.0
117 |         ms_results = []
118 |         bboxes = []
119 |         labels = []
120 |         for i in [0]:
121 |         # for i in [0, 2, 4, 6, 8]:  # multi-scale variant
122 |             imgs = torch.cat(imgs_l[i:i+2])
123 |             x = self.extract_feat(imgs)
124 |             outs = self.bbox_head(x)
125 | 
126 |             bbox_inputs = outs + (img_meta[i:i+2], self.test_cfg, rescale)
127 |             bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
128 | 
129 |             ms_results.append(bbox_list)
130 |             bboxes.append(bbox_list[0][0])
131 |             labels.append(bbox_list[0][1])
132 | 
133 |         detections = torch.cat(bboxes)  # .cpu().numpy()
134 |         labels = torch.cat(labels)  # .cpu().numpy()
135 | 
136 |         bbox_list = [(detections, labels)]
137 | 
138 |         bbox_results = [
139 |             bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes+1)
140 |             for det_bboxes, det_labels in bbox_list
141 |         ]
142 |         return bbox_results[0]
143 | 
144 | 
145 | 
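For context, `bbox2result` (imported from mmdet.core above) converts the `(det_bboxes, det_labels)` pairs produced by `get_bboxes` into the per-class list format the evaluation code expects. A rough equivalent of the mmdetection v1 helper, shown only to document the output layout:

```python
# Sketch of bbox2result's contract: one (n, 5) array of
# [x1, y1, x2, y2, score] rows per foreground class.
import numpy as np

def bbox2result_sketch(bboxes, labels, num_classes):
    # bboxes: (n, 5) tensor, labels: (n,) tensor; num_classes includes background
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for _ in range(num_classes - 1)]
    bboxes = bboxes.cpu().numpy()
    labels = labels.cpu().numpy()
    return [bboxes[labels == i, :] for i in range(num_classes - 1)]
```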
--------------------------------------------------------------------------------
/src/models/backbones/hourglass.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from mmdet.models.registry import BACKBONES
4 | 
5 | 
6 | class convolution(nn.Module):
7 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
8 |         super(convolution, self).__init__()
9 | 
10 |         pad = (k - 1) // 2  # k is the convolution kernel size
11 |         self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
12 |         self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
13 |         self.relu = nn.ReLU(inplace=True)
14 | 
15 |     def forward(self, x):
16 |         conv = self.conv(x)
17 |         bn = self.bn(conv)
18 |         relu = self.relu(bn)
19 |         return relu
20 | 
21 | class residual(nn.Module):
22 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
23 |         super(residual, self).__init__()
24 | 
25 |         self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
26 |         self.bn1 = nn.BatchNorm2d(out_dim)
27 |         self.relu1 = nn.ReLU(inplace=True)
28 | 
29 |         self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
30 |         self.bn2 = nn.BatchNorm2d(out_dim)
31 | 
32 |         self.skip = nn.Sequential(
33 |             nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
34 |             nn.BatchNorm2d(out_dim)
35 |         ) if stride != 1 or inp_dim != out_dim else nn.Sequential()
36 |         self.relu = nn.ReLU(inplace=True)
37 | 
38 |     def forward(self, x):
39 |         conv1 = self.conv1(x)
40 |         bn1 = self.bn1(conv1)
41 |         relu1 = self.relu1(bn1)
42 | 
43 |         conv2 = self.conv2(relu1)
44 |         bn2 = self.bn2(conv2)
45 | 
46 |         skip = self.skip(x)
47 |         return self.relu(bn2 + skip)
48 | 
49 | class MergeUp(nn.Module):
50 |     def forward(self, up1, up2):
51 |         return up1 + up2
52 | 
53 | def make_merge_layer(dim):
54 |     return MergeUp()
55 | 
56 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):  # layer is chosen from conv/fc/res
57 |     layers = [layer(k, inp_dim, out_dim, **kwargs)]
58 |     for _ in range(1, modules):
59 |         layers.append(layer(k, out_dim, out_dim, **kwargs))
60 |     return nn.Sequential(*layers)
61 | 
62 | 
63 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
64 |     layers = []
65 |     for _ in range(modules - 1):
66 |         layers.append(layer(k, inp_dim, inp_dim, **kwargs))
67 |     layers.append(layer(k, inp_dim, out_dim, **kwargs))
68 |     return nn.Sequential(*layers)
69 | 
70 | # def make_pool_layer(dim):
71 | #     return nn.MaxPool2d(kernel_size=2, stride=2)
72 | 
73 | def make_pool_layer(dim):
74 |     return nn.Sequential()
75 | 
76 | def make_unpool_layer(dim, trans_conv=False):
77 |     if not trans_conv:
78 |         return nn.Upsample(scale_factor=2)
79 |     else:
80 |         return nn.ConvTranspose2d(dim, dim, kernel_size=4, stride=2, padding=1)
81 | 
82 | def make_kp_layer(cnv_dim, curr_dim, out_dim):
83 |     return nn.Sequential(
84 |         convolution(3, cnv_dim, curr_dim, with_bn=False),
85 |         nn.Conv2d(curr_dim, out_dim, (1, 1))
86 |     )
87 | 
88 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
89 |     layers = [layer(kernel, dim0, dim1, stride=2)]
90 |     layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
91 |     return nn.Sequential(*layers)
92 | 
93 | def make_inter_layer(dim):
94 |     return residual(3, dim, dim)
95 | 
96 | def make_cnv_layer(inp_dim, out_dim):
97 |     return convolution(3, inp_dim, out_dim)
98 | 
99 | 
100 | 
101 | class kp_module(nn.Module):
102 |     def __init__(
103 |         self, n, dims, modules, layer=residual, trans_conv=False,
104 |         make_up_layer=make_layer, make_low_layer=make_layer,  # "up" keeps the input resolution, "low" is the downsampled branch
105 |         make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
106 |         make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
107 |         make_merge_layer=make_merge_layer, **kwargs
108 |     ):
109 |         super(kp_module, self).__init__()
110 | 
111 |         self.n = n
112 | 
113 |         curr_mod = modules[0]
114 |         next_mod = modules[1]
115 | 
116 |         curr_dim = dims[0]
117 |         next_dim = dims[1]
118 | 
119 |         self.up1 = make_up_layer(  # make_layer
120 |             3, curr_dim, curr_dim, curr_mod,
121 |             layer=layer, **kwargs
122 |         )
123 |         self.max1 = make_pool_layer(curr_dim)
124 |         self.low1 = make_hg_layer(
125 |             3, curr_dim, next_dim, curr_mod,
126 |             layer=layer, **kwargs
127 |         )
128 |         # a recursive definition: each level nests another kp_module
129 |         self.low2 = kp_module(
130 |             n - 1, dims[1:], modules[1:], layer=layer,
131 |             make_up_layer=make_up_layer,
132 |             make_low_layer=make_low_layer,
133 |             make_hg_layer=make_hg_layer,
134 |             make_hg_layer_revr=make_hg_layer_revr,
135 |             make_pool_layer=make_pool_layer,
136 |             make_unpool_layer=make_unpool_layer,
137 |             make_merge_layer=make_merge_layer,
138 |             **kwargs
139 |         ) if self.n > 1 else \
140 |         make_low_layer(
141 |             3, next_dim, next_dim, next_mod,
142 |             layer=layer, **kwargs
143 |         )
144 |         self.low3 = make_hg_layer_revr(
145 |             3, next_dim, curr_dim, curr_mod,
146 |             layer=layer, **kwargs
147 |         )
148 |         self.up2 = make_unpool_layer(curr_dim, trans_conv)
149 | 
150 |         self.merge = make_merge_layer(curr_dim)
151 | 
152 |     def forward(self, x):
153 |         up1 = self.up1(x)
154 |         max1 = self.max1(x)
155 |         low1 = self.low1(max1)
156 |         low2 = self.low2(low1)
157 |         low3 = self.low3(low2)
158 |         up2 = self.up2(low3)
159 |         return self.merge(up1, up2)
160 | 
161 | 
162 | 
163 | """
164 | n = 5
165 | dims = [256, 256, 384, 384, 384, 512]
166 | modules = [2, 2, 2, 2, 2, 4]
167 | out_dim = 80
168 | """
169 | 
170 | @BACKBONES.register_module
171 | class Hourglass(nn.Module):
172 |     def __init__(
173 |         self, n, nstack, dims, modules, out_dim, pre=None, cnv_dim=256, trans_conv=False,
174 |         make_cnv_layer=make_cnv_layer,
175 |         make_up_layer=make_layer, make_low_layer=make_layer,
176 |         make_hg_layer=make_hg_layer, make_hg_layer_revr=make_layer_revr,
177 |         make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
178 |         make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
179 |         kp_layer=residual
180 |     ):
181 |         super(Hourglass, self).__init__()
182 | 
183 |         self.nstack = nstack
184 | 
185 |         curr_dim = dims[0]
186 | 
187 |         self.pre = nn.Sequential(
188 |             convolution(7, 3, 128, stride=2),
189 |             residual(3, 128, 256, stride=2)
190 |         ) if pre is None else pre
191 | 
192 |         self.hg_modules = nn.ModuleList([
193 |             kp_module(
194 |                 n, dims, modules, trans_conv=trans_conv, layer=kp_layer,
195 |                 make_up_layer=make_up_layer,
196 |                 make_low_layer=make_low_layer,
197 |                 make_hg_layer=make_hg_layer,
198 |                 make_hg_layer_revr=make_hg_layer_revr,
199 |                 make_pool_layer=make_pool_layer,
200 |                 make_unpool_layer=make_unpool_layer,
201 |                 make_merge_layer=make_merge_layer
202 |             ) for _ in range(nstack)  # nstack is 2 in the CornerNet paper
203 |         ])
204 | 
205 |         self.inters = nn.ModuleList([
206 |             make_inter_layer(curr_dim) for _ in range(nstack - 1)
207 |         ])
208 | 
209 |         self.inters_ = nn.ModuleList([
210 |             nn.Sequential(
211 |                 nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
212 |                 nn.BatchNorm2d(curr_dim)
213 |             ) for _ in range(nstack - 1)
214 |         ])
215 | 
216 |         self.cnvs = nn.ModuleList([
217 |             make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
218 |         ])
219 | 
220 |         self.cnvs_ = nn.ModuleList([
221 |             nn.Sequential(
222 |
nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False), 223 | nn.BatchNorm2d(curr_dim) 224 | ) for _ in range(nstack - 1) 225 | ]) 226 | 227 | self.relu = nn.ReLU(inplace=True) 228 | 229 | def init_weights(self, pretrained=None): 230 | pass 231 | 232 | def forward(self, x): 233 | inter = self.pre(x) 234 | layers = zip( 235 | self.hg_modules, self.cnvs, 236 | ) 237 | outs = [] 238 | 239 | #inter = self.hg_modules[0](inter) 240 | for ind, layer in enumerate(layers): 241 | hg_, cnv_ = layer[0:2] 242 | 243 | hg = hg_(inter) 244 | cnv = cnv_(hg) 245 | outs.append(cnv) 246 | 247 | if ind < self.nstack - 1: 248 | inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv) 249 | inter = self.relu(inter) 250 | inter = self.inters[ind](inter) 251 | #outs.append(inter) 252 | #outs.append(cnv) 253 | 254 | return outs 255 | 256 | 257 | if __name__=='__main__': 258 | n = 5 259 | dims = [256, 256, 384, 384, 384, 512] 260 | modules = [2, 2, 2, 2, 2, 4] 261 | out_dim = 80 262 | 263 | model = Hourglass(n=n, nstack=2,dims=dims, modules=modules, out_dim=out_dim ).cuda() 264 | img = torch.rand(4,3,511, 511).cuda() 265 | out = model(img) 266 | 267 | import pdb 268 | pdb.set_trace() 269 | 270 | -------------------------------------------------------------------------------- /src/models/detectors/test_mixins.py: -------------------------------------------------------------------------------- 1 | from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals, 2 | merge_aug_bboxes, merge_aug_masks, multiclass_nms) 3 | import numpy as np 4 | import cv2 5 | import pycocotools.mask as mask_util 6 | import pdb 7 | 8 | class RPNTestMixin(object): 9 | 10 | def simple_test_rpn(self, x, img_meta, rpn_test_cfg): 11 | rpn_outs = self.rpn_head(x) 12 | proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg) 13 | proposal_list = self.rpn_head.get_bboxes(*proposal_inputs) 14 | return proposal_list 15 | 16 | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg): 17 | imgs_per_gpu = len(img_metas[0]) 18 | aug_proposals = [[] for _ in range(imgs_per_gpu)] 19 | for x, img_meta in zip(feats, img_metas): 20 | proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg) 21 | for i, proposals in enumerate(proposal_list): 22 | aug_proposals[i].append(proposals) 23 | # after merging, proposals will be rescaled to the original image size 24 | merged_proposals = [ 25 | merge_aug_proposals(proposals, img_meta, rpn_test_cfg) 26 | for proposals, img_meta in zip(aug_proposals, img_metas) 27 | ] 28 | return merged_proposals 29 | 30 | 31 | class BBoxTestMixin(object): 32 | 33 | def simple_test_bboxes(self, 34 | x, 35 | img_meta, 36 | proposals, 37 | rcnn_test_cfg, 38 | rescale=False): 39 | """Test only det bboxes without augmentation.""" 40 | rois = bbox2roi(proposals) 41 | roi_feats = self.bbox_roi_extractor( 42 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois) 43 | cls_score, bbox_pred = self.bbox_head(roi_feats) 44 | img_shape = img_meta[0]['img_shape'] 45 | scale_factor = img_meta[0]['scale_factor'] 46 | det_bboxes, det_labels = self.bbox_head.get_det_bboxes( 47 | rois, 48 | cls_score, 49 | bbox_pred, 50 | img_shape, 51 | scale_factor, 52 | rescale=rescale, 53 | cfg=rcnn_test_cfg) 54 | return det_bboxes, det_labels 55 | 56 | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): 57 | aug_bboxes = [] 58 | aug_scores = [] 59 | for x, img_meta in zip(feats, img_metas): 60 | # only one image in the batch 61 | img_shape = img_meta[0]['img_shape'] 62 | scale_factor = img_meta[0]['scale_factor'] 63 | flip = 
img_meta[0]['flip'] 64 | # TODO more flexible 65 | proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, 66 | scale_factor, flip) 67 | rois = bbox2roi([proposals]) 68 | # recompute feature maps to save GPU memory 69 | roi_feats = self.bbox_roi_extractor( 70 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois) 71 | cls_score, bbox_pred = self.bbox_head(roi_feats) 72 | bboxes, scores = self.bbox_head.get_det_bboxes( 73 | rois, 74 | cls_score, 75 | bbox_pred, 76 | img_shape, 77 | scale_factor, 78 | rescale=False, 79 | cfg=None) 80 | aug_bboxes.append(bboxes) 81 | aug_scores.append(scores) 82 | # after merging, bboxes will be rescaled to the original image size 83 | merged_bboxes, merged_scores = merge_aug_bboxes( 84 | aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) 85 | det_bboxes, det_labels = multiclass_nms( 86 | merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, 87 | rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) 88 | return det_bboxes, det_labels 89 | 90 | 91 | class MaskTestMixin(object): 92 | 93 | def simple_test_mask(self, 94 | x, 95 | img_meta, 96 | det_bboxes, 97 | det_labels, 98 | rescale=False): 99 | # image shape of the first image in the batch (only one) 100 | ori_shape = img_meta[0]['ori_shape'] 101 | scale_factor = img_meta[0]['scale_factor'] 102 | if det_bboxes.shape[0] == 0: 103 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] 104 | else: 105 | # if det_bboxes is rescaled to the original image size, we need to 106 | # rescale it back to the testing scale to obtain RoIs. 107 | _bboxes = (det_bboxes[:, :4] * scale_factor 108 | if rescale else det_bboxes) 109 | mask_rois = bbox2roi([_bboxes]) 110 | mask_feats = self.mask_roi_extractor( 111 | x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) 112 | mask_pred = self.mask_head(mask_feats) 113 | segm_result = self.mask_head.get_seg_masks( 114 | mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape, 115 | scale_factor, rescale) 116 | return segm_result 117 | 118 | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): 119 | if det_bboxes.shape[0] == 0: 120 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] 121 | else: 122 | aug_masks = [] 123 | for x, img_meta in zip(feats, img_metas): 124 | img_shape = img_meta[0]['img_shape'] 125 | scale_factor = img_meta[0]['scale_factor'] 126 | flip = img_meta[0]['flip'] 127 | _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, 128 | scale_factor, flip) 129 | mask_rois = bbox2roi([_bboxes]) 130 | mask_feats = self.mask_roi_extractor( 131 | x[:len(self.mask_roi_extractor.featmap_strides)], 132 | mask_rois) 133 | mask_pred = self.mask_head(mask_feats) 134 | # convert to numpy array to save memory 135 | aug_masks.append(mask_pred.sigmoid().cpu().numpy()) 136 | merged_masks = merge_aug_masks(aug_masks, img_metas, 137 | self.test_cfg.rcnn) 138 | 139 | ori_shape = img_metas[0][0]['ori_shape'] 140 | segm_result = self.mask_head.get_seg_masks( 141 | merged_masks, 142 | det_bboxes, 143 | det_labels, 144 | self.test_cfg.rcnn, 145 | ori_shape, 146 | scale_factor=1.0, 147 | rescale=False) 148 | return segm_result 149 | 150 | 151 | class MaskTestMixin_kpt(object): 152 | 153 | def simple_test_mask(self, 154 | score_map, 155 | corner_offsets, 156 | img_meta, 157 | det_bboxes, 158 | rescale=False): 159 | ''' 160 | :param semantic_map: semantic map hxwx80 161 | :param img_meta: 162 | :param det_bboxes: 163 | :param rescale: 164 | :return: 165 | ''' 166 | # TODO: solve hardcode 167 | semantic_map = (score_map>0.4).astype('int') 168 
| h, w, _ = semantic_map.shape 169 | instance_map = -np.ones_like(semantic_map) 170 | border_y, border_x = -img_meta['offset'] 171 | ori_h, ori_w, _ = img_meta['ori_shape'] 172 | _, img_h, img_w = img_meta['img_shape'] 173 | 174 | for label, bboxes in enumerate(det_bboxes): 175 | #keepinds = (bboxes[...,-1]>0.4) 176 | #bboxes = bboxes[keepinds] 177 | if (len(bboxes)==0) or (semantic_map[...,label].sum()==0): 178 | continue 179 | centers = np.array(bboxes)[...,:4] 180 | centers[..., 0::2] += border_x 181 | centers[..., 1::2] += border_y 182 | pixels = semantic_map[..., label] 183 | 184 | #pdb.set_trace() 185 | if len(bboxes) == 1: 186 | instance_map[..., label] = pixels - 1 187 | else: 188 | for y in range(h): 189 | for x in range(w): 190 | if pixels[y, x] == 0: 191 | continue 192 | tl_x = 4 * (x + corner_offsets[label, y, x]) - 1 193 | tl_y = 4 * (y + corner_offsets[label + 80, y, x]) - 1 194 | br_x = 4 * (x + corner_offsets[label + 160, y, x]) - 1 195 | br_y = 4 * (y + corner_offsets[label + 240, y, x]) - 1 196 | #pdb.set_trace() 197 | instance_map[y, x, label] = KNN_cluster(centers, np.array([tl_x, tl_y, br_x, br_y])) 198 | 199 | #seg_maps = [] 200 | cls_segms = [[] for _ in range(80)] 201 | 202 | for label in range(80): 203 | map_with_id = instance_map[..., label] 204 | if map_with_id.max() == -1: 205 | continue 206 | 207 | for ins_id in range(map_with_id.max()+1): 208 | seg_map = (map_with_id == ins_id).astype('float32') 209 | seg_map *= score_map[...,label] 210 | seg_map = cv2.resize(seg_map, (img_w, img_h)) 211 | seg_map = (seg_map>0.4).astype('int') 212 | #seg_map = seg_map[border_y:border_y + ori_h, border_x:border_x + ori_w] 213 | if seg_map.sum()==0: 214 | continue 215 | seg_map = np.uint8(seg_map) 216 | 217 | rle = mask_util.encode(np.array(seg_map[:, :, np.newaxis], order='F'))[0] 218 | #rle['counts'].decode() 219 | #cls_segms[label].append(rle) 220 | cls_segms[label].append(seg_map) 221 | #pdb.set_trace() 222 | return cls_segms 223 | 224 | 225 | 226 | def KNN_cluster(centers, x): 227 | ''' 228 | :param centers: Nxd 229 | :param x: d 230 | :return: cluster id 231 | ''' 232 | return ((x - centers) ** 2).sum(1).argmin() 233 | -------------------------------------------------------------------------------- /src/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | import cv2 5 | 6 | __all__ = ['MaskTransform_cornernet', 'ImageTransform', 'ImageTransform_cornernet', 'BboxTransform', 'BboxTransform_cornernet', 'MaskTransform', 'Numpy2Tensor'] 7 | 8 | 9 | class ImageTransform(object): 10 | """ 11 | Preprocess an image. 12 | 1. rescale the image to expected size 13 | 2. normalize the image 14 | 3. flip the image (if needed) 15 | 4. pad the image (if needed) 16 | 5. 
transpose to (c, h, w) 17 | """ 18 | 19 | def __init__(self, 20 | mean=(0, 0, 0), 21 | std=(1, 1, 1), 22 | pixel_scale=1, 23 | to_rgb=True, 24 | size_divisor=None): 25 | self.mean = np.array(mean, dtype=np.float32) 26 | self.std = np.array(std, dtype=np.float32) 27 | self.pixel_scale = pixel_scale 28 | self.to_rgb = to_rgb 29 | self.size_divisor = size_divisor 30 | 31 | def __call__(self, img, scale, flip=False, keep_ratio=True, crop=False): 32 | if keep_ratio: 33 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 34 | else: 35 | img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True) 36 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) 37 | img_shape = img.shape 38 | img = img * float(self.pixel_scale) 39 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 40 | if flip: 41 | img = mmcv.imflip(img) 42 | if self.size_divisor is not None: 43 | img = mmcv.impad_to_multiple(img, self.size_divisor) 44 | pad_shape = img.shape 45 | else: 46 | pad_shape = img_shape 47 | img = img.transpose(2, 0, 1) 48 | return img, img_shape, pad_shape, scale_factor 49 | 50 | class ImageTransform_cornernet(object): 51 | """Preprocess an image. 52 | 53 | 1. rescale the image to expected size 54 | 2. normalize the image 55 | 3. flip the image (if needed) 56 | 4. pad the image (if needed) 57 | 5. transpose to (c, h, w) 58 | """ 59 | 60 | def __init__(self, 61 | mean=(0, 0, 0), 62 | std=(1, 1, 1), 63 | pixel_scale=1, 64 | to_rgb=True, 65 | size_divisor=None): 66 | self.mean = np.array(mean, dtype=np.float32) 67 | self.std = np.array(std, dtype=np.float32) 68 | self.pixel_scale = pixel_scale 69 | self.to_rgb = to_rgb 70 | self.size_divisor = size_divisor 71 | 72 | def __call__(self, img, scale, flip=False, keep_ratio=True, crop=False): 73 | if crop: 74 | h, w, c = img.shape 75 | 76 | nh = int(h*scale) 77 | nw = int(w*scale) 78 | img = mmcv.imresize(img, (nw, nh)) 79 | h, w, c = img.shape 80 | 81 | inp_h = h | 127 82 | inp_w = w | 127 83 | center = np.array([h // 2, w // 2]) 84 | if flip: 85 | img = mmcv.imflip(img) 86 | img, border, offset = crop_image(img, center, [inp_h, inp_w]) 87 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 88 | img = img.transpose(2, 0, 1) 89 | 90 | return img, border, offset 91 | 92 | '''if keep_ratio: 93 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 94 | else: 95 | img, w_scale, h_scale = mmcv.imresize( 96 | img, scale, return_scale=True) 97 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 98 | dtype=np.float32) 99 | img_shape = img.shape''' 100 | #img = mmcv.imnormalize(img, np.array((0, 0, 0), dtype=np.float32), np.array((1.0/float(self.pixel_scale), 1.0/float(self.pixel_scale), 1.0/float(self.pixel_scale)), dtype=np.float32), False) 101 | #img = img * float(self.pixel_scale) 102 | h, w, _ = img.shape 103 | img = mmcv.imresize(img,(511,511)) 104 | ratio = 511.0/float(h) 105 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 106 | if flip: 107 | img = mmcv.imflip(img) 108 | '''if self.size_divisor is not None: 109 | img = mmcv.impad_to_multiple(img, self.size_divisor) 110 | pad_shape = img.shape 111 | else: 112 | pad_shape = img_shape''' 113 | img = img.transpose(2, 0, 1) 114 | #return img, (511, 511, 3), ratio#, pad_shape, scale_factor 115 | return img, (511, 511, 3), None, ratio 116 | 117 | def crop_image(image, center, size): 118 | cty, ctx = center 119 | height, width = size 120 | im_height, im_width = image.shape[0:2] 121 | cropped_image = 
np.zeros((height, width, 3), dtype=np.float32) 122 | cropped_image[:, :, 0] += 103.53 123 | cropped_image[:, :, 1] += 116.28 124 | cropped_image[:, :, 2] += 123.68 125 | 126 | x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width) 127 | y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height) 128 | 129 | left, right = ctx - x0, x1 - ctx 130 | top, bottom = cty - y0, y1 - cty 131 | 132 | cropped_cty, cropped_ctx = height // 2, width // 2 133 | y_slice = slice(cropped_cty - top, cropped_cty + bottom) 134 | x_slice = slice(cropped_ctx - left, cropped_ctx + right) 135 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 136 | 137 | border = np.array([ 138 | cropped_cty - top, 139 | cropped_cty + bottom, 140 | cropped_ctx - left, 141 | cropped_ctx + right 142 | ], dtype=np.float32) 143 | 144 | offset = np.array([ 145 | cty - height // 2, 146 | ctx - width // 2 147 | ]) 148 | 149 | return cropped_image, border, offset 150 | 151 | 152 | def bbox_flip(bboxes, img_shape): 153 | """Flip bboxes horizontally. 154 | 155 | Args: 156 | bboxes(ndarray): shape (..., 4*k) 157 | img_shape(tuple): (height, width) 158 | """ 159 | assert bboxes.shape[-1] % 4 == 0 160 | w = img_shape[1] 161 | flipped = bboxes.copy() 162 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 163 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 164 | return flipped 165 | 166 | 167 | class BboxTransform_cornernet(object): 168 | """Preprocess gt bboxes. 169 | 170 | 1. rescale bboxes according to image size 171 | 2. flip bboxes (if needed) 172 | 3. pad the first dimension to `max_num_gts` 173 | """ 174 | 175 | def __init__(self, max_num_gts=None): 176 | self.max_num_gts = max_num_gts 177 | 178 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 179 | bboxes = np.array(bboxes) 180 | gt_bboxes = bboxes * scale_factor 181 | if flip: 182 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 183 | return gt_bboxes 184 | '''if len(gt_bboxes)>0: 185 | #try: 186 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 187 | #except IndexError: 188 | # raise AssertionError(gt_bboxes) 189 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 190 | if self.max_num_gts is None: 191 | return gt_bboxes 192 | else: 193 | num_gts = gt_bboxes.shape[0] 194 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 195 | padded_bboxes[:num_gts, :] = gt_bboxes 196 | return padded_bboxes''' 197 | 198 | 199 | class BboxTransform(object): 200 | """Preprocess gt bboxes. 201 | 202 | 1. rescale bboxes according to image size 203 | 2. flip bboxes (if needed) 204 | 3. pad the first dimension to `max_num_gts` 205 | """ 206 | 207 | def __init__(self, max_num_gts=None): 208 | self.max_num_gts = max_num_gts 209 | 210 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 211 | bboxes = np.array(bboxes) 212 | gt_bboxes = bboxes * scale_factor 213 | if flip: 214 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 215 | if len(gt_bboxes)>0: 216 | #try: 217 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 218 | #except IndexError: 219 | # raise AssertionError(gt_bboxes) 220 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 221 | if self.max_num_gts is None: 222 | return gt_bboxes 223 | else: 224 | num_gts = gt_bboxes.shape[0] 225 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 226 | padded_bboxes[:num_gts, :] = gt_bboxes 227 | return padded_bboxes 228 | 229 | 230 | class MaskTransform(object): 231 | """Preprocess masks. 232 | 233 | 1. 
resize masks to expected size and stack to a single array 234 | 2. flip the masks (if needed) 235 | 3. pad the masks (if needed) 236 | """ 237 | 238 | def __call__(self, masks, pad_shape, scale_factor, flip=False): 239 | masks = [ 240 | mmcv.imrescale(mask, scale_factor, interpolation='nearest') 241 | for mask in masks 242 | ] 243 | if flip: 244 | masks = [mask[:, ::-1] for mask in masks] 245 | padded_masks = [ 246 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks 247 | ] 248 | padded_masks = np.stack(padded_masks, axis=0) 249 | return padded_masks 250 | 251 | class MaskTransform_cornernet(object): 252 | def __call__(self, masks, new_scale, flip=False): 253 | masks = [mmcv.imrescale(mask, new_scale, interpolation='nearest') 254 | for mask in masks] 255 | #print(masks[0].shape) 256 | if flip: 257 | masks = [mask[:, ::-1] for mask in masks] 258 | 259 | return masks 260 | 261 | class Numpy2Tensor(object): 262 | 263 | def __init__(self): 264 | pass 265 | 266 | def __call__(self, *args): 267 | if len(args) == 1: 268 | return torch.from_numpy(args[0]) 269 | else: 270 | return tuple([torch.from_numpy(np.array(array)) for array in args]) 271 | -------------------------------------------------------------------------------- /src/core/corner/kp_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pdb 5 | import cv2 6 | from mmcv.runner import get_dist_info 7 | import mmcv 8 | 9 | def _gather_feat(feat, ind, mask=None): 10 | dim = feat.size(2) 11 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 12 | feat = feat.gather(1, ind) 13 | if mask is not None: 14 | mask = mask.unsqueeze(2).expand_as(feat) 15 | feat = feat[mask] 16 | feat = feat.view(-1, dim) 17 | return feat 18 | 19 | 20 | def _nms(heat, kernel=1): # kernel size is 3 in the paper 21 | pad = (kernel - 1) // 2 22 | 23 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 24 | keep = (hmax == heat).float() 25 | return heat * keep 26 | 27 | 28 | def _tranpose_and_gather_feat(feat, ind): 29 | feat = feat.permute(0, 2, 3, 1).contiguous() 30 | feat = feat.view(feat.size(0), -1, feat.size(3)) # why flatten the feature maps? 
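    # NOTE: this flatten answers the question above. After the permute the
    # tensor is (B, H, W, C); the view collapses the spatial dims to
    # (B, H*W, C) so that _gather_feat can pick one C-dim feature vector per
    # flat spatial index produced by _topk (ind encodes y * W + x). For a
    # (2, 256, 128, 128) input, feat becomes (2, 16384, 256) and gathering
    # K indices yields (2, K, 256).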
31 | feat = _gather_feat(feat, ind) 32 | return feat 33 | 34 | 35 | def _topk(scores, K=20): 36 | batch, cat, height, width = scores.size() # cat is the num of categories 37 | 38 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 39 | 40 | topk_clses = (topk_inds / (height * width)).int() 41 | 42 | topk_inds = topk_inds % (height * width) 43 | topk_ys = (topk_inds / width).int().float() 44 | topk_xs = (topk_inds % width).int().float() 45 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 46 | 47 | def _decode_center( 48 | tl_heat, br_heat, tl_off_c, br_off_c, tl_regr, br_regr, img_meta, 49 | scale_factor=None, rescale=False, obj=False, direct=False, 50 | linear_factor=8.0, K=100, kernel=3, ae_threshold=0.05, num_dets=1000 51 | ): 52 | batch, cat, height, width = tl_heat.size() 53 | _, inp_h, inp_w = img_meta['img_shape'] 54 | 55 | if not obj: 56 | tl_heat = torch.sigmoid(tl_heat) 57 | br_heat = torch.sigmoid(br_heat) 58 | 59 | # perform nms on heatmaps 60 | tl_heat = _nms(tl_heat, kernel=kernel) 61 | br_heat = _nms(br_heat, kernel=kernel) 62 | 63 | if direct: 64 | tl_off_c *= linear_factor 65 | br_off_c *= linear_factor 66 | else: 67 | tl_off_c = torch.exp(tl_off_c) 68 | br_off_c = torch.exp(br_off_c) 69 | 70 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 71 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 72 | tl_ys1 = tl_ys.view(batch, K, 1) 73 | tl_xs1 = tl_xs.view(batch, K, 1) 74 | br_ys1 = br_ys.view(batch, 1, K) 75 | br_xs1 = br_xs.view(batch, 1, K) 76 | 77 | tl_ys = tl_ys1.expand(batch, K, K) # expand for combine all possible boxes 78 | tl_xs = tl_xs1.expand(batch, K, K) 79 | br_ys = br_ys1.expand(batch, K, K) 80 | br_xs = br_xs1.expand(batch, K, K) 81 | 82 | if tl_regr is not None and br_regr is not None: 83 | tl_off_c = _tranpose_and_gather_feat(tl_off_c, tl_inds) 84 | br_off_c = _tranpose_and_gather_feat(br_off_c, br_inds) 85 | tl_off_c = tl_off_c.view(batch, K, 1, 2) 86 | br_off_c = br_off_c.view(batch, 1, K, 2) 87 | 88 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 89 | tl_regr = tl_regr.view(batch, K, 1, 2) 90 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 91 | br_regr = br_regr.view(batch, 1, K, 2) 92 | 93 | tl_cxs = tl_xs + tl_off_c[..., 0] + tl_regr[..., 0] 94 | tl_cys = tl_ys + tl_off_c[..., 1] + tl_regr[..., 1] 95 | br_cxs = br_xs - br_off_c[..., 0] + br_regr[..., 0] 96 | br_cys = br_ys - br_off_c[..., 1] + br_regr[..., 1] 97 | 98 | tl_xs = tl_xs + tl_regr[..., 0] 99 | tl_ys = tl_ys + tl_regr[..., 1] 100 | br_xs = br_xs + br_regr[..., 0] 101 | br_ys = br_ys + br_regr[..., 1] 102 | 103 | 104 | # all possible boxes based on top k corners (ignoring class) 105 | tl_xs *= (inp_w / width) 106 | tl_ys *= (inp_h / height) 107 | br_xs *= (inp_w / width) 108 | br_ys *= (inp_h / height) 109 | 110 | tl_cxs *= (inp_w / width) 111 | tl_cys *= (inp_h / height) 112 | br_cxs *= (inp_w / width) 113 | br_cys *= (inp_h / height) 114 | 115 | x_off = img_meta['border'][2] 116 | y_off = img_meta['border'][0] 117 | 118 | tl_xs -= torch.Tensor([x_off]).type_as(tl_xs) 119 | tl_ys -= torch.Tensor([y_off]).type_as(tl_ys) 120 | br_xs -= torch.Tensor([x_off]).type_as(br_xs) 121 | br_ys -= torch.Tensor([y_off]).type_as(br_ys) 122 | 123 | tl_xs *= tl_xs.gt(0.0).type_as(tl_xs) 124 | tl_ys *= tl_ys.gt(0.0).type_as(tl_ys) 125 | br_xs *= br_xs.gt(0.0).type_as(br_xs) 126 | br_ys *= br_ys.gt(0.0).type_as(br_ys) 127 | 128 | tl_cxs -= torch.Tensor([x_off]).type_as(tl_cxs) 129 | tl_cys -= torch.Tensor([y_off]).type_as(tl_cys) 130 | 
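    # NOTE: x_off/y_off come from img_meta['border'], i.e. the padding added
    # by crop_image at test time. Subtracting them here (and clamping at zero
    # with the .gt(0.0) masks) maps the corners and the centripetal-shift
    # centers from padded-crop coordinates back to the resized image frame.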
br_cxs -= torch.Tensor([x_off]).type_as(br_cxs) 131 | br_cys -= torch.Tensor([y_off]).type_as(br_cys) 132 | 133 | tl_cxs *= tl_cxs.gt(0.0).type_as(tl_cxs) 134 | tl_cys *= tl_cys.gt(0.0).type_as(tl_cys) 135 | br_cxs *= br_cxs.gt(0.0).type_as(br_cxs) 136 | br_cys *= br_cys.gt(0.0).type_as(br_cys) 137 | 138 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 139 | 140 | group_bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 141 | centers = torch.stack((tl_cxs, tl_cys, br_cxs, br_cys), dim=3) 142 | cre = torch.zeros_like(centers) 143 | area_bbox = torch.abs(br_xs - tl_xs )*torch.abs(tl_ys - br_ys ) + 1e-16 144 | 145 | ns = torch.ones_like(area_bbox)*2.1#.6 146 | l_idxs = area_bbox>3500#22500 147 | ns[l_idxs]=2.4 148 | 149 | cre[...,0] = ((ns+1)*group_bboxes[...,0] + (ns-1)*group_bboxes[...,2])/(2*ns) 150 | cre[...,1] = ((ns+1)*group_bboxes[...,1] + (ns-1)*group_bboxes[...,3])/(2*ns) 151 | cre[...,2] = ((ns-1)*group_bboxes[...,0] + (ns+1)*group_bboxes[...,2])/(2*ns) 152 | cre[...,3] = ((ns-1)*group_bboxes[...,1] + (ns+1)*group_bboxes[...,3])/(2*ns) 153 | 154 | area_center = torch.abs(br_cxs - tl_cxs)*torch.abs(tl_cys - br_cys) 155 | #area_bbox = torch.abs(br_xs - tl_xs )*torch.abs(tl_ys - br_ys ) + 1e-16 156 | area_cre = torch.abs(cre[...,0] - cre[...,2])*torch.abs(cre[...,1] - cre[...,3]) 157 | dists = area_center/area_cre#area_bbox 158 | 159 | tl_cx_inds = ((centers[...,0]<=cre[...,0]) | (centers[...,0]>=cre[...,2]))#.unsqueeze(0) 160 | tl_cy_inds = ((centers[...,1]<=cre[...,1]) | (centers[...,1]>=cre[...,3]))#.unsqueeze(0) 161 | br_cx_inds = ((centers[...,2]<=cre[...,0]) | (centers[...,2]>=cre[...,2]))#.unsqueeze(0) 162 | br_cy_inds = ((centers[...,3]<=cre[...,1]) | (centers[...,3]>=cre[...,3]))#.unsqueeze(0) 163 | 164 | ctr_inds = (tl_cx_inds | tl_cy_inds) & (br_cx_inds | br_cy_inds) 165 | 166 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 167 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 168 | scores = (tl_scores + br_scores) / 2 # scores for all possible boxes 169 | 170 | # reject boxes based on classes 171 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 172 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 173 | cls_inds = (tl_clses != br_clses) # tl and br should have the same class 174 | 175 | # reject boxes based on distances 176 | dist_inds = (dists > ae_threshold) 177 | 178 | # reject boxes based on widths and heights 179 | # tl should be upper and lefter than br 180 | width_inds = (br_xs < tl_xs) 181 | height_inds = (br_ys < tl_ys) 182 | 183 | scores[cls_inds] = -1 184 | scores[width_inds] = -1 185 | scores[height_inds] = -1 186 | scores[tl_cx_inds] = -1 187 | scores[tl_cy_inds] = -1 188 | scores[br_cx_inds] = -1 189 | scores[br_cy_inds] = -1 190 | 191 | scores = scores.view(batch, -1) 192 | scores, inds = torch.topk(scores, num_dets) 193 | scores = scores.unsqueeze(2) 194 | 195 | bboxes = bboxes.view(batch, -1, 4) 196 | bboxes = _gather_feat(bboxes, inds) 197 | 198 | clses = tl_clses.contiguous().view(batch, -1, 1) 199 | clses = _gather_feat(clses, inds).float() 200 | 201 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 202 | tl_scores = _gather_feat(tl_scores, inds).float() 203 | br_scores = br_scores.contiguous().view(batch, -1, 1) 204 | br_scores = _gather_feat(br_scores, inds).float() 205 | 206 | return bboxes, scores, clses 207 | 208 | 209 | def _neg_loss(preds, gt): 210 | pos_inds = gt.eq(1) 211 | neg_inds = gt.lt(1) 212 | # 213 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 214 | # 215 | loss = 0 
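    # NOTE: this is the penalty-reduced focal loss from CornerNet. With p the
    # predicted score and y the Gaussian-smoothed target, each location
    # contributes
    #     -(1 - p)^2 * log(p)              where y == 1
    #     -(1 - y)^4 * p^2 * log(1 - p)    where y < 1
    # and the sum is normalized by the number of positive locations.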
216 |     for pred in preds:
217 |         pos_pred = pred[pos_inds]
218 |         neg_pred = pred[neg_inds]
219 |         #
220 |         pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
221 |         neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
222 |         #
223 |         num_pos = pos_inds.float().sum()
224 |         pos_loss = pos_loss.sum()
225 |         neg_loss = neg_loss.sum()
226 |         #
227 |         # avoid division by zero when there are no positive locations
228 |         if pos_pred.nelement() == 0:
229 |             loss = loss - neg_loss
230 |         else:
231 |             loss = loss - (pos_loss + neg_loss) / num_pos
232 |     return loss
233 | 
234 | 
235 | def _sigmoid(x):
236 |     x = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
237 |     return x
238 | 
239 | 
240 | def _ae_loss(tag0, tag1, mask):  # only positive corner locations contribute to the loss
241 |     num = mask.sum(dim=1, keepdim=True).float()
242 |     tag0 = tag0.squeeze()
243 |     tag1 = tag1.squeeze()
244 |     #
245 |     tag_mean = (tag0 + tag1) / 2
246 |     #
247 |     tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4)
248 |     tag0 = tag0[mask].sum()
249 |     tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4)
250 |     tag1 = tag1[mask].sum()
251 |     pull = tag0 + tag1  # pull loss: smaller means tag0 and tag1 are more similar
252 |     #
253 |     mask = mask.unsqueeze(1) + mask.unsqueeze(2)
254 |     mask = mask.eq(2)
255 |     num = num.unsqueeze(2)
256 |     num2 = (num - 1) * num
257 |     dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2)
258 |     dist = 1 - torch.abs(dist)
259 |     dist = nn.functional.relu(dist, inplace=True)
260 |     dist = dist - 1 / (num + 1e-4)
261 |     dist = dist / (num2 + 1e-4)
262 |     dist = dist[mask]
263 |     push = dist.sum()
264 |     return pull, push
265 | 
266 | 
267 | def _regr_loss(regr, gt_regr, mask):  # regression loss
268 |     num = mask.float().sum()
269 |     mask = mask.unsqueeze(2).expand_as(gt_regr)
270 | 
271 |     regr = regr[mask]
272 |     gt_regr = gt_regr[mask]
273 | 
274 |     regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)  # summed loss; reduction='sum' in newer PyTorch
275 |     regr_loss = regr_loss / (num + 1e-4)
276 |     return regr_loss
277 | 
278 | 
279 | def gaussian2D(shape, sigma=1):
280 |     m, n = [(ss - 1.) / 2.
for ss in shape] 281 | y, x = np.ogrid[-m:m+1,-n:n+1] 282 | # 283 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 284 | 285 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 286 | return h 287 | 288 | def draw_gaussian(heatmap, center, radius, k=1): 289 | diameter = 2 * radius + 1 290 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 291 | x, y = center 292 | # 293 | height, width = heatmap.shape[0:2] 294 | #process the border 295 | left, right = min(x, radius), min(width - x, radius + 1) 296 | top, bottom = min(y, radius), min(height - y, radius + 1) 297 | # 298 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 299 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 300 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 301 | 302 | def gaussian_radius(det_size, min_overlap): 303 | height, width = det_size 304 | 305 | a1 = 1 306 | b1 = (height + width) 307 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 308 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 309 | r1 = (b1 - sq1) / (2 * a1) 310 | 311 | a2 = 4 312 | b2 = 2 * (height + width) 313 | c2 = (1 - min_overlap) * width * height 314 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 315 | r2 = (b2 - sq2) / (2 * a2) 316 | 317 | a3 = 4 * min_overlap 318 | b3 = -2 * min_overlap * (height + width) 319 | c3 = (min_overlap - 1) * width * height 320 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 321 | r3 = (b3 + sq3) / (2 * a3) 322 | return min(r1, r2, r3) 323 | 324 | -------------------------------------------------------------------------------- /src/datasets/extra_aug.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | from numpy import random 4 | import pdb 5 | 6 | from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps 7 | 8 | 9 | class PhotoMetricDistortion(object): 10 | 11 | def __init__(self, 12 | brightness_delta=32, 13 | contrast_range=(0.5, 1.5), 14 | saturation_range=(0.5, 1.5), 15 | hue_delta=18): 16 | self.brightness_delta = brightness_delta 17 | self.contrast_lower, self.contrast_upper = contrast_range 18 | self.saturation_lower, self.saturation_upper = saturation_range 19 | self.hue_delta = hue_delta 20 | 21 | def __call__(self, img, boxes, labels): 22 | # random brightness 23 | if random.randint(2): 24 | delta = random.uniform(-self.brightness_delta, 25 | self.brightness_delta) 26 | img += delta 27 | 28 | # mode == 0 --> do random contrast first 29 | # mode == 1 --> do random contrast last 30 | mode = random.randint(2) 31 | if mode == 1: 32 | if random.randint(2): 33 | alpha = random.uniform(self.contrast_lower, 34 | self.contrast_upper) 35 | img *= alpha 36 | 37 | # convert color from BGR to HSV 38 | img = mmcv.bgr2hsv(img) 39 | 40 | # random saturation 41 | if random.randint(2): 42 | img[..., 1] *= random.uniform(self.saturation_lower, 43 | self.saturation_upper) 44 | 45 | # random hue 46 | if random.randint(2): 47 | img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) 48 | img[..., 0][img[..., 0] > 360] -= 360 49 | img[..., 0][img[..., 0] < 0] += 360 50 | 51 | # convert color from HSV to BGR 52 | img = mmcv.hsv2bgr(img) 53 | 54 | # random contrast 55 | if mode == 0: 56 | if random.randint(2): 57 | alpha = random.uniform(self.contrast_lower, 58 | self.contrast_upper) 59 | img *= alpha 60 | 61 | # randomly swap channels 62 | if random.randint(2): 63 | img = img[..., random.permutation(3)] 64 | 65 | return img, boxes, labels 66 | 67 | 68 | class Expand(object): 69 
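    # NOTE: Expand pastes the image at a random position on a larger canvas
    # filled with the dataset mean and shifts the boxes by the paste offset,
    # i.e. it zooms out by a factor sampled from ratio_range.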
| 70 | def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): 71 | if to_rgb: 72 | self.mean = mean[::-1] 73 | else: 74 | self.mean = mean 75 | self.min_ratio, self.max_ratio = ratio_range 76 | 77 | def __call__(self, img, boxes, labels): 78 | if random.randint(2): 79 | return img, boxes, labels 80 | 81 | h, w, c = img.shape 82 | ratio = random.uniform(self.min_ratio, self.max_ratio) 83 | expand_img = np.full((int(h * ratio), int(w * ratio), c), 84 | self.mean).astype(img.dtype) 85 | left = int(random.uniform(0, w * ratio - w)) 86 | top = int(random.uniform(0, h * ratio - h)) 87 | expand_img[top:top + h, left:left + w] = img 88 | img = expand_img 89 | boxes += np.tile((left, top), 2) 90 | return img, boxes, labels 91 | 92 | 93 | class RandomCrop(object): 94 | 95 | def __init__(self, 96 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 97 | min_crop_size=0.3): 98 | # 1: return ori img 99 | self.sample_mode = (1, *min_ious, 0) 100 | self.min_crop_size = min_crop_size 101 | 102 | def __call__(self, img, boxes, labels): 103 | h, w, c = img.shape 104 | while True: 105 | mode = random.choice(self.sample_mode) 106 | if mode == 1: 107 | return img, boxes, labels 108 | 109 | min_iou = mode 110 | for i in range(50): 111 | new_w = random.uniform(self.min_crop_size * w, w) 112 | new_h = random.uniform(self.min_crop_size * h, h) 113 | 114 | # h / w in [0.5, 2] 115 | if new_h / new_w < 0.5 or new_h / new_w > 2: 116 | continue 117 | 118 | left = random.uniform(w - new_w) 119 | top = random.uniform(h - new_h) 120 | 121 | patch = np.array((int(left), int(top), int(left + new_w), 122 | int(top + new_h))) 123 | overlaps = bbox_overlaps( 124 | patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1) 125 | if overlaps.min() < min_iou: 126 | continue 127 | 128 | # center of boxes should inside the crop img 129 | center = (boxes[:, :2] + boxes[:, 2:]) / 2 130 | mask = (center[:, 0] > patch[0]) * ( 131 | center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * ( 132 | center[:, 1] < patch[3]) 133 | if not mask.any(): 134 | continue 135 | boxes = boxes[mask] 136 | labels = labels[mask] 137 | 138 | # adjust boxes 139 | img = img[patch[1]:patch[3], patch[0]:patch[2]] 140 | boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:]) 141 | boxes[:, :2] = boxes[:, :2].clip(min=patch[:2]) 142 | boxes -= np.tile(patch[:2], 2) 143 | 144 | return img, boxes, labels 145 | 146 | def _get_border(border, size): 147 | i = 1 148 | while size - border // i <= border // i: 149 | i *= 2 150 | return border // i 151 | 152 | class KeepRatioCrop(object): 153 | 154 | def __init__(self, 155 | random_scales=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),# 1.4), 156 | size=(511,511), border=128): 157 | self.random_scales = random_scales 158 | self.crop_size = size 159 | self.border = border 160 | 161 | def __call__(self, img, boxes, labels): 162 | h, w, c = img.shape 163 | while True: 164 | scale = random.choice(self.random_scales) 165 | new_h = int(self.crop_size[0] * scale) 166 | new_w = int(self.crop_size[1] * scale) 167 | h_border = _get_border(self.border, h) 168 | w_border = _get_border(self.border, w) 169 | 170 | for i in range(50): 171 | ctx = np.random.randint(low=w_border, high=w-w_border) 172 | cty = np.random.randint(low=h_border, high=h-h_border) 173 | 174 | x0, x1 = max(ctx - new_w // 2, 0), min(ctx + new_w // 2, w) 175 | y0, y1 = max(cty - new_h // 2, 0), min(cty + new_h // 2, h) 176 | patch = np.array((int(x0), int(y0), int(x1), int(y1))) 177 | 178 | center = (boxes[:, :2] + boxes[:, 2:]) / 2 179 | mask = (center[:, 0] > patch[0]) * ( 180 | 
center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * ( 181 | center[:, 1] < patch[3]) 182 | if not mask.any(): 183 | continue 184 | boxes = boxes[mask] 185 | labels = labels[mask] 186 | 187 | cropped_img = np.zeros((new_h, new_w, 3), dtype=img.dtype) 188 | cropped_img[:,:,0] += 103.53 189 | cropped_img[:,:,1] += 116.28 190 | cropped_img[:,:,2] += 123.68 191 | 192 | left_w, right_w = ctx - x0, x1 - ctx 193 | top_h, bottom_h = cty - y0, y1 - cty 194 | 195 | # crop image 196 | cropped_ctx, cropped_cty = new_w // 2, new_h // 2 197 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w) 198 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h) 199 | cropped_img[y_slice, x_slice, :] = img[y0:y1, x0:x1, :] 200 | 201 | # crop detections 202 | cropped_detections = boxes.copy() 203 | cropped_detections[:, 0:4:2] -= x0 204 | cropped_detections[:, 1:4:2] -= y0 205 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w 206 | cropped_detections[:, 1:4:2] += cropped_cty - top_h 207 | #print(boxes.shape,'ori') 208 | 209 | cropped_detections, labels, keep_inds = _clip_detections(cropped_img, cropped_detections, labels) 210 | #print(cropped_detections.shape) 211 | 212 | #import pdb 213 | #pdb.set_trace() 214 | crop_args = (mask, keep_inds, new_h, new_w, y_slice, x_slice, x0, y0, x1, y1) 215 | 216 | return cropped_img, cropped_detections, labels, crop_args 217 | 218 | def _clip_detections(image, detections, labels): 219 | detections = detections.copy() 220 | height, width = image.shape[0:2] 221 | 222 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1) 223 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1) 224 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \ 225 | ((detections[:, 3] - detections[:, 1]) > 0) 226 | detections = detections[keep_inds] 227 | labels = labels[keep_inds] 228 | return detections, labels, keep_inds 229 | 230 | 231 | class Noise(object): 232 | 233 | def __init__(self, mean=0, std=1, noise_ratio=0): 234 | self.mean = mean 235 | self.std = std 236 | self.noise_ratio = noise_ratio 237 | 238 | def __call__(self, img, boxes, labels): 239 | if np.random.uniform(0,1) > self.noise_ratio: 240 | return img, boxes, labels 241 | 242 | h, w, c = img.shape 243 | noise_value = np.random.normal(self.mean, self.std, img.shape) 244 | img = img + noise_value 245 | return img, boxes, labels 246 | 247 | class MaskCrop(object): 248 | def __call__(self, gt_masks, crop_args): 249 | ''' 250 | :param gt_masks: a list of gt masks(np.ararry) 251 | :param crop_args: 252 | :return: 253 | ''' 254 | keepinds1, keepinds2, new_h, new_w, y_slice, x_slice, x0, y0, x1, y1 = crop_args 255 | gt_masks = np.stack(gt_masks, 0) 256 | #print('mask shape', gt_masks.shape) 257 | #pdb.set_trace() 258 | gt_masks = gt_masks[keepinds1] 259 | gt_masks = gt_masks[keepinds2] 260 | crop_masks = np.zeros([len(gt_masks), new_h, new_w]) 261 | 262 | crop_masks[:, y_slice, x_slice] = gt_masks[:, y0:y1, x0:x1] 263 | 264 | return list(crop_masks) 265 | 266 | 267 | class ExtraAugmentation_cornernet(object): 268 | 269 | def __init__(self, 270 | photo_metric_distortion=None, 271 | expand=None, 272 | random_crop=None, 273 | noise=None): 274 | self.transforms = [] 275 | if photo_metric_distortion is not None: 276 | self.transforms.append( 277 | PhotoMetricDistortion(**photo_metric_distortion)) 278 | #if expand is not None: 279 | # self.transforms.append(Expand(**expand)) 280 | if random_crop is not None: 281 | self.transforms.append(KeepRatioCrop()) 282 | 
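            # NOTE: in cornernet mode `random_crop` acts only as an on/off
            # switch; KeepRatioCrop is built with its defaults and the
            # parameters inside the `random_crop` dict are ignored.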
#self.transforms.append(RandomCrop(**random_crop)) 283 | if noise is not None: 284 | self.transforms.append(Noise(**noise)) 285 | 286 | def __call__(self, img, boxes, labels): 287 | img = img.astype(np.float32) 288 | for transform in self.transforms: 289 | if isinstance(transform, KeepRatioCrop): 290 | img, boxes, labels, crop_args = transform(img, boxes, labels) 291 | else: 292 | img, boxes, labels = transform(img, boxes, labels) 293 | return img, boxes, labels, crop_args 294 | 295 | 296 | class ExtraAugmentation(object): 297 | 298 | def __init__(self, 299 | photo_metric_distortion=None, 300 | expand=None, 301 | random_crop=None, 302 | noise=None): 303 | self.transforms = [] 304 | if photo_metric_distortion is not None: 305 | self.transforms.append( 306 | PhotoMetricDistortion(**photo_metric_distortion)) 307 | if expand is not None: 308 | self.transforms.append(Expand(**expand)) 309 | if random_crop is not None: 310 | #self.transforms.append(KeepRatioCrop()) 311 | self.transforms.append(RandomCrop(**random_crop)) 312 | if noise is not None: 313 | self.transforms.append(Noise(**noise)) 314 | 315 | def __call__(self, img, boxes, labels): 316 | img = img.astype(np.float32) 317 | for transform in self.transforms: 318 | img, boxes, labels = transform(img, boxes, labels) 319 | return img, boxes, labels 320 | -------------------------------------------------------------------------------- /src/datasets/custom.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import cv2 3 | 4 | import os.path as osp 5 | 6 | import mmcv 7 | import numpy as np 8 | from mmcv.parallel import DataContainer as DC 9 | from torch.utils.data import Dataset 10 | 11 | from .transforms import (ImageTransform, BboxTransform, MaskTransform, MaskTransform_cornernet, 12 | Numpy2Tensor, ImageTransform_cornernet, BboxTransform_cornernet) 13 | from .utils import to_tensor, random_scale 14 | from .extra_aug import ExtraAugmentation, ExtraAugmentation_cornernet, MaskCrop 15 | 16 | import cv2 17 | import random 18 | 19 | class CustomDataset(Dataset): 20 | """Custom dataset for detection. 21 | 22 | Annotation format: 23 | [ 24 | { 25 | 'filename': 'a.jpg', 26 | 'width': 1280, 27 | 'height': 720, 28 | 'ann': { 29 | 'bboxes': (n, 4), 30 | 'labels': (n, ), 31 | 'bboxes_ignore': (k, 4), 32 | 'labels_ignore': (k, 4) (optional field) 33 | } 34 | }, 35 | ... 36 | ] 37 | 38 | The `ann` field is optional for testing. 
39 | """ 40 | 41 | CLASSES = None 42 | 43 | def __init__(self, 44 | ann_file, 45 | img_prefix, 46 | img_scale, 47 | img_norm_cfg, 48 | size_divisor=None, 49 | proposal_file=None, 50 | num_max_proposals=1000, 51 | flip_ratio=0, 52 | with_mask=False, 53 | with_crowd=True, 54 | with_label=True, 55 | with_triple_grey=False, #default no triple-grey op 56 | mixup=False, 57 | mixup_sampler=np.random.beta, 58 | mixup_args=[0.4,0.4], 59 | extra_aug=None, 60 | resize_keep_ratio=True, 61 | test_mode=False, 62 | cornernet_mode=False, 63 | with_maskhead=False, 64 | **kwargs): 65 | # prefix of images path 66 | self.img_prefix = img_prefix 67 | 68 | # load annotations (and proposals) 69 | self.img_infos = self.load_annotations(ann_file) 70 | if proposal_file is not None: 71 | self.proposals = self.load_proposals(proposal_file) 72 | else: 73 | self.proposals = None 74 | # filter images with no annotation during training 75 | if not test_mode: 76 | valid_inds = self._filter_imgs() 77 | self.img_infos = [self.img_infos[i] for i in valid_inds] 78 | if self.proposals is not None: 79 | self.proposals = [self.proposals[i] for i in valid_inds] 80 | 81 | # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...] 82 | self.img_scales = img_scale if isinstance(img_scale, 83 | list) else [img_scale] 84 | assert mmcv.is_list_of(self.img_scales, tuple) 85 | # normalization configs 86 | self.img_norm_cfg = img_norm_cfg 87 | 88 | # max proposals per image 89 | self.num_max_proposals = num_max_proposals 90 | # flip ratio 91 | self.flip_ratio = flip_ratio 92 | assert flip_ratio >= 0 and flip_ratio <= 1 93 | # padding border to ensure the image size can be divided by 94 | # size_divisor (used for FPN) 95 | self.size_divisor = size_divisor 96 | 97 | # with mask or not (reserved field, takes no effect) 98 | self.with_mask = with_mask 99 | # some datasets provide bbox annotations as ignore/crowd/difficult, 100 | # if `with_crowd` is True, then these info is returned. 
101 | self.with_crowd = with_crowd 102 | # with label is False for RPN 103 | self.with_label = with_label 104 | # in test mode or not 105 | self.test_mode = test_mode 106 | # if apply triple grey op on training imgs 107 | self.with_triple_grey=with_triple_grey 108 | # if apply mixup op on training imgs 109 | self.mixup=mixup 110 | self.mixup_sampler=mixup_sampler 111 | self.mixup_args=mixup_args 112 | 113 | # set group flag for the sampler 114 | if not self.test_mode: 115 | self._set_group_flag() 116 | # transforms 117 | self.cornernet_mode = cornernet_mode 118 | self.with_maskhead = with_maskhead 119 | 120 | if self.cornernet_mode: 121 | self.img_transform = ImageTransform_cornernet( 122 | size_divisor=self.size_divisor, **self.img_norm_cfg) 123 | self.bbox_transform = BboxTransform_cornernet() 124 | self.mask_transform = MaskTransform_cornernet() 125 | else: 126 | self.img_transform = ImageTransform( 127 | size_divisor=self.size_divisor, **self.img_norm_cfg) 128 | self.bbox_transform = BboxTransform() 129 | self.mask_transform = MaskTransform() 130 | 131 | #self.mask_transform = MaskTransform() 132 | self.numpy2tensor = Numpy2Tensor() 133 | 134 | # if use extra augmentation 135 | if extra_aug is not None: 136 | if self.cornernet_mode: 137 | self.extra_aug = ExtraAugmentation_cornernet(**extra_aug) 138 | self.mask_crop = MaskCrop() 139 | else: 140 | self.extra_aug = ExtraAugmentation(**extra_aug) 141 | else: 142 | self.extra_aug = None 143 | 144 | # image rescale if keep ratio 145 | self.resize_keep_ratio = resize_keep_ratio 146 | 147 | def __len__(self): 148 | return len(self.img_infos) 149 | 150 | def load_annotations(self, ann_file): 151 | return mmcv.load(ann_file) 152 | 153 | def load_proposals(self, proposal_file): 154 | return mmcv.load(proposal_file) 155 | 156 | def get_ann_info(self, idx): 157 | return self.img_infos[idx]['ann'] 158 | 159 | def _filter_imgs(self, min_size=32): 160 | """Filter images too small.""" 161 | valid_inds = [] 162 | for i, img_info in enumerate(self.img_infos): 163 | if min(img_info['width'], img_info['height']) >= min_size: 164 | valid_inds.append(i) 165 | return valid_inds 166 | 167 | def _set_group_flag(self): 168 | """Set flag according to image aspect ratio. 169 | 170 | Images with aspect ratio greater than 1 will be set as group 1, 171 | otherwise group 0. 172 | """ 173 | self.flag = np.zeros(len(self), dtype=np.uint8) 174 | for i in range(len(self)): 175 | img_info = self.img_infos[i] 176 | if img_info['width'] / img_info['height'] > 1: 177 | self.flag[i] = 1 178 | 179 | def _rand_another(self, idx): 180 | pool = np.where(self.flag == self.flag[idx])[0] 181 | return np.random.choice(pool) 182 | 183 | def __getitem__(self, idx): 184 | if self.test_mode: 185 | return self.prepare_test_img(idx) 186 | while True: 187 | data = self.prepare_train_img(idx) 188 | if data is None: 189 | idx = self._rand_another(idx) 190 | continue 191 | return data 192 | def prepare_train_img_(self,idx): 193 | img_info = self.img_infos[idx] 194 | if 'COCO_val2014_' in img_info['filename']: 195 | s = 13 196 | elif 'COCO_train2014_' in img_info['filename']: 197 | s = 15 198 | else: 199 | s = 0 200 | # load image 201 | img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'][s:])) 202 | # load proposals if necessary 203 | if self.proposals is not None: 204 | proposals = self.proposals[idx][:self.num_max_proposals] 205 | # TODO: Handle empty proposals properly. 
Currently images with 206 | # no proposals are just ignored, but they can be used for 207 | # training in concept. 208 | if len(proposals) == 0: 209 | return None 210 | if not (proposals.shape[1] == 4 or proposals.shape[1] == 5): 211 | raise AssertionError( 212 | 'proposals should have shapes (n, 4) or (n, 5), ' 213 | 'but found {}'.format(proposals.shape)) 214 | if proposals.shape[1] == 5: 215 | scores = proposals[:, 4, None] 216 | proposals = proposals[:, :4] 217 | else: 218 | scores = None 219 | else: 220 | proposals = None 221 | scores = None 222 | 223 | ann = self.get_ann_info(idx) 224 | gt_bboxes = ann['bboxes'] 225 | gt_labels = ann['labels'] 226 | if self.with_crowd: 227 | gt_bboxes_ignore = ann['bboxes_ignore'] 228 | else: 229 | gt_bboxes_ignore = None 230 | # skip the image if there is no valid gt bbox 231 | if len(gt_bboxes) == 0: 232 | #official version: 233 | #return None 234 | 235 | gt_bboxes = [[0,0,0,0]] 236 | if self.with_label: 237 | gt_labels = [0] 238 | if self.extra_aug is not None: 239 | if self.cornernet_mode: 240 | img, gt_bboxes, gt_labels, crop_args = self.extra_aug(img, gt_bboxes, gt_labels) 241 | else: 242 | img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, gt_labels) 243 | # apply transforms 244 | #first step of transform: convert color 245 | if self.with_triple_grey: 246 | if random.random()>=0.5: 247 | gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 248 | img = cv2.merge([gray,gray,gray]) 249 | else: 250 | pass 251 | 252 | #after color convert,test img color 253 | 254 | flip = True if np.random.rand() < self.flip_ratio else False 255 | img_scale = random_scale(self.img_scales) # sample a scale 256 | if self.with_mask: 257 | gt_masks = ann['masks'] 258 | if self.cornernet_mode: 259 | gt_masks = self.mask_crop(gt_masks, crop_args) 260 | 261 | else: 262 | gt_masks = None 263 | 264 | return img_info, img, proposals, scores, gt_bboxes, gt_labels, flip, img_scale, gt_bboxes_ignore, gt_masks 265 | 266 | def prepare_train_img(self, idx): 267 | img_info, img, proposals, scores, gt_bboxes, gt_labels, flip, img_scale, gt_bboxes_ignore, gt_masks = self.prepare_train_img_(idx) 268 | if self.mixup: 269 | idx_ = self._rand_another(idx) 270 | img_info_, img_, proposals_, scores_, gt_bboxes_, gt_labels_, flip_, img_scale_, gt_bboxes_ignore_, gt_masks_ = self.prepare_train_img_(idx_) 271 | lambd = max(0, min(1, self.mixup_sampler(*self.mixup_args))) 272 | height = max(img_info['height'], img_info_['height']) 273 | width = max(img_info['width'], img_info_['width']) 274 | mix_img = np.zeros(shape=(height, width, 3), dtype='float32') 275 | mix_img[:img.shape[0], :img.shape[1], :] = img.astype('float32') * lambd 276 | mix_img[:img_.shape[0], :img_.shape[1], :] += img_.astype('float32') * (1. 
- lambd) 277 | 278 | gt_bboxes = np.vstack((gt_bboxes,gt_bboxes_)) 279 | if self.with_label: 280 | gt_labels = np.hstack((gt_labels,gt_labels_)) 281 | if self.with_crowd: 282 | gt_bboxes_ignore = np.vstack((gt_bboxes_ignore, gt_bboxes_ignore_)) 283 | if self.with_mask: 284 | gt_masks = np.vstack((gt_masks, gt_masks_)) 285 | img = mix_img 286 | 287 | img, img_shape, pad_shape, scale_factor = self.img_transform(img, img_scale, flip, keep_ratio=self.resize_keep_ratio) 288 | 289 | img = img.copy() 290 | 291 | gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip) 292 | 293 | if self.with_crowd: 294 | gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape, 295 | scale_factor, flip) 296 | if self.with_mask: 297 | if self.cornernet_mode: 298 | if self.with_maskhead: 299 | gt_masks = self.mask_transform(gt_masks, img_shape[:2], flip) 300 | else: 301 | gt_masks = self.mask_transform(gt_masks, (128, 128), flip) 302 | else: 303 | gt_masks = self.mask_transform(gt_masks, pad_shape, scale_factor, flip) 304 | 305 | if self.mixup: 306 | ori_shape = (height, width, 3) 307 | else: 308 | ori_shape = (img_info['height'], img_info['width'], 3) 309 | img_meta = dict( 310 | ori_shape=ori_shape, 311 | img_shape=img_shape, 312 | pad_shape=pad_shape, 313 | scale_factor=scale_factor, 314 | flip=flip) 315 | data = dict( 316 | img=DC(to_tensor(img), stack=True), 317 | img_meta=DC(img_meta, cpu_only=True), 318 | gt_bboxes=DC(to_tensor(gt_bboxes))) 319 | if self.proposals is not None: 320 | data['proposals'] = DC(to_tensor(proposals)) 321 | if self.with_label: 322 | data['gt_labels'] = DC(to_tensor(gt_labels)) 323 | if self.with_crowd: 324 | data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) 325 | if self.with_mask: 326 | data['gt_masks'] = DC(gt_masks, cpu_only=True) 327 | return data 328 | 329 | 330 | def prepare_test_img(self, idx, gt=True):#keep ratio and padding to desired size 331 | """Prepare an image for testing (multi-scale and flipping)""" 332 | img_info = self.img_infos[idx] 333 | img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) 334 | 335 | if gt: 336 | ann = self.get_ann_info(idx) 337 | gt_bboxes = ann['bboxes'] 338 | gt_labels = ann['labels'] 339 | if self.with_mask: 340 | gt_masks = ann['masks'] 341 | 342 | def prepare_single(img, scale, flip): 343 | _img, border, offset = self.img_transform( 344 | img, scale, flip, keep_ratio=self.resize_keep_ratio, crop=True) 345 | _img_meta = dict( 346 | ori_shape=(img_info['height'], img_info['width'], 3), 347 | img_shape=_img.shape, scale=scale, 348 | border=border, offset=offset, 349 | flip=flip) 350 | _img = to_tensor(_img) 351 | 352 | return _img, _img_meta 353 | 354 | imgs = [] 355 | img_metas = [] 356 | 357 | for scale in [1.0]: 358 | _img, _img_meta, = prepare_single(img, scale, False) 359 | imgs.append(_img) 360 | img_metas.append(DC(_img_meta, cpu_only=True)) 361 | 362 | if self.flip_ratio > 0: 363 | _img, _img_meta= prepare_single( 364 | img, scale, True) 365 | imgs.append(_img) 366 | img_metas.append(DC(_img_meta, cpu_only=True)) 367 | data = dict(img=imgs, img_meta=img_metas) 368 | if not self.with_mask: 369 | h, w = _img.shape[0:2] 370 | gt_masks = [np.zeros([h, w])] 371 | 372 | if len(gt_labels)==0: 373 | gt_labels = np.array([-1]) 374 | h,w=_img.shape[0:2] 375 | gt_masks = [np.zeros([h,w])] 376 | gt_bboxes = np.array([[0,0,0,0]]) 377 | if gt: 378 | data['gt_bboxes'] = gt_bboxes 379 | data['gt_labels'] = gt_labels 380 | data['gt_masks'] = gt_masks 381 | data['idx'] = idx 382 | return data 383 | 
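# NOTE: a minimal, self-contained sketch of the mixup step performed in
# prepare_train_img above, assuming numpy only; `mixup_pair` is a
# hypothetical helper name, not part of this codebase.
import numpy as np

def mixup_pair(img_a, boxes_a, img_b, boxes_b, alpha=0.4):
    # Sample the mixing coefficient from Beta(alpha, alpha) and clamp it to
    # [0, 1], mirroring the default mixup_sampler / mixup_args above.
    lambd = float(np.clip(np.random.beta(alpha, alpha), 0.0, 1.0))
    # Blend both images on a canvas large enough to hold either of them.
    h = max(img_a.shape[0], img_b.shape[0])
    w = max(img_a.shape[1], img_b.shape[1])
    canvas = np.zeros((h, w, 3), dtype=np.float32)
    canvas[:img_a.shape[0], :img_a.shape[1]] = img_a.astype(np.float32) * lambd
    canvas[:img_b.shape[0], :img_b.shape[1]] += img_b.astype(np.float32) * (1.0 - lambd)
    # Ground truth from both images is kept: the boxes are concatenated.
    return canvas, np.vstack((boxes_a, boxes_b)), lambd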
-------------------------------------------------------------------------------- /src/models/bbox_heads/centripetal_mask.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from mmdet.core.corner.corner_target import corner_target 9 | from mmcv.cnn import normal_init 10 | 11 | from mmdet.ops import soft_nms, DeformConv, TopPool, BottomPool, LeftPool, RightPool 12 | from mmdet.core import smooth_l1_loss 13 | 14 | from mmdet.core.corner.kp_utils import _decode_center 15 | 16 | from ..registry import HEADS 17 | from ..utils import ConvModule 18 | 19 | 20 | @HEADS.register_module 21 | class Centripetal_mask(nn.Module): 22 | 23 | def __init__(self, 24 | num_classes, 25 | in_channels, with_mask=False): 26 | super(Centripetal_mask, self).__init__() 27 | self.num_classes = num_classes - 1 28 | self.in_channels = in_channels 29 | 30 | self.tl_out_channels = self.num_classes + 2 + 2 # 2 is the dim for offset map, as there are 2 coordinates, x,y 31 | self.br_out_channels = self.num_classes + 2 + 2 32 | 33 | self.convs = nn.ModuleList() 34 | self.mid_convs = nn.ModuleList() 35 | 36 | self.with_mask = with_mask 37 | 38 | self._init_layers() 39 | 40 | def _init_layers(self): 41 | 42 | self.tl_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 43 | self.br_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 44 | self.mid_tl_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 45 | self.mid_br_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 46 | 47 | self.tl_offset = nn.Conv2d(2, 18, 1, bias=False) 48 | self.br_offset = nn.Conv2d(2, 18, 1, bias=False) 49 | self.mid_tl_offset = nn.Conv2d(2, 18, 1, bias=False) 50 | self.mid_br_offset = nn.Conv2d(2, 18, 1, bias=False) 51 | 52 | self.tl_pool = TopLeftPool(self.in_channels) 53 | self.br_pool = BottomRightPool(self.in_channels) 54 | self.mid_tl_pool = TopLeftPool(self.in_channels) 55 | self.mid_br_pool = BottomRightPool(self.in_channels) 56 | 57 | self.tl_heat = make_kp_layer(out_dim=self.num_classes) 58 | self.br_heat = make_kp_layer(out_dim=self.num_classes) 59 | 60 | self.tl_off_c = make_kp_layer(out_dim=2) 61 | self.br_off_c = make_kp_layer(out_dim=2) 62 | 63 | self.tl_off_c_2 = make_kp_layer(out_dim=2) 64 | self.br_off_c_2 = make_kp_layer(out_dim=2) 65 | 66 | self.tl_off = make_kp_layer(out_dim=2) 67 | self.br_off = make_kp_layer(out_dim=2) 68 | 69 | # middle supervision 70 | 71 | self.mid_tl_heat = make_kp_layer(out_dim=self.num_classes) 72 | self.mid_br_heat = make_kp_layer(out_dim=self.num_classes) 73 | 74 | self.mid_tl_off_c = make_kp_layer(out_dim=2) 75 | self.mid_br_off_c = make_kp_layer(out_dim=2) 76 | 77 | self.mid_tl_off_c_2 = make_kp_layer(out_dim=2) 78 | self.mid_br_off_c_2 = make_kp_layer(out_dim=2) 79 | 80 | self.mid_tl_off = make_kp_layer(out_dim=2) 81 | self.mid_br_off = make_kp_layer(out_dim=2) 82 | 83 | if self.with_mask: 84 | for i in range(4): 85 | self.convs.append( 86 | ConvModule(self.in_channels, self.in_channels, 3, padding=1) 87 | ) 88 | self.mid_convs.append( 89 | ConvModule(self.in_channels, self.in_channels, 3, padding=1) 90 | ) 91 | 92 | self.conv_logits = nn.Conv2d(self.in_channels, 81, 1) 93 | self.mid_conv_logits = nn.Conv2d(self.in_channels, 81, 1) 94 | 95 | def init_weights(self): 96 | """ 97 | TODO: weight init method 98 | """ 99 | self.tl_heat[-1].bias.data.fill_(-2.19) 100 | 
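        # NOTE: -2.19 here and in the three fills below is roughly
        # log(0.1 / 0.9), so the heatmap logits start at a sigmoid prior of
        # about 0.1; this keeps the focal loss from being swamped by the huge
        # negative background early in training (the RetinaNet bias-init trick).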
self.br_heat[-1].bias.data.fill_(-2.19) 101 | self.mid_tl_heat[-1].bias.data.fill_(-2.19) 102 | self.mid_br_heat[-1].bias.data.fill_(-2.19) 103 | normal_init(self.tl_offset, std=0.1) 104 | normal_init(self.tl_fadp , std=0.01) 105 | normal_init(self.br_offset, std=0.1) 106 | normal_init(self.br_fadp , std=0.01) 107 | normal_init(self.mid_tl_offset, std=0.1) 108 | normal_init(self.mid_tl_fadp , std=0.01) 109 | normal_init(self.mid_br_offset, std=0.1) 110 | normal_init(self.mid_br_fadp , std=0.01) 111 | 112 | 113 | def forward_single(self, feats): 114 | '''tl_result = self.tl_branch(x) 115 | br_result = self.br_branch(x)''' 116 | x = feats[-1] 117 | mask = None 118 | mask_mid = None 119 | if self.with_mask: 120 | mask = x 121 | for conv in self.convs: 122 | mask = conv(mask) 123 | mask = self.conv_logits(mask) 124 | 125 | tl_pool = self.tl_pool(x) 126 | tl_heat = self.tl_heat(tl_pool) 127 | tl_off_c = self.tl_off_c(tl_pool) 128 | tl_off = self.tl_off(tl_pool) 129 | tl_offmap = self.tl_offset(tl_off_c.detach()) 130 | x_tl_fadp = self.tl_fadp(tl_pool, tl_offmap) 131 | tl_off_c_2= self.tl_off_c_2(x_tl_fadp) 132 | 133 | 134 | br_pool = self.br_pool(x) 135 | br_heat = self.br_heat(br_pool) 136 | br_off_c = self.br_off_c(br_pool) 137 | br_off = self.br_off(br_pool) 138 | br_offmap = self.br_offset(br_off_c.detach()) 139 | x_br_fadp = self.br_fadp(br_pool, br_offmap) 140 | br_off_c_2= self.br_off_c_2(x_br_fadp) 141 | 142 | tl_result = torch.cat([tl_heat, tl_off_c, tl_off_c_2, tl_off], 1) 143 | br_result = torch.cat([br_heat, br_off_c, br_off_c_2, br_off], 1) 144 | 145 | x = feats[0] 146 | 147 | if self.with_mask: 148 | mask_mid = x 149 | for conv in self.mid_convs: 150 | mask_mid = conv(mask_mid) 151 | mask_mid = self.mid_conv_logits(mask_mid) 152 | 153 | tl_pool_mid = self.mid_tl_pool(x) 154 | tl_heat_mid = self.mid_tl_heat(tl_pool_mid) 155 | tl_off_c_mid = self.mid_tl_off_c(tl_pool_mid) 156 | tl_off_mid = self.mid_tl_off(tl_pool_mid) 157 | tl_offmap_mid = self.mid_tl_offset(tl_off_c_mid.detach()) 158 | x_tl_fadp_mid = self.mid_tl_fadp(tl_pool_mid, tl_offmap_mid) 159 | tl_off_c_2_mid= self.mid_tl_off_c_2(x_tl_fadp_mid) 160 | 161 | br_pool_mid = self.mid_br_pool(x) 162 | br_heat_mid = self.mid_br_heat(br_pool_mid) 163 | br_off_c_mid = self.mid_br_off_c(br_pool_mid) 164 | br_off_mid = self.mid_br_off(br_pool_mid) 165 | br_offmap_mid = self.mid_br_offset(br_off_c_mid.detach()) 166 | x_br_fadp_mid = self.mid_br_fadp(br_pool_mid, br_offmap_mid) 167 | br_off_c_2_mid= self.mid_br_off_c_2(x_br_fadp_mid) 168 | 169 | tl_result_mid = torch.cat([tl_heat_mid, tl_off_c_mid, tl_off_c_2_mid, tl_off_mid], 1) 170 | br_result_mid = torch.cat([br_heat_mid, br_off_c_mid, br_off_c_2_mid, br_off_mid], 1) 171 | 172 | if self.with_mask: 173 | return tl_result, br_result, mask, tl_result_mid, br_result_mid, mask_mid 174 | else: 175 | return tl_result, br_result, None, tl_result_mid, br_result_mid, None 176 | 177 | def forward(self, feats): 178 | """ 179 | :param feats: different layer's feature 180 | :return: the raw results 181 | """ 182 | feat = feats # [-1]# we only use the feature of the last layer 183 | return self.forward_single(feat) 184 | 185 | def loss(self, tl_result, br_result, mask, mid_tl_result, mid_br_result, mid_mask, gt_bboxes, gt_labels, gt_masks, img_metas, cfg, imgscale): 186 | gt_tl_heatmap, gt_br_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c,\ 187 | gt_tl_off_c2, gt_br_off_c2 = corner_target(gt_bboxes=gt_bboxes, gt_labels=gt_labels, feats=tl_result, imgscale=imgscale, direct=True, 
scale=1.0, dcn=True) 188 | # pred_tl_heatmap = _sigmoid(tl_result[:, :self.num_classes, :, :]) 189 | pred_tl_heatmap = tl_result[:, :self.num_classes, :, :].sigmoid() 190 | pred_tl_off_c = tl_result[:, self.num_classes:self.num_classes + 2, :, :] 191 | pred_tl_off_c2 = tl_result[:, self.num_classes+2:self.num_classes+4, :, :] 192 | pred_tl_offsets = tl_result[:, -2:, :, :] 193 | # pred_br_heatmap = _sigmoid(br_result[:, :self.num_classes, :, :]) 194 | pred_br_heatmap = br_result[:, :self.num_classes, :, :].sigmoid() 195 | pred_br_off_c = br_result[:, self.num_classes:self.num_classes + 2, :, :] 196 | pred_br_off_c2 = br_result[:, self.num_classes+2:self.num_classes+4, :, :] 197 | pred_br_offsets = br_result[:, -2:, :, :] 198 | 199 | # mid_pred_tl_heatmap = _sigmoid(mid_tl_result[:, :self.num_classes, :, :]) 200 | mid_pred_tl_heatmap = mid_tl_result[:, :self.num_classes, :, :].sigmoid() 201 | mid_pred_tl_off_c = mid_tl_result[:, self.num_classes:self.num_classes + 2, :, :] 202 | mid_pred_tl_off_c2 = mid_tl_result[:, self.num_classes+2:self.num_classes+4, :, :] 203 | mid_pred_tl_offsets = mid_tl_result[:, -2:, :, :] 204 | # mid_pred_br_heatmap = _sigmoid(mid_br_result[:, :self.num_classes, :, :]) 205 | mid_pred_br_heatmap = mid_br_result[:, :self.num_classes, :, :].sigmoid() 206 | mid_pred_br_off_c = mid_br_result[:, self.num_classes:self.num_classes + 2, :, :] 207 | mid_pred_br_off_c2 = mid_br_result[:, self.num_classes+2:self.num_classes+4, :, :] 208 | mid_pred_br_offsets = mid_br_result[:, -2:, :, :] 209 | 210 | tl_det_loss = det_loss_(pred_tl_heatmap, gt_tl_heatmap) + det_loss_(mid_pred_tl_heatmap, gt_tl_heatmap) 211 | br_det_loss = det_loss_(pred_br_heatmap, gt_br_heatmap) + det_loss_(mid_pred_br_heatmap, gt_br_heatmap) 212 | # tl_det_loss = _neg_loss([pred_tl_heatmap, mid_pred_tl_heatmap], gt_tl_heatmap) 213 | # br_det_loss = _neg_loss([pred_br_heatmap, mid_pred_br_heatmap], gt_br_heatmap) 214 | 215 | det_loss = (tl_det_loss + br_det_loss) / 2.0 216 | 217 | tl_off_mask = gt_tl_heatmap.eq(1).type_as(gt_tl_heatmap) 218 | br_off_mask = gt_br_heatmap.eq(1).type_as(gt_br_heatmap) 219 | 220 | 221 | tl_off_c_loss = off_loss_(pred_tl_off_c, gt_tl_off_c, mask=tl_off_mask) + off_loss_(mid_pred_tl_off_c, gt_tl_off_c,mask=tl_off_mask) 222 | br_off_c_loss = off_loss_(pred_br_off_c, gt_br_off_c, mask=br_off_mask) + off_loss_(mid_pred_br_off_c, gt_br_off_c,mask=br_off_mask) 223 | off_c_loss = tl_off_c_loss.sum() / tl_off_mask.sum() + br_off_c_loss.sum() / br_off_mask.sum() 224 | off_c_loss /= 2.0 225 | off_c_loss *= 0.05 226 | 227 | tl_off_c2_loss = off_loss_(pred_tl_off_c2, gt_tl_off_c2, mask=tl_off_mask) + off_loss_(mid_pred_tl_off_c2, gt_tl_off_c2,mask=tl_off_mask) 228 | br_off_c2_loss = off_loss_(pred_br_off_c2, gt_br_off_c2, mask=br_off_mask) + off_loss_(mid_pred_br_off_c2, gt_br_off_c2,mask=br_off_mask) 229 | off_c2_loss = tl_off_c2_loss.sum() / tl_off_mask.sum() + br_off_c2_loss.sum() / br_off_mask.sum() 230 | off_c2_loss /= 2.0 231 | 232 | tl_off_loss = off_loss_(pred_tl_offsets, gt_tl_offsets, mask=tl_off_mask) + off_loss_(mid_pred_tl_offsets, gt_tl_offsets,mask=tl_off_mask) 233 | br_off_loss = off_loss_(pred_br_offsets, gt_br_offsets, mask=br_off_mask) + off_loss_(mid_pred_br_offsets, gt_br_offsets,mask=br_off_mask) 234 | off_loss = tl_off_loss.sum() / tl_off_mask.sum() + br_off_loss.sum() / br_off_mask.sum() 235 | off_loss /= 2.0 236 | 237 | mask_loss = 0 238 | if self.with_mask: 239 | for b_id in range(len(gt_labels)): 240 | for mask_id in range(len(gt_labels[b_id])): 241 | mask_label = 
gt_labels[b_id][mask_id] 242 | m_pred = mask[b_id][mask_label] 243 | mid_m_pred = mid_mask[b_id][mask_label] 244 | m_gt = torch.from_numpy(gt_masks[b_id][mask_id]).float().cuda() 245 | mask_loss += F.binary_cross_entropy_with_logits(m_pred, m_gt) 246 | mask_loss += F.binary_cross_entropy_with_logits(mid_m_pred, m_gt) 247 | mask_loss /= mask.size(0) 248 | mask_loss /= 2.0 249 | 250 | # return dict(det_loss=det_loss, ae_loss=ae_loss, off_loss=off_loss) 251 | if self.with_mask: 252 | return dict(det_loss=det_loss, off_c_loss=off_c_loss, off_c2_loss=off_c2_loss, off_loss=off_loss, mask_loss=mask_loss) 253 | else: 254 | return dict(det_loss=det_loss, off_c_loss=off_c_loss, off_c2_loss=off_c2_loss, off_loss=off_loss) 255 | 256 | def get_bboxes(self, tl_result, br_result, mask, mid_tl_result, mid_br_result, mid_mask, img_metas, cfg, rescale=False): 257 | tl_heat = tl_result[:, :self.num_classes, :, :] 258 | tl_off_c= tl_result[:, self.num_classes+2:self.num_classes+4, :, :] 259 | tl_regr = tl_result[:, -2:, :, :] 260 | br_heat = br_result[:, :self.num_classes, :, :] 261 | br_off_c= br_result[:, self.num_classes+2:self.num_classes+4, :, :] 262 | br_regr = br_result[:, -2:, :, :] 263 | #pdb.set_trace() 264 | if len(tl_heat) == 2: 265 | img_metas = img_metas[0] 266 | 267 | if isinstance(img_metas, list): 268 | img_metas_1 = img_metas[0] 269 | else: 270 | img_metas_1 = img_metas 271 | 272 | batch_bboxes, batch_scores, batch_clses = _decode_center(tl_heat=tl_heat, br_heat=br_heat, tl_off_c=tl_off_c, br_off_c=br_off_c, tl_regr=tl_regr, br_regr=br_regr, img_meta=img_metas_1)#[0] 273 | h, w, _ = img_metas_1['ori_shape'] 274 | #h, w, _ = img_metas[0]['ori_shape'] 275 | 276 | scale = img_metas_1['scale'] 277 | #batch_bboxes /= scale 278 | 279 | if len(batch_bboxes) == 2: 280 | # print('flip') 281 | batch_bboxes[1, :, [0, 2]] = w - batch_bboxes[1, :, [2, 0]] 282 | 283 | 284 | batch_bboxes = batch_bboxes.view([-1, 4]).unsqueeze(0) 285 | batch_scores = batch_scores.view([-1, 1]).unsqueeze(0) 286 | batch_clses = batch_clses.view([-1, 1]).unsqueeze(0) 287 | # pdb.set_trace() 288 | # assert len(img_metas)==len(batch_bboxes) 289 | result_list = [] 290 | for img_id in range(len(img_metas)): 291 | # pdb.set_trace() 292 | bboxes = batch_bboxes[img_id] 293 | scores = batch_scores[img_id] 294 | clses = batch_clses[img_id] 295 | 296 | scores_n = scores.cpu().numpy() 297 | idx = scores_n.argsort(0)[::-1] 298 | idx = torch.Tensor(idx.astype(float)).long() 299 | 300 | bboxes = bboxes[idx].squeeze() 301 | scores = scores[idx].view(-1) 302 | clses = clses[idx].view(-1) 303 | 304 | det_num = len(bboxes) 305 | 306 | # img_h, img_w, _ = img_metas[img_id]['img_shape'] 307 | # ori_h, ori_w, _ = img_metas[img_id]['ori_shape'] 308 | # h_scale = float(ori_h) / float(img_h) 309 | # w_scale = float(ori_w) / float(img_w) 310 | 311 | # bboxes[:,0::2] *= w_scale 312 | # bboxes[:,1::2] *= h_scale 313 | 314 | '''clses_idx = (clses + 1).long() 315 | det_idx = torch.Tensor(np.arange(det_num)).long() 316 | scores_81 = -1*torch.ones(det_num, self.num_classes + 1).type_as(scores) 317 | scores_81[det_idx, clses_idx] = scores 318 | 319 | bboxes_scores = torch.cat([bboxes, scores.unsqueeze(-1)], 1) 320 | nms_bboxes, _ = nms(bboxes_scores, 0.5) 321 | #nms_bboxes, nms_labels = multiclass_nms(bboxes, scores_81, 0.5, cfg.nms, cfg.max_per_img) 322 | 323 | result_list.append((nms_bboxes, nms_labels))''' 324 | detections = torch.cat([bboxes, scores.unsqueeze(-1)], -1) 325 | keepinds = (detections[:, -1] > -0.1) # 0.05 326 | detections = 
class pool(nn.Module):
    def __init__(self, dim, pool1, pool2):  # pool1, pool2 are module classes, not instances
        super(pool, self).__init__()
        self.p1_conv1 = convolution(3, dim, 128)
        self.p2_conv1 = convolution(3, dim, 128)

        self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
        self.p_bn1 = nn.BatchNorm2d(dim)

        self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = convolution(3, dim, dim)

        self.pool1 = pool1()
        self.pool2 = pool2()

    def forward(self, x):
        # pool 1
        p1_conv1 = self.p1_conv1(x)
        pool1 = self.pool1(p1_conv1)

        # pool 2
        p2_conv1 = self.p2_conv1(x)
        pool2 = self.pool2(p2_conv1)

        # pool 1 + pool 2, with a residual 1x1 branch from the input
        p_conv1 = self.p_conv1(pool1 + pool2)
        p_bn1 = self.p_bn1(p_conv1)

        conv1 = self.conv1(x)
        bn1 = self.bn1(conv1)
        relu1 = self.relu1(p_bn1 + bn1)

        conv2 = self.conv2(relu1)
        return conv2


class pool_new(nn.Module):
    def __init__(self, dim, pool1, pool2):
        super(pool_new, self).__init__()
        self.p1_conv1 = convolution(3, dim, 128)
        self.p2_conv1 = convolution(3, dim, 128)

        self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
        self.p_bn1 = nn.BatchNorm2d(dim)

        self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = convolution(3, dim, dim)

        self.pool1 = pool1()
        self.pool2 = pool2()

        # extra "look" branches: each pooling direction first looks along the
        # other direction before pooling
        self.look_conv1 = convolution(3, dim, 128)
        self.look_conv2 = convolution(3, dim, 128)
        self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)
        self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)

    def forward(self, x):
        # pool 1: look along direction 2 first, then pool along direction 1
        look_conv1 = self.look_conv1(x)
        p1_conv1 = self.p1_conv1(x)
        look_right = self.pool2(look_conv1)
        P1_look_conv = self.P1_look_conv(p1_conv1 + look_right)
        pool1 = self.pool1(P1_look_conv)

        # pool 2: look along direction 1 first, then pool along direction 2
        look_conv2 = self.look_conv2(x)
        p2_conv1 = self.p2_conv1(x)
        look_down = self.pool1(look_conv2)
        P2_look_conv = self.P2_look_conv(p2_conv1 + look_down)
        pool2 = self.pool2(P2_look_conv)

        # pool 1 + pool 2, with a residual 1x1 branch from the input
        p_conv1 = self.p_conv1(pool1 + pool2)
        p_bn1 = self.p_bn1(p_conv1)

        conv1 = self.conv1(x)
        bn1 = self.bn1(conv1)
        relu1 = self.relu1(p_bn1 + bn1)

        conv2 = self.conv2(relu1)
        return conv2
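
# --- Illustrative sketch (not part of the original code) ---
# Minimal usage of the pooling blocks above, assuming the compiled
# TopPool/LeftPool ops from src/ops/_cpools are built and imported in this
# module (as TopLeftPool below assumes):
def _demo_corner_pool():
    import torch
    feat = torch.randn(1, 256, 64, 64)
    tl_pool = pool(256, TopPool, LeftPool)  # same wiring as TopLeftPool
    out = tl_pool(feat)                     # spatial shape is preserved
    return out.shape                        # torch.Size([1, 256, 64, 64])
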
class TopLeftPool(pool):
    def __init__(self, dim):
        super(TopLeftPool, self).__init__(dim, TopPool, LeftPool)


class BottomRightPool(pool):
    def __init__(self, dim):
        super(BottomRightPool, self).__init__(dim, BottomPool, RightPool)


class convolution(nn.Module):
    """Conv-BN-ReLU block; the conv bias is disabled whenever BN is used."""

    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
        super(convolution, self).__init__()

        pad = (k - 1) // 2
        self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad),
                              stride=(stride, stride), bias=not with_bn)
        self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        conv = self.conv(x)
        bn = self.bn(conv)
        relu = self.relu(bn)
        return relu


# Pure-PyTorch reference implementations of the four corner-pooling
# directions (the compiled ops in src/ops/_cpools provide the fast versions).

def top_pool(x):  # scans from right to left
    """
    :param x: feature map, a 4-D tensor (N, C, H, W)
    :return: feature map of the same size, where each column holds the max
        over itself and all columns to its right
    """
    x_p = torch.zeros_like(x)
    _, _, h, w = x.size()
    for col in range(w - 1, -1, -1):
        x_p[:, :, :, col] = x[:, :, :, col:].max(-1)[0]

    return x_p


def left_pool(x):  # scans from bottom to top
    x_p = torch.zeros_like(x)
    _, _, h, w = x.size()
    for row in range(h - 1, -1, -1):
        x_p[:, :, row, :] = x[:, :, row:, :].max(-2)[0]

    return x_p


def bottom_pool(x):  # scans from left to right
    x_p = torch.zeros_like(x)
    x_p[:, :, :, 0] = x[:, :, :, 0]
    _, _, h, w = x.size()
    for col in range(1, w):
        x_p[:, :, :, col] = x[:, :, :, 0:col + 1].max(-1)[0]

    return x_p


def right_pool(x):  # scans from top to bottom
    x_p = torch.zeros_like(x)
    x_p[:, :, 0, :] = x[:, :, 0, :]
    _, _, h, w = x.size()
    for row in range(1, h):
        x_p[:, :, row, :] = x[:, :, 0:row + 1, :].max(-2)[0]

    return x_p
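
# --- Illustrative sketch (not part of the original code) ---
# The reference loops above can be expressed without Python loops, assuming
# torch.cummax is available (PyTorch >= 1.5). For example, top_pool takes, at
# each column, the max over itself and all columns to its right, which is a
# reversed cumulative max along the width axis:
def _demo_top_pool_cummax(x):
    # equivalent to top_pool(x)
    return x.flip(-1).cummax(-1)[0].flip(-1)
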
def det_loss_(preds, gt, Epsilon=1e-12):
    """Focal-style detection loss on Gaussian-smoothed ground-truth heatmaps."""
    # TODO: add Gaussian to gt_heatmap
    pos_weights = (gt == 1.0).type_as(gt)
    neg_weights = torch.pow(1 - gt, 4).type_as(gt)
    pos_loss = -torch.log(preds + Epsilon) * torch.pow(1 - preds, 2) * pos_weights
    neg_loss = -torch.log(1 - preds + Epsilon) * torch.pow(preds, 2) * neg_weights
    obj_num = pos_weights.sum()
    # normalise by the number of positives; fall back to the plain negative
    # sum when the image contains no positive corner
    if obj_num < 1:
        loss = neg_loss.sum()
    else:
        loss = (pos_loss + neg_loss).sum() / obj_num

    return loss


def _neg_loss(preds, gt, Epsilon=1e-12):
    pos_inds = gt.eq(1)
    neg_inds = gt.lt(1)

    neg_weights = torch.pow(1 - gt[neg_inds], 4)

    loss = 0
    for pred in preds:
        pos_pred = pred[pos_inds]
        neg_pred = pred[neg_inds]

        pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
        neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights

        num_pos = pos_inds.float().sum()
        pos_loss = pos_loss.sum()
        neg_loss = neg_loss.sum()

        # avoid division by zero when there are no positives
        if pos_pred.nelement() == 0:
            loss = loss - neg_loss
        else:
            loss = loss - (pos_loss + neg_loss) / num_pos
    return loss


def off_loss_(preds, target, mask):
    """
    :param preds: predicted offsets
    :param target: ground-truth offsets
    :param mask: marks the positions of the ground-truth corners
    :return: element-wise smooth L1 loss of the offsets
    """
    mask = (mask.sum(1) > 0).unsqueeze(1).type_as(preds)
    # avoid in-place multiplication: preds is still needed by autograd
    preds = preds * mask
    target = target * mask

    return smooth_l1_loss(preds, target, reduction='none')


def ae_loss_(tl_preds, br_preds, match):
    """
    :param tl_preds: predicted top-left embeddings
    :param br_preds: predicted bottom-right embeddings
    :param match: per-image list of ((tl_y, tl_x), (br_y, br_x)) corner pairs
    :return: pull loss and push loss, summed over the batch
    """
    b = tl_preds.size(0)

    pull = 0
    push = 0
    for i in range(b):
        loss = ae_loss_per_image(tl_preds[i], br_preds[i], match[i])
        pull += loss[0]
        push += loss[1]
    return pull, push


def ae_loss_per_image(tl_preds, br_preds, match, pull_weight=0.25, push_weight=0.25):
    # gather the embeddings of each matched top-left/bottom-right pair
    tl_list = torch.Tensor([]).type_as(tl_preds)
    br_list = torch.Tensor([]).type_as(tl_preds)
    me_list = torch.Tensor([]).type_as(tl_preds)
    for m in match:
        tl_y, tl_x = m[0][0], m[0][1]
        br_y, br_x = m[1][0], m[1][1]
        tl_e = tl_preds[:, tl_y, tl_x]
        br_e = br_preds[:, br_y, br_x]
        tl_list = torch.cat([tl_list, tl_e])
        br_list = torch.cat([br_list, br_e])
        me_list = torch.cat([me_list, (tl_e + br_e) / 2.0])

    assert tl_list.size() == br_list.size()

    N = tl_list.size(0)

    # pull: the two corners of one object should share the same embedding
    if N > 0:
        pull_loss = (torch.pow(tl_list - me_list, 2) + torch.pow(br_list - me_list, 2)).sum() / N
    else:
        pull_loss = 0

    # push: mean embeddings of different objects should be at least
    # `margin` apart
    margin = 1
    push_loss = 0
    for i in range(N):
        mask = torch.ones(N, device=tl_preds.device)
        mask[i] = 0
        push_loss += (mask * F.relu(margin - abs(me_list[i] - me_list))).sum()

    if N > 1:
        push_loss /= (N * (N - 1))

    return pull_weight * pull_loss, push_weight * push_loss
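
# --- Illustrative sketch (not part of the original code) ---
# Toy check of the associative-embedding loss above: corners of one object
# with identical embeddings give zero pull loss, and the push term is zero
# once the object means are further apart than the margin of 1.
def _demo_ae_loss():
    import torch
    tl = torch.zeros(1, 4, 4)   # embedding dim 1, 4x4 feature map
    br = torch.zeros(1, 4, 4)
    tl[0, 2, 2] = 2.0           # object B's top-left embedding
    br[0, 3, 3] = 2.0           # object B's bottom-right embedding
    match = [((0, 0), (1, 1)),  # object A: embeddings 0.0 / 0.0
             ((2, 2), (3, 3))]  # object B: embeddings 2.0 / 2.0
    pull, push = ae_loss_per_image(tl, br, match)
    # pull == 0: each corner equals its pair's mean embedding
    # push == 0: the two object means differ by 2.0 > margin
    return pull, push
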
def make_kp_layer(out_dim, cnv_dim=256, curr_dim=256):
    # 3x3 conv (no BN) followed by a 1x1 projection to the output channels
    return nn.Sequential(
        convolution(3, cnv_dim, curr_dim, with_bn=False),
        nn.Conv2d(curr_dim, out_dim, (1, 1))
    )


def _sigmoid(x):
    # clamp away from 0 and 1 so the focal-loss logs stay finite
    x = torch.clamp(torch.sigmoid(x), min=1e-4, max=1 - 1e-4)
    return x

--------------------------------------------------------------------------------