├── README.md
├── src
│   ├── ops
│   │   ├── _cpools
│   │   │   ├── .gitignore
│   │   │   ├── setup.py
│   │   │   ├── __init__.py
│   │   │   └── src
│   │   │       ├── bottom_pool.cpp
│   │   │       ├── right_pool.cpp
│   │   │       ├── left_pool.cpp
│   │   │       └── top_pool.cpp
│   │   └── __init__.py
│   ├── models
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   └── hourglass.py
│   │   ├── bbox_heads
│   │   │   ├── __init__.py
│   │   │   └── centripetal_mask.py
│   │   └── detectors
│   │       ├── __init__.py
│   │       ├── centripetal.py
│   │       └── test_mixins.py
│   ├── core
│   │   ├── __init__.py
│   │   └── corner
│   │       ├── __init__.py
│   │       ├── corner_target.py
│   │       └── kp_utils.py
│   └── datasets
│       ├── coco.py
│       ├── transforms.py
│       ├── extra_aug.py
│       └── custom.py
├── .gitmodules
├── init.sh
├── compile.sh
├── LICENSE
├── .gitignore
└── configs
    └── centripetalnet_mask_hg104.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CentripetalNet

--------------------------------------------------------------------------------
/src/ops/_cpools/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | cpools.egg-info/
3 | dist/

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "mmdetection"]
2 |     path = mmdetection
3 |     url = https://github.com/open-mmlab/mmdetection.git

--------------------------------------------------------------------------------
/src/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResNet
2 | from .resnext import ResNeXt
3 | from .ssd_vgg import SSDVGG
4 | from .hourglass import Hourglass
5 | 
6 | __all__ = ['ResNet', 'ResNeXt', 'SSDVGG', 'Hourglass']

--------------------------------------------------------------------------------
/src/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
3 | from .centripetal_mask import Centripetal_mask
4 | 
5 | __all__ = [
6 |     'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'Centripetal_mask'
7 | ]

--------------------------------------------------------------------------------
/src/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .corner import *
4 | from .mask import *
5 | from .loss import *  # noqa: F401, F403
6 | from .evaluation import *  # noqa: F401, F403
7 | from .post_processing import *  # noqa: F401, F403
8 | from .utils import *  # noqa: F401, F403

--------------------------------------------------------------------------------
/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | # copy files
6 | cp compile.sh mmdetection/
7 | cp -r src/core/* mmdetection/mmdet/core/
8 | cp -r src/datasets/* mmdetection/mmdet/datasets/
9 | cp -r src/models/* mmdetection/mmdet/models/
10 | cp -r src/ops/* mmdetection/mmdet/ops/
11 | 
12 | # compile and setup
13 | cd mmdetection
14 | ./compile.sh
15 | $PYTHON setup.py install --user
--------------------------------------------------------------------------------
/src/core/corner/__init__.py:
--------------------------------------------------------------------------------
1 | from .corner_target import corner_target
2 | from .kp_utils import _gather_feat, _nms, _tranpose_and_gather_feat, _topk, _neg_loss, _sigmoid, _ae_loss, _regr_loss, gaussian2D, draw_gaussian, gaussian_radius, _decode_center
3 | 
4 | __all__ = ['corner_target', '_gather_feat', '_nms', '_tranpose_and_gather_feat', '_topk', '_decode_center', '_neg_loss', '_sigmoid', '_ae_loss', '_regr_loss', 'gaussian2D', 'draw_gaussian', 'gaussian_radius']
5 | 
6 | 

--------------------------------------------------------------------------------
/src/ops/_cpools/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CppExtension
3 | 
4 | setup(
5 |     name="cpools",
6 |     ext_modules=[
7 |         CppExtension("top_pool", ["src/top_pool.cpp"]),
8 |         CppExtension("bottom_pool", ["src/bottom_pool.cpp"]),
9 |         CppExtension("left_pool", ["src/left_pool.cpp"]),
10 |         CppExtension("right_pool", ["src/right_pool.cpp"])
11 |     ],
12 |     cmdclass={
13 |         "build_ext": BuildExtension
14 |     }
15 | )

--------------------------------------------------------------------------------
/src/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDetector
2 | from .cascade_rcnn import CascadeRCNN
3 | from .centripetal import CentripetalNet
4 | from .fast_rcnn import FastRCNN
5 | from .faster_rcnn import FasterRCNN
6 | from .mask_rcnn import MaskRCNN
7 | from .retinanet import RetinaNet
8 | from .rpn import RPN
9 | from .single_stage import SingleStageDetector
10 | from .two_stage import TwoStageDetector
11 | 
12 | __all__ = [
13 |     'BaseDetector', 'CentripetalNet', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
14 |     'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet'
15 | ]

--------------------------------------------------------------------------------
/compile.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | echo "Building roi align op..."
6 | cd mmdet/ops/roi_align
7 | if [ -d "build" ]; then
8 |     rm -r build
9 | fi
10 | $PYTHON setup.py build_ext --inplace
11 | 
12 | echo "Building roi pool op..."
13 | cd ../roi_pool
14 | if [ -d "build" ]; then
15 |     rm -r build
16 | fi
17 | $PYTHON setup.py build_ext --inplace
18 | 
19 | echo "Building nms op..."
20 | cd ../nms
21 | make clean
22 | make PYTHON=${PYTHON}
23 | 
24 | echo "Building dcn..."
25 | cd ../dcn
26 | if [ -d "build" ]; then
27 |     rm -r build
28 | fi
29 | $PYTHON setup.py build_ext --inplace
30 | 
31 | echo "Building corner pooling..."
32 | cd ../_cpools
33 | if [ -d "build" ]; then
34 |     rm -r build
35 | fi
36 | $PYTHON setup.py build_ext --inplace
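As a side note on the build: the corner pooling sources can also be JIT-compiled, which avoids the per-op `setup.py` builds that compile.sh runs. A minimal sketch, assuming this repository layout (`torch.utils.cpp_extension.load` is standard PyTorch; the path below is an assumption based on the tree at the top):

```python
# JIT-compile one corner pooling op instead of running compile.sh / setup.py.
# The source path is an assumption based on this repository's layout.
from torch.utils.cpp_extension import load

top_pool = load(name='top_pool', sources=['src/ops/_cpools/src/top_pool.cpp'])
# The loaded module exposes the same pybind11 entry points as the setup.py
# build: top_pool.forward(x) and top_pool.backward(x, grad_output).
```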
--------------------------------------------------------------------------------
/src/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack,
2 |                   ModulatedDeformRoIPoolingPack, ModulatedDeformConv,
3 |                   ModulatedDeformConvPack, deform_conv, modulated_deform_conv,
4 |                   deform_roi_pooling)
5 | from .nms import nms, soft_nms
6 | from .roi_align import RoIAlign, roi_align
7 | from .roi_pool import RoIPool, roi_pool
8 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool
9 | 
10 | __all__ = [
11 |     'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',
12 |     'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack',
13 |     'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',
14 |     'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',
15 |     'deform_roi_pooling', 'TopPool', 'BottomPool', 'LeftPool', 'RightPool'
16 | ]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 KiveeDong
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | #  Usually these files are written by a python script from a template
30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # cython generated cpp
107 | mmdet/ops/nms/*.cpp
108 | mmdet/version.py
109 | data
110 | .vscode
111 | .idea
112 | 
113 | work_dirs/

--------------------------------------------------------------------------------
/src/ops/_cpools/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from torch import nn
4 | from torch.autograd import Function
5 | import sys
6 | import os
7 | sys.path.append(os.path.join(os.path.dirname(__file__), 'dist/cpools-0.0.0-py3.6-linux-x86_64.egg'))  # make the locally built egg importable
8 | import top_pool, bottom_pool, left_pool, right_pool
9 | 
10 | class TopPoolFunction(Function):
11 |     @staticmethod
12 |     def forward(ctx, input):
13 |         output = top_pool.forward(input)[0]
14 |         ctx.save_for_backward(input)
15 |         return output
16 | 
17 |     @staticmethod
18 |     def backward(ctx, grad_output):
19 |         input = ctx.saved_tensors[0]
20 |         output = top_pool.backward(input, grad_output)[0]
21 |         return output
22 | 
23 | class BottomPoolFunction(Function):
24 |     @staticmethod
25 |     def forward(ctx, input):
26 |         output = bottom_pool.forward(input)[0]
27 |         ctx.save_for_backward(input)
28 |         return output
29 | 
30 |     @staticmethod
31 |     def backward(ctx, grad_output):
32 |         input = ctx.saved_tensors[0]
33 |         output = bottom_pool.backward(input, grad_output)[0]
34 |         return output
35 | 
36 | class LeftPoolFunction(Function):
37 |     @staticmethod
38 |     def forward(ctx, input):
39 |         output = left_pool.forward(input)[0]
40 |         ctx.save_for_backward(input)
41 |         return output
42 | 
43 |     @staticmethod
44 |     def backward(ctx, grad_output):
45 |         input = ctx.saved_tensors[0]
46 |         output = left_pool.backward(input, grad_output)[0]
47 |         return output
48 | 
49 | class RightPoolFunction(Function):
50 |     @staticmethod
51 |     def forward(ctx, input):
52 |         output = right_pool.forward(input)[0]
53 |         ctx.save_for_backward(input)
54 |         return output
55 | 
56 |     @staticmethod
57 |     def backward(ctx, grad_output):
58 |         input = ctx.saved_tensors[0]
59 |         output = right_pool.backward(input, grad_output)[0]
60 |         return output
61 | 
62 | class TopPool(nn.Module):
63 |     def forward(self, x):
64 |         return TopPoolFunction.apply(x)
65 | 
66 | class BottomPool(nn.Module):
67 |     def forward(self, x):
68 |         return BottomPoolFunction.apply(x)
69 | 
70 | class LeftPool(nn.Module):
71 |     def forward(self, x):
72 |         return LeftPoolFunction.apply(x)
73 | 
74 | class RightPool(nn.Module):
75 |     def forward(self, x):
76 |         return RightPoolFunction.apply(x)
77 | 
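A quick smoke test for these wrappers, as a sketch only, assuming the cpools extension has been built (see setup.py above) and that a GPU is available, since the C++ backward allocates CUDA buffers:

```python
# Minimal sanity check for the pooling modules defined above.
import torch
from mmdet.ops import TopPool  # importable after init.sh copies _cpools into mmdet

x = torch.randn(2, 256, 64, 64, device='cuda', requires_grad=True)
y = TopPool()(x)           # out[:, :, i, j] = max over x[:, :, i:, j]
assert y.shape == x.shape  # corner pooling preserves the feature map shape
y.sum().backward()         # gradient is routed only to the argmax positions
print(x.grad.abs().sum())
```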
--------------------------------------------------------------------------------
/src/ops/_cpools/src/bottom_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get height
12 |     int64_t height = input.size(2);
13 | 
14 |     // Copy the first row
15 |     at::Tensor input_temp = input.select(2, 0);
16 |     at::Tensor output_temp = output.select(2, 0);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 0; ind < height - 1; ++ind) {
21 |         input_temp = input.select(2, ind + 1);
22 |         output_temp = output.select(2, ind);
23 |         max_temp = output.select(2, ind + 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 | 
47 |     auto input_temp = input.select(2, 0);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(0);
51 | 
52 |     auto output_temp = output.select(2, 0);
53 |     auto grad_output_temp = grad_output.select(2, 0);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(2);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 |     for (int32_t ind = 0; ind < height - 1; ++ind) {
60 |         input_temp = input.select(2, ind + 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, ind + 1);
66 | 
67 |         grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2);
68 |         output.scatter_add_(2, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Bottom Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Bottom Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }

--------------------------------------------------------------------------------
/src/ops/_cpools/src/right_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get width
12 |     int64_t width = input.size(3);
13 | 
14 |     // Copy the first column
15 |     at::Tensor input_temp = input.select(3, 0);
16 |     at::Tensor output_temp = output.select(3, 0);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 0; ind < width - 1; ++ind) {
21 |         input_temp = input.select(3, ind + 1);
22 |         output_temp = output.select(3, ind);
23 |         max_temp = output.select(3, ind + 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     at::Tensor output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, 0);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(0);
51 | 
52 |     auto output_temp = output.select(3, 0);
53 |     auto grad_output_temp = grad_output.select(3, 0);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 0; ind < width - 1; ++ind) {
60 |         input_temp = input.select(3, ind + 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, ind + 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Right Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Right Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }

--------------------------------------------------------------------------------
/src/ops/_cpools/src/left_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get width
12 |     int64_t width = input.size(3);
13 | 
14 |     // Copy the last column
15 |     at::Tensor input_temp = input.select(3, width - 1);
16 |     at::Tensor output_temp = output.select(3, width - 1);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 1; ind < width; ++ind) {
21 |         input_temp = input.select(3, width - ind - 1);
22 |         output_temp = output.select(3, width - ind);
23 |         max_temp = output.select(3, width - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, width - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(width - 1);
51 | 
52 |     auto output_temp = output.select(3, width - 1);
53 |     auto grad_output_temp = grad_output.select(3, width - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 1; ind < width; ++ind) {
60 |         input_temp = input.select(3, width - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, width - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Left Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Left Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }

--------------------------------------------------------------------------------
/src/ops/_cpools/src/top_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> top_pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get height
12 |     int64_t height = input.size(2);
13 | 
14 |     // Copy the last row
15 |     at::Tensor input_temp = input.select(2, height - 1);
16 |     at::Tensor output_temp = output.select(2, height - 1);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 1; ind < height; ++ind) {
21 |         input_temp = input.select(2, height - ind - 1);
22 |         output_temp = output.select(2, height - ind);
23 |         max_temp = output.select(2, height - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> top_pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 | 
47 |     auto input_temp = input.select(2, height - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(height - 1);
51 | 
52 |     auto output_temp = output.select(2, height - 1);
53 |     auto grad_output_temp = grad_output.select(2, height - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(2);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 |     for (int32_t ind = 1; ind < height; ++ind) {
60 |         input_temp = input.select(2, height - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, height - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
68 |         output.scatter_add_(2, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &top_pool_forward, "Top Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &top_pool_backward, "Top Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
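The four kernels above all compute a running maximum along one direction of the feature map. In newer PyTorch they can be reproduced in a few lines with `torch.cummax`, which is handy for checking the compiled ops; a verification sketch (`cummax` needs PyTorch >= 1.5, so this is a reference, not a drop-in replacement for the toolchain this repo targets):

```python
# Pure-PyTorch reference for the four corner pooling ops above.
import torch

def bottom_pool(x):  # running max down the height axis: out[i] = max(x[:i+1])
    return x.cummax(dim=2)[0]

def top_pool(x):     # running max up the height axis: out[i] = max(x[i:])
    return x.flip(2).cummax(dim=2)[0].flip(2)

def right_pool(x):   # running max along the width axis, left to right
    return x.cummax(dim=3)[0]

def left_pool(x):    # running max along the width axis, right to left
    return x.flip(3).cummax(dim=3)[0].flip(3)

x = torch.randn(1, 2, 5, 5)
# e.g. compare against the compiled extension:
# import top_pool as top_pool_ext
# assert torch.allclose(top_pool(x), top_pool_ext.forward(x)[0])
```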
--------------------------------------------------------------------------------
/configs/centripetalnet_mask_hg104.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='CentripetalNet',
4 |     backbone=dict(
5 |         type='Hourglass',
6 |         n=5,
7 |         nstack=2,
8 |         dims=[256, 256, 384, 384, 384, 512],
9 |         modules=[2, 2, 2, 2, 2, 4],
10 |         out_dim=80,),
11 |     neck=None,
12 |     bbox_head=dict(
13 |         type='Centripetal_mask',
14 |         num_classes=81,
15 |         in_channels=256,
16 |         with_mask=True,
17 |     ))
18 | # training and testing settings
19 | train_cfg = dict(
20 |     assigner=dict(
21 |         type='MaxIoUAssigner',
22 |         pos_iou_thr=0.5,
23 |         neg_iou_thr=0.4,
24 |         min_pos_iou=0,
25 |         ignore_iof_thr=-1),
26 |     smoothl1_beta=0.11,
27 |     gamma=2.0,
28 |     alpha=0.25,
29 |     allowed_border=-1,
30 |     pos_weight=-1,
31 |     debug=False)
32 | test_cfg = dict(
33 |     nms_pre=1000,
34 |     min_bbox_size=0,
35 |     score_thr=0.05,
36 |     nms=dict(type='nms', iou_thr=0.5),
37 |     max_per_img=100)
38 | # dataset settings
39 | dataset_type = 'CocoDataset'
40 | data_root = 'data/mscoco2017/'
41 | img_norm_cfg = dict(
42 |     mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False)
43 | 
44 | cornernet_mode = True
45 | 
46 | data = dict(
47 |     imgs_per_gpu=6,  # 3
48 |     workers_per_gpu=3,  # 3
49 |     train=dict(
50 |         type=dataset_type,
51 |         ann_file=data_root + 'annotations/instances_train2017.json',
52 |         img_prefix=data_root + 'train2017/',
53 |         img_scale=(511, 511),
54 |         img_norm_cfg=img_norm_cfg,
55 |         size_divisor=None,
56 |         flip_ratio=0.5,
57 |         with_mask=True,
58 |         with_crowd=False,
59 |         with_label=True,
60 |         resize_keep_ratio=False,
61 |         cornernet_mode=cornernet_mode,
62 |         extra_aug=dict(
63 |             photo_metric_distortion=dict(
64 |                 brightness_delta=32,
65 |                 contrast_range=(0.5, 1.5),
66 |                 saturation_range=(0.5, 1.5),
67 |                 hue_delta=18),
68 |             expand=dict(
69 |                 mean=img_norm_cfg['mean'],
70 |                 to_rgb=img_norm_cfg['to_rgb'],
71 |                 ratio_range=(1, 4)),
72 |             random_crop=dict(
73 |                 min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3))),
74 |     val=dict(
75 |         type=dataset_type,
76 |         ann_file=data_root + 'annotations/instances_val2017.json',
77 |         img_prefix=data_root + 'val2017/',
78 |         img_scale=(511, 511),
79 |         img_norm_cfg=img_norm_cfg,
80 |         size_divisor=None,
81 |         flip_ratio=1,
82 |         with_mask=False,
83 |         with_crowd=False,
84 |         with_label=True,
85 |         cornernet_mode=cornernet_mode,
86 |         resize_keep_ratio=False),
87 |     test=dict(
88 |         type=dataset_type,
89 |         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
90 |         img_prefix=data_root + 'test2017/',
91 |         img_scale=(511, 511),
92 |         img_norm_cfg=img_norm_cfg,
93 |         size_divisor=None,
94 |         flip_ratio=1,
95 |         with_mask=False,
96 |         with_crowd=False,
97 |         with_label=False,
98 |         test_mode=True,
99 |         cornernet_mode=cornernet_mode,
100 |         resize_keep_ratio=False))
101 | # optimizer
102 | optimizer = dict(type='Adam', lr=0.00005)
103 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
104 | 
105 | # learning policy
106 | # policy='fixed'
107 | lr_config = dict(
108 |     policy='step',
109 |     warmup='linear',
110 |     warmup_iters=500,
111 |     warmup_ratio=1.0 / 3,
112 |     step=[190])
113 | checkpoint_config = dict(interval=1)
114 | # yapf:disable
115 | log_config = dict(
116 |     interval=50,
117 |     hooks=[
118 |         dict(type='TextLoggerHook'),
119 |         # dict(type='TensorboardLoggerHook')
120 |     ])
121 | # yapf:enable
122 | # runtime settings
123 | total_epochs = 210
124 | device_ids = range(8)
125 | dist_params = dict(backend='nccl')
126 | log_level = 'INFO'
127 | work_dir = './work_dirs/centripetalnet_mask_hg104'
128 | resume_from = None
129 | load_from = None
130 | workflow = [('train', 1)]
131 | 
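For reference, this config is consumed through the usual mmdetection v1.x entry points; a hedged sketch (exact APIs depend on the pinned mmdetection commit):

```python
# Build the model described by the config above (mmdetection v1.x style).
from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/centripetalnet_mask_hg104.py')
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
print(type(model).__name__)  # CentripetalNet
```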
--------------------------------------------------------------------------------
/src/datasets/coco.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pycocotools.coco import COCO
3 | 
4 | from .custom import CustomDataset
5 | 
6 | 
7 | class CocoDataset(CustomDataset):
8 | 
9 |     CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
10 |                'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
11 |                'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
12 |                'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
13 |                'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
14 |                'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
15 |                'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
16 |                'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
17 |                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
18 |                'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
19 |                'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
20 |                'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
21 |                'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
22 |                'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')
23 | 
24 |     def load_annotations(self, ann_file):
25 |         self.coco = COCO(ann_file)
26 |         self.cat_ids = self.coco.getCatIds()
27 |         self.cat2label = {
28 |             cat_id: i + 1
29 |             for i, cat_id in enumerate(self.cat_ids)
30 |         }
31 |         self.img_ids = self.coco.getImgIds()
32 |         img_infos = []
33 |         for i in self.img_ids:
34 |             info = self.coco.loadImgs([i])[0]
35 |             info['filename'] = info['file_name']
36 |             img_infos.append(info)
37 |         return img_infos
38 | 
39 |     def get_ann_info(self, idx):
40 |         img_id = self.img_infos[idx]['id']
41 |         ann_ids = self.coco.getAnnIds(imgIds=[img_id])
42 |         ann_info = self.coco.loadAnns(ann_ids)
43 |         return self._parse_ann_info(ann_info, self.with_mask)
44 | 
45 |     def _filter_imgs(self, min_size=32):
46 |         """Filter images too small or without ground truths."""
47 |         valid_inds = []
48 |         ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
49 |         for i, img_info in enumerate(self.img_infos):
50 |             if self.img_ids[i] not in ids_with_ann:
51 |                 continue
52 |             if min(img_info['width'], img_info['height']) >= min_size:
53 |                 valid_inds.append(i)
54 |         return valid_inds
55 | 
56 |     def _parse_ann_info(self, ann_info, with_mask=True):
57 |         """Parse bbox and mask annotation.
58 | 
59 |         Args:
60 |             ann_info (list[dict]): Annotation info of an image.
61 |             with_mask (bool): Whether to parse mask annotations.
62 | 
63 |         Returns:
64 |             dict: A dict containing the following keys: bboxes, bboxes_ignore,
65 |                 labels, masks, mask_polys, poly_lens.
66 |         """
67 |         gt_bboxes = []
68 |         gt_labels = []
69 |         gt_bboxes_ignore = []
70 |         # Two formats are provided.
71 |         # 1. mask: a binary map of the same size of the image.
72 |         # 2. polys: each mask consists of one or several polys, each poly is a
73 |         #    list of float.
74 | if with_mask: 75 | gt_masks = [] 76 | gt_mask_polys = [] 77 | gt_poly_lens = [] 78 | for i, ann in enumerate(ann_info): 79 | if ann.get('ignore', False): 80 | continue 81 | x1, y1, w, h = ann['bbox'] 82 | if ann['area'] <= 0 or w < 1 or h < 1: 83 | continue 84 | bbox = [x1, y1, x1 + w - 1, y1 + h - 1] 85 | if ann['iscrowd']: 86 | gt_bboxes_ignore.append(bbox) 87 | else: 88 | gt_bboxes.append(bbox) 89 | gt_labels.append(self.cat2label[ann['category_id']]) 90 | if with_mask and not ann['iscrowd']: 91 | gt_masks.append(self.coco.annToMask(ann)) 92 | mask_polys = [ 93 | p for p in ann['segmentation'] if len(p) >= 6 94 | ] # valid polygons have >= 3 points (6 coordinates) 95 | poly_lens = [len(p) for p in mask_polys] 96 | gt_mask_polys.append(mask_polys) 97 | gt_poly_lens.extend(poly_lens) 98 | if gt_bboxes: 99 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 100 | gt_labels = np.array(gt_labels, dtype=np.int64) 101 | else: 102 | gt_bboxes = np.zeros((0, 4), dtype=np.float32) 103 | gt_labels = np.array([], dtype=np.int64) 104 | 105 | if gt_bboxes_ignore: 106 | gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) 107 | else: 108 | gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) 109 | 110 | ann = dict( 111 | bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) 112 | 113 | if with_mask: 114 | ann['masks'] = gt_masks 115 | # poly format is not used in the current implementation 116 | ann['mask_polys'] = gt_mask_polys 117 | ann['poly_lens'] = gt_poly_lens 118 | return ann 119 | -------------------------------------------------------------------------------- /src/core/corner/corner_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from random import randint 4 | from .kp_utils import gaussian_radius, draw_gaussian 5 | import math 6 | 7 | 8 | def corner_target(gt_bboxes, gt_labels, feats, imgscale, num_classes=80, direct=False, obj=False, scale=8.0, dcn=False): 9 | """ 10 | :param gt_bboxes: list of boxes (xmin, ymin, xmax, ymax) 11 | :param gt_labels: list of labels 12 | :param featsize: 13 | :return: 14 | """ 15 | b, _, h, w = feats.size() 16 | im_h, im_w = imgscale 17 | 18 | width_ratio = float(w / im_w) 19 | height_ratio = float(h / im_h) 20 | 21 | gt_tl_corner_heatmap = np.zeros([b, num_classes, h, w]) * 1.0 22 | gt_br_corner_heatmap = np.zeros([b, num_classes, h, w]) * 1.0 23 | 24 | gt_tl_obj = np.zeros([b, 1, h, w]) * 1.0 25 | gt_br_obj = np.zeros([b, 1, h, w]) * 1.0 26 | 27 | gt_tl_off_c = np.zeros([b, 2, h, w]) * 1.0 28 | gt_br_off_c = np.zeros([b, 2, h, w]) * 1.0 29 | 30 | gt_tl_off_c2 = np.zeros([b, 2, h, w]) * 1.0 31 | gt_br_off_c2 = np.zeros([b, 2, h, w]) * 1.0 32 | 33 | 34 | gt_tl_offsets = np.zeros([b, 2, h, w]) * 1.0 35 | gt_br_offsets = np.zeros([b, 2, h, w]) * 1.0 36 | 37 | 38 | for b_id in range(b): 39 | #match = [] 40 | for box_id in range(len(gt_labels[b_id])): 41 | tl_x, tl_y, br_x, br_y = gt_bboxes[b_id][box_id] 42 | c_x = (tl_x + br_x)/2.0 43 | c_y = (tl_y + br_y)/2.0 44 | 45 | label = gt_labels[b_id][box_id] # label is between(1,80) 46 | 47 | ftlx = float(tl_x * width_ratio) 48 | fbrx = float(br_x * width_ratio) 49 | ftly = float(tl_y * height_ratio) 50 | fbry = float(br_y * height_ratio) 51 | fcx = float(c_x * width_ratio) 52 | fcy = float(c_y * height_ratio) 53 | 54 | 55 | #tl_x_idx = int(min(ftlx, w - 1)) 56 | #br_x_idx = int(min(fbrx, w - 1)) 57 | #tl_y_idx = int(min(ftly, h - 1)) 58 | #br_y_idx = int(min(fbry, h - 1)) 59 | tl_x_idx = int(ftlx) 60 | 
br_x_idx = int(fbrx) 61 | tl_y_idx = int(ftly) 62 | br_y_idx = int(fbry) 63 | 64 | width = float(br_x - tl_x) 65 | height = float(br_y - tl_y) 66 | 67 | width = math.ceil(width * width_ratio) 68 | height = math.ceil(height * height_ratio) 69 | 70 | radius = gaussian_radius((height, width), min_overlap=0.3) 71 | radius = max(0, int(radius)) 72 | # radius = 10 73 | 74 | draw_gaussian(gt_tl_corner_heatmap[b_id, label.long() - 1], [tl_x_idx, tl_y_idx], radius)#, mode='tl') 75 | draw_gaussian(gt_br_corner_heatmap[b_id, label.long() - 1], [br_x_idx, br_y_idx], radius)#, mode='br') 76 | draw_gaussian(gt_tl_obj[b_id, 0], [tl_x_idx, tl_y_idx], radius) 77 | draw_gaussian(gt_br_obj[b_id, 0], [br_x_idx, br_y_idx], radius) 78 | 79 | # gt_tl_corner_heatmap[b_id, label.long()-1, tl_x_idx.long(), tl_y_idx.long()] += 1 80 | # gt_br_corner_heatmap[b_id, label.long()-1, br_x_idx.long(), br_y_idx.long()] += 1 81 | 82 | tl_x_offset = ftlx - tl_x_idx 83 | tl_y_offset = ftly - tl_y_idx 84 | br_x_offset = fbrx - br_x_idx 85 | br_y_offset = fbry - br_y_idx 86 | 87 | if direct: 88 | tl_x_off_c = (fcx - tl_x_idx)/scale 89 | tl_y_off_c = (fcy - tl_y_idx)/scale 90 | br_x_off_c = (br_x_idx - fcx)/scale 91 | br_y_off_c = (br_y_idx - fcy)/scale 92 | else: 93 | tl_x_off_c = np.log(fcx - ftlx) 94 | tl_y_off_c = np.log(fcy - ftly) 95 | br_x_off_c = np.log(fbrx - fcx) 96 | br_y_off_c = np.log(fbry - fcy) 97 | 98 | gt_tl_offsets[b_id, 0, tl_y_idx, tl_x_idx] = tl_x_offset 99 | gt_tl_offsets[b_id, 1, tl_y_idx, tl_x_idx] = tl_y_offset 100 | gt_br_offsets[b_id, 0, br_y_idx, br_x_idx] = br_x_offset 101 | gt_br_offsets[b_id, 1, br_y_idx, br_x_idx] = br_y_offset 102 | 103 | gt_tl_off_c[b_id, 0, tl_y_idx, tl_x_idx] = tl_x_off_c 104 | gt_tl_off_c[b_id, 1, tl_y_idx, tl_x_idx] = tl_y_off_c 105 | gt_br_off_c[b_id, 0, br_y_idx, br_x_idx] = br_x_off_c 106 | gt_br_off_c[b_id, 1, br_y_idx, br_x_idx] = br_y_off_c 107 | 108 | gt_tl_off_c2[b_id, 0, tl_y_idx, tl_x_idx] = np.log(fcx - ftlx) 109 | gt_tl_off_c2[b_id, 1, tl_y_idx, tl_x_idx] = np.log(fcy - ftly) 110 | gt_br_off_c2[b_id, 0, br_y_idx, br_x_idx] = np.log(fbrx - fcx) 111 | gt_br_off_c2[b_id, 1, br_y_idx, br_x_idx] = np.log(fbry - fcy) 112 | gt_tl_corner_heatmap = torch.from_numpy(gt_tl_corner_heatmap).type_as(feats) 113 | gt_br_corner_heatmap = torch.from_numpy(gt_br_corner_heatmap).type_as(feats) 114 | gt_tl_obj = torch.from_numpy(gt_tl_obj).type_as(feats) 115 | gt_br_obj = torch.from_numpy(gt_br_obj).type_as(feats) 116 | gt_tl_off_c = torch.from_numpy(gt_tl_off_c).type_as(feats) 117 | gt_br_off_c = torch.from_numpy(gt_br_off_c).type_as(feats) 118 | gt_tl_off_c2 = torch.from_numpy(gt_tl_off_c2).type_as(feats) 119 | gt_br_off_c2 = torch.from_numpy(gt_br_off_c2).type_as(feats) 120 | gt_tl_offsets = torch.from_numpy(gt_tl_offsets).type_as(feats) 121 | gt_br_offsets = torch.from_numpy(gt_br_offsets).type_as(feats) 122 | 123 | if obj: 124 | return gt_tl_obj, gt_br_obj, gt_tl_corner_heatmap, gt_br_corner_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c 125 | else: 126 | if not dcn: 127 | return gt_tl_corner_heatmap, gt_br_corner_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c 128 | else: 129 | return gt_tl_corner_heatmap, gt_br_corner_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c, gt_tl_off_c2, gt_br_off_c2 130 | 131 | -------------------------------------------------------------------------------- /src/models/detectors/centripetal.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 
import torch 3 | import mmcv 4 | import cv2 5 | from .base import BaseDetector 6 | from .. import builder 7 | from ..registry import DETECTORS 8 | from mmdet.core import bbox2result 9 | from collections import OrderedDict 10 | from mmcv.runner import get_dist_info 11 | import numpy as np 12 | import json 13 | from .test_mixins import MaskTestMixin_kpt 14 | from numpy.random import randint 15 | 16 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 17 | 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', 18 | 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', 19 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 20 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 21 | 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', 22 | 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', 23 | 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 24 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 25 | 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 26 | 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', 27 | 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 28 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 29 | 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] 30 | 31 | @DETECTORS.register_module 32 | class CentripetalNet(BaseDetector, MaskTestMixin_kpt): 33 | 34 | def __init__(self, 35 | backbone, 36 | neck=None, 37 | bbox_head=None, 38 | train_cfg=None, 39 | test_cfg=None, 40 | pretrained=None): 41 | super(CentripetalNet, self).__init__() 42 | self.backbone = builder.build_backbone(backbone) 43 | if neck is not None: 44 | self.neck = builder.build_neck(neck) 45 | self.bbox_head = builder.build_head(bbox_head) 46 | self.train_cfg = train_cfg 47 | self.test_cfg = test_cfg 48 | self.init_weights(pretrained=pretrained) 49 | if self.bbox_head.with_mask: 50 | self.mask_head = True 51 | 52 | def init_weights(self, pretrained=None): 53 | super(CentripetalNet, self).init_weights(pretrained) 54 | self.backbone.init_weights(pretrained=pretrained) 55 | if self.with_neck: 56 | if isinstance(self.neck, nn.Sequential): 57 | for m in self.neck: 58 | m.init_weights() 59 | else: 60 | self.neck.init_weights() 61 | self.bbox_head.init_weights() 62 | 63 | def extract_feat(self, img): 64 | x = self.backbone(img) 65 | if self.with_neck: 66 | x = self.neck(x) 67 | return x 68 | 69 | def forward_train(self, img, img_metas, gt_bboxes, gt_labels, gt_masks): 70 | """ 71 | :param img: 72 | :param img_metas: 73 | :param gt_bboxes: (xmin, ymin, xmax, ymax) 74 | :param gt_labels: 75 | :return: 76 | """ 77 | _,_,h,w = img.size() 78 | imgscale = (h,w) 79 | x = self.extract_feat(img) 80 | outs = self.bbox_head(x) 81 | loss_inputs = outs + (gt_bboxes, gt_labels, gt_masks, img_metas, self.train_cfg,imgscale) 82 | losses = self.bbox_head.loss(*loss_inputs) 83 | return losses 84 | 85 | def simple_test(self, img, img_meta, rescale=False): 86 | 87 | x = self.extract_feat(img) 88 | outs = self.bbox_head(x) 89 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 90 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 91 | bbox_results = [ 92 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes+1) 93 | for det_bboxes, det_labels in bbox_list 94 | ] 95 | return bbox_results[0] 96 | 97 | # def aug_test_old(self, imgs, img_meta, rescale=False): 98 | # imgs=torch.cat(imgs) 99 | # x = self.extract_feat(imgs) 100 | # outs = self.bbox_head(x) 101 | # bbox_inputs 
= outs + (img_meta, self.test_cfg, rescale)
102 |     #     bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
103 |     #     bbox_results = [
104 |     #         bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes+1)
105 |     #         for det_bboxes, det_labels in bbox_list
106 |     #     ]
107 |     #     return bbox_results[0]
108 | 
109 |     def aug_test(self, imgs_l, img_meta, rescale=False, gt_bboxes=None, gt_labels=None, gt_masks=None, idx=None):
110 | 
111 |         img = imgs_l[0][0]
112 |         img_n = img.squeeze().cpu().numpy()
113 |         img_n = np.transpose(img_n, [1, 2, 0])
114 |         img_n -= img_n.min()
115 |         img_n /= abs(img_n).max()
116 |         img_n *= 255.0
117 |         ms_results = []
118 |         bboxes = []
119 |         labels = []
120 |         for i in [0]:
121 |         # for i in [0, 2, 4, 6, 8]:  # multi-scale variant
122 |             imgs = torch.cat(imgs_l[i:i+2])
123 |             x = self.extract_feat(imgs)
124 |             outs = self.bbox_head(x)
125 | 
126 |             bbox_inputs = outs + (img_meta[i:i+2], self.test_cfg, rescale)
127 |             bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
128 | 
129 |             ms_results.append(bbox_list)
130 |             bboxes.append(bbox_list[0][0])
131 |             labels.append(bbox_list[0][1])
132 | 
133 |         detections = torch.cat(bboxes)  # .cpu().numpy()
134 |         labels = torch.cat(labels)  # .cpu().numpy()
135 | 
136 |         bbox_list = [(detections, labels)]
137 | 
138 |         bbox_results = [
139 |             bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes+1)
140 |             for det_bboxes, det_labels in bbox_list
141 |         ]
142 |         return bbox_results[0]
143 | 
144 | 
145 | 
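For context, `bbox2result` (imported from mmdet.core above) converts the `(det_bboxes, det_labels)` pairs produced by `get_bboxes` into the per-class list format the evaluation code expects. A rough equivalent of the mmdetection v1 helper, shown only to document the output layout:

```python
# Sketch of bbox2result's contract: one (n, 5) array of
# [x1, y1, x2, y2, score] rows per foreground class.
import numpy as np

def bbox2result_sketch(bboxes, labels, num_classes):
    # bboxes: (n, 5) tensor, labels: (n,) tensor; num_classes includes background
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for _ in range(num_classes - 1)]
    bboxes = bboxes.cpu().numpy()
    labels = labels.cpu().numpy()
    return [bboxes[labels == i, :] for i in range(num_classes - 1)]
```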
--------------------------------------------------------------------------------
/src/models/backbones/hourglass.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from mmdet.models.registry import BACKBONES
4 | 
5 | 
6 | class convolution(nn.Module):
7 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
8 |         super(convolution, self).__init__()
9 | 
10 |         pad = (k - 1) // 2  # k is the convolution kernel size
11 |         self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
12 |         self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
13 |         self.relu = nn.ReLU(inplace=True)
14 | 
15 |     def forward(self, x):
16 |         conv = self.conv(x)
17 |         bn = self.bn(conv)
18 |         relu = self.relu(bn)
19 |         return relu
20 | 
21 | class residual(nn.Module):
22 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
23 |         super(residual, self).__init__()
24 | 
25 |         self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
26 |         self.bn1 = nn.BatchNorm2d(out_dim)
27 |         self.relu1 = nn.ReLU(inplace=True)
28 | 
29 |         self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
30 |         self.bn2 = nn.BatchNorm2d(out_dim)
31 | 
32 |         self.skip = nn.Sequential(
33 |             nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
34 |             nn.BatchNorm2d(out_dim)
35 |         ) if stride != 1 or inp_dim != out_dim else nn.Sequential()
36 |         self.relu = nn.ReLU(inplace=True)
37 | 
38 |     def forward(self, x):
39 |         conv1 = self.conv1(x)
40 |         bn1 = self.bn1(conv1)
41 |         relu1 = self.relu1(bn1)
42 | 
43 |         conv2 = self.conv2(relu1)
44 |         bn2 = self.bn2(conv2)
45 | 
46 |         skip = self.skip(x)
47 |         return self.relu(bn2 + skip)
48 | 
49 | class MergeUp(nn.Module):
50 |     def forward(self, up1, up2):
51 |         return up1 + up2
52 | 
53 | def make_merge_layer(dim):
54 |     return MergeUp()
55 | 
56 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):  # layer is chosen from conv/fc/res
57 |     layers = [layer(k, inp_dim, out_dim, **kwargs)]
58 |     for _ in range(1, modules):
59 |         layers.append(layer(k, out_dim, out_dim, **kwargs))
60 |     return nn.Sequential(*layers)
61 | 
62 | 
63 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
64 |     layers = []
65 |     for _ in range(modules - 1):
66 |         layers.append(layer(k, inp_dim, inp_dim, **kwargs))
67 |     layers.append(layer(k, inp_dim, out_dim, **kwargs))
68 |     return nn.Sequential(*layers)
69 | 
70 | # def make_pool_layer(dim):
71 | #     return nn.MaxPool2d(kernel_size=2, stride=2)
72 | 
73 | def make_pool_layer(dim):
74 |     return nn.Sequential()
75 | 
76 | def make_unpool_layer(dim, trans_conv=False):
77 |     if not trans_conv:
78 |         return nn.Upsample(scale_factor=2)
79 |     else:
80 |         return nn.ConvTranspose2d(dim, dim, kernel_size=4, stride=2, padding=1)
81 | 
82 | def make_kp_layer(cnv_dim, curr_dim, out_dim):
83 |     return nn.Sequential(
84 |         convolution(3, cnv_dim, curr_dim, with_bn=False),
85 |         nn.Conv2d(curr_dim, out_dim, (1, 1))
86 |     )
87 | 
88 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
89 |     layers = [layer(kernel, dim0, dim1, stride=2)]
90 |     layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
91 |     return nn.Sequential(*layers)
92 | 
93 | def make_inter_layer(dim):
94 |     return residual(3, dim, dim)
95 | 
96 | def make_cnv_layer(inp_dim, out_dim):
97 |     return convolution(3, inp_dim, out_dim)
98 | 
99 | 
100 | 
101 | class kp_module(nn.Module):
102 |     def __init__(
103 |         self, n, dims, modules, layer=residual, trans_conv=False,
104 |         make_up_layer=make_layer, make_low_layer=make_layer,  # "up" keeps the input resolution, "low" is the downsampled branch
105 |         make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
106 |         make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
107 |         make_merge_layer=make_merge_layer, **kwargs
108 |     ):
109 |         super(kp_module, self).__init__()
110 | 
111 |         self.n = n
112 | 
113 |         curr_mod = modules[0]
114 |         next_mod = modules[1]
115 | 
116 |         curr_dim = dims[0]
117 |         next_dim = dims[1]
118 | 
119 |         self.up1 = make_up_layer(  # make_layer
120 |             3, curr_dim, curr_dim, curr_mod,
121 |             layer=layer, **kwargs
122 |         )
123 |         self.max1 = make_pool_layer(curr_dim)
124 |         self.low1 = make_hg_layer(
125 |             3, curr_dim, next_dim, curr_mod,
126 |             layer=layer, **kwargs
127 |         )
128 |         # a recursive definition: each level nests another kp_module
129 |         self.low2 = kp_module(
130 |             n - 1, dims[1:], modules[1:], layer=layer,
131 |             make_up_layer=make_up_layer,
132 |             make_low_layer=make_low_layer,
133 |             make_hg_layer=make_hg_layer,
134 |             make_hg_layer_revr=make_hg_layer_revr,
135 |             make_pool_layer=make_pool_layer,
136 |             make_unpool_layer=make_unpool_layer,
137 |             make_merge_layer=make_merge_layer,
138 |             **kwargs
139 |         ) if self.n > 1 else \
140 |         make_low_layer(
141 |             3, next_dim, next_dim, next_mod,
142 |             layer=layer, **kwargs
143 |         )
144 |         self.low3 = make_hg_layer_revr(
145 |             3, next_dim, curr_dim, curr_mod,
146 |             layer=layer, **kwargs
147 |         )
148 |         self.up2 = make_unpool_layer(curr_dim, trans_conv)
149 | 
150 |         self.merge = make_merge_layer(curr_dim)
151 | 
152 |     def forward(self, x):
153 |         up1 = self.up1(x)
154 |         max1 = self.max1(x)
155 |         low1 = self.low1(max1)
156 |         low2 = self.low2(low1)
157 |         low3 = self.low3(low2)
158 |         up2 = self.up2(low3)
159 |         return self.merge(up1, up2)
160 | 
161 | 
162 | 
163 | """
164 | n = 5
165 | dims = [256, 256, 384, 384, 384, 512]
166 | modules = [2, 2, 2, 2, 2, 4]
167 | out_dim = 80
168 | """
169 | 
170 | @BACKBONES.register_module
171 | class Hourglass(nn.Module):
172 |     def __init__(
173 |         self, n, nstack, dims, modules, out_dim, pre=None, cnv_dim=256, trans_conv=False,
174 |         make_cnv_layer=make_cnv_layer,
175 |         make_up_layer=make_layer, make_low_layer=make_layer,
176 |         make_hg_layer=make_hg_layer, make_hg_layer_revr=make_layer_revr,
177 |         make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
178 |         make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
179 |         kp_layer=residual
180 |     ):
181 |         super(Hourglass, self).__init__()
182 | 
183 |         self.nstack = nstack
184 | 
185 |         curr_dim = dims[0]
186 | 
187 |         self.pre = nn.Sequential(
188 |             convolution(7, 3, 128, stride=2),
189 |             residual(3, 128, 256, stride=2)
190 |         ) if pre is None else pre
191 | 
192 |         self.hg_modules = nn.ModuleList([
193 |             kp_module(
194 |                 n, dims, modules, trans_conv=trans_conv, layer=kp_layer,
195 |                 make_up_layer=make_up_layer,
196 |                 make_low_layer=make_low_layer,
197 |                 make_hg_layer=make_hg_layer,
198 |                 make_hg_layer_revr=make_hg_layer_revr,
199 |                 make_pool_layer=make_pool_layer,
200 |                 make_unpool_layer=make_unpool_layer,
201 |                 make_merge_layer=make_merge_layer
202 |             ) for _ in range(nstack)  # nstack is 2 in the CornerNet paper
203 |         ])
204 | 
205 |         self.inters = nn.ModuleList([
206 |             make_inter_layer(curr_dim) for _ in range(nstack - 1)
207 |         ])
208 | 
209 |         self.inters_ = nn.ModuleList([
210 |             nn.Sequential(
211 |                 nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
212 |                 nn.BatchNorm2d(curr_dim)
213 |             ) for _ in range(nstack - 1)
214 |         ])
215 | 
216 |         self.cnvs = nn.ModuleList([
217 |             make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
218 |         ])
219 | 
220 |         self.cnvs_ = nn.ModuleList([
221 |             nn.Sequential(
222 |
nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False), 223 | nn.BatchNorm2d(curr_dim) 224 | ) for _ in range(nstack - 1) 225 | ]) 226 | 227 | self.relu = nn.ReLU(inplace=True) 228 | 229 | def init_weights(self, pretrained=None): 230 | pass 231 | 232 | def forward(self, x): 233 | inter = self.pre(x) 234 | layers = zip( 235 | self.hg_modules, self.cnvs, 236 | ) 237 | outs = [] 238 | 239 | #inter = self.hg_modules[0](inter) 240 | for ind, layer in enumerate(layers): 241 | hg_, cnv_ = layer[0:2] 242 | 243 | hg = hg_(inter) 244 | cnv = cnv_(hg) 245 | outs.append(cnv) 246 | 247 | if ind < self.nstack - 1: 248 | inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv) 249 | inter = self.relu(inter) 250 | inter = self.inters[ind](inter) 251 | #outs.append(inter) 252 | #outs.append(cnv) 253 | 254 | return outs 255 | 256 | 257 | if __name__=='__main__': 258 | n = 5 259 | dims = [256, 256, 384, 384, 384, 512] 260 | modules = [2, 2, 2, 2, 2, 4] 261 | out_dim = 80 262 | 263 | model = Hourglass(n=n, nstack=2,dims=dims, modules=modules, out_dim=out_dim ).cuda() 264 | img = torch.rand(4,3,511, 511).cuda() 265 | out = model(img) 266 | 267 | import pdb 268 | pdb.set_trace() 269 | 270 | -------------------------------------------------------------------------------- /src/models/detectors/test_mixins.py: -------------------------------------------------------------------------------- 1 | from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals, 2 | merge_aug_bboxes, merge_aug_masks, multiclass_nms) 3 | import numpy as np 4 | import cv2 5 | import pycocotools.mask as mask_util 6 | import pdb 7 | 8 | class RPNTestMixin(object): 9 | 10 | def simple_test_rpn(self, x, img_meta, rpn_test_cfg): 11 | rpn_outs = self.rpn_head(x) 12 | proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg) 13 | proposal_list = self.rpn_head.get_bboxes(*proposal_inputs) 14 | return proposal_list 15 | 16 | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg): 17 | imgs_per_gpu = len(img_metas[0]) 18 | aug_proposals = [[] for _ in range(imgs_per_gpu)] 19 | for x, img_meta in zip(feats, img_metas): 20 | proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg) 21 | for i, proposals in enumerate(proposal_list): 22 | aug_proposals[i].append(proposals) 23 | # after merging, proposals will be rescaled to the original image size 24 | merged_proposals = [ 25 | merge_aug_proposals(proposals, img_meta, rpn_test_cfg) 26 | for proposals, img_meta in zip(aug_proposals, img_metas) 27 | ] 28 | return merged_proposals 29 | 30 | 31 | class BBoxTestMixin(object): 32 | 33 | def simple_test_bboxes(self, 34 | x, 35 | img_meta, 36 | proposals, 37 | rcnn_test_cfg, 38 | rescale=False): 39 | """Test only det bboxes without augmentation.""" 40 | rois = bbox2roi(proposals) 41 | roi_feats = self.bbox_roi_extractor( 42 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois) 43 | cls_score, bbox_pred = self.bbox_head(roi_feats) 44 | img_shape = img_meta[0]['img_shape'] 45 | scale_factor = img_meta[0]['scale_factor'] 46 | det_bboxes, det_labels = self.bbox_head.get_det_bboxes( 47 | rois, 48 | cls_score, 49 | bbox_pred, 50 | img_shape, 51 | scale_factor, 52 | rescale=rescale, 53 | cfg=rcnn_test_cfg) 54 | return det_bboxes, det_labels 55 | 56 | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): 57 | aug_bboxes = [] 58 | aug_scores = [] 59 | for x, img_meta in zip(feats, img_metas): 60 | # only one image in the batch 61 | img_shape = img_meta[0]['img_shape'] 62 | scale_factor = img_meta[0]['scale_factor'] 63 | flip = 
img_meta[0]['flip'] 64 | # TODO more flexible 65 | proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, 66 | scale_factor, flip) 67 | rois = bbox2roi([proposals]) 68 | # recompute feature maps to save GPU memory 69 | roi_feats = self.bbox_roi_extractor( 70 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois) 71 | cls_score, bbox_pred = self.bbox_head(roi_feats) 72 | bboxes, scores = self.bbox_head.get_det_bboxes( 73 | rois, 74 | cls_score, 75 | bbox_pred, 76 | img_shape, 77 | scale_factor, 78 | rescale=False, 79 | cfg=None) 80 | aug_bboxes.append(bboxes) 81 | aug_scores.append(scores) 82 | # after merging, bboxes will be rescaled to the original image size 83 | merged_bboxes, merged_scores = merge_aug_bboxes( 84 | aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) 85 | det_bboxes, det_labels = multiclass_nms( 86 | merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, 87 | rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) 88 | return det_bboxes, det_labels 89 | 90 | 91 | class MaskTestMixin(object): 92 | 93 | def simple_test_mask(self, 94 | x, 95 | img_meta, 96 | det_bboxes, 97 | det_labels, 98 | rescale=False): 99 | # image shape of the first image in the batch (only one) 100 | ori_shape = img_meta[0]['ori_shape'] 101 | scale_factor = img_meta[0]['scale_factor'] 102 | if det_bboxes.shape[0] == 0: 103 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] 104 | else: 105 | # if det_bboxes is rescaled to the original image size, we need to 106 | # rescale it back to the testing scale to obtain RoIs. 107 | _bboxes = (det_bboxes[:, :4] * scale_factor 108 | if rescale else det_bboxes) 109 | mask_rois = bbox2roi([_bboxes]) 110 | mask_feats = self.mask_roi_extractor( 111 | x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) 112 | mask_pred = self.mask_head(mask_feats) 113 | segm_result = self.mask_head.get_seg_masks( 114 | mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape, 115 | scale_factor, rescale) 116 | return segm_result 117 | 118 | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): 119 | if det_bboxes.shape[0] == 0: 120 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] 121 | else: 122 | aug_masks = [] 123 | for x, img_meta in zip(feats, img_metas): 124 | img_shape = img_meta[0]['img_shape'] 125 | scale_factor = img_meta[0]['scale_factor'] 126 | flip = img_meta[0]['flip'] 127 | _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, 128 | scale_factor, flip) 129 | mask_rois = bbox2roi([_bboxes]) 130 | mask_feats = self.mask_roi_extractor( 131 | x[:len(self.mask_roi_extractor.featmap_strides)], 132 | mask_rois) 133 | mask_pred = self.mask_head(mask_feats) 134 | # convert to numpy array to save memory 135 | aug_masks.append(mask_pred.sigmoid().cpu().numpy()) 136 | merged_masks = merge_aug_masks(aug_masks, img_metas, 137 | self.test_cfg.rcnn) 138 | 139 | ori_shape = img_metas[0][0]['ori_shape'] 140 | segm_result = self.mask_head.get_seg_masks( 141 | merged_masks, 142 | det_bboxes, 143 | det_labels, 144 | self.test_cfg.rcnn, 145 | ori_shape, 146 | scale_factor=1.0, 147 | rescale=False) 148 | return segm_result 149 | 150 | 151 | class MaskTestMixin_kpt(object): 152 | 153 | def simple_test_mask(self, 154 | score_map, 155 | corner_offsets, 156 | img_meta, 157 | det_bboxes, 158 | rescale=False): 159 | ''' 160 | :param semantic_map: semantic map hxwx80 161 | :param img_meta: 162 | :param det_bboxes: 163 | :param rescale: 164 | :return: 165 | ''' 166 | # TODO: solve hardcode 167 | semantic_map = (score_map>0.4).astype('int') 168 
| h, w, _ = semantic_map.shape 169 | instance_map = -np.ones_like(semantic_map) 170 | border_y, border_x = -img_meta['offset'] 171 | ori_h, ori_w, _ = img_meta['ori_shape'] 172 | _, img_h, img_w = img_meta['img_shape'] 173 | 174 | for label, bboxes in enumerate(det_bboxes): 175 | #keepinds = (bboxes[...,-1]>0.4) 176 | #bboxes = bboxes[keepinds] 177 | if (len(bboxes)==0) or (semantic_map[...,label].sum()==0): 178 | continue 179 | centers = np.array(bboxes)[...,:4] 180 | centers[..., 0::2] += border_x 181 | centers[..., 1::2] += border_y 182 | pixels = semantic_map[..., label] 183 | 184 | #pdb.set_trace() 185 | if len(bboxes) == 1: 186 | instance_map[..., label] = pixels - 1 187 | else: 188 | for y in range(h): 189 | for x in range(w): 190 | if pixels[y, x] == 0: 191 | continue 192 | tl_x = 4 * (x + corner_offsets[label, y, x]) - 1 193 | tl_y = 4 * (y + corner_offsets[label + 80, y, x]) - 1 194 | br_x = 4 * (x + corner_offsets[label + 160, y, x]) - 1 195 | br_y = 4 * (y + corner_offsets[label + 240, y, x]) - 1 196 | #pdb.set_trace() 197 | instance_map[y, x, label] = KNN_cluster(centers, np.array([tl_x, tl_y, br_x, br_y])) 198 | 199 | #seg_maps = [] 200 | cls_segms = [[] for _ in range(80)] 201 | 202 | for label in range(80): 203 | map_with_id = instance_map[..., label] 204 | if map_with_id.max() == -1: 205 | continue 206 | 207 | for ins_id in range(map_with_id.max()+1): 208 | seg_map = (map_with_id == ins_id).astype('float32') 209 | seg_map *= score_map[...,label] 210 | seg_map = cv2.resize(seg_map, (img_w, img_h)) 211 | seg_map = (seg_map>0.4).astype('int') 212 | #seg_map = seg_map[border_y:border_y + ori_h, border_x:border_x + ori_w] 213 | if seg_map.sum()==0: 214 | continue 215 | seg_map = np.uint8(seg_map) 216 | 217 | rle = mask_util.encode(np.array(seg_map[:, :, np.newaxis], order='F'))[0] 218 | #rle['counts'].decode() 219 | #cls_segms[label].append(rle) 220 | cls_segms[label].append(seg_map) 221 | #pdb.set_trace() 222 | return cls_segms 223 | 224 | 225 | 226 | def KNN_cluster(centers, x): 227 | ''' 228 | :param centers: Nxd 229 | :param x: d 230 | :return: cluster id 231 | ''' 232 | return ((x - centers) ** 2).sum(1).argmin() 233 | -------------------------------------------------------------------------------- /src/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | import cv2 5 | 6 | __all__ = ['MaskTransform_cornernet', 'ImageTransform', 'ImageTransform_cornernet', 'BboxTransform', 'BboxTransform_cornernet', 'MaskTransform', 'Numpy2Tensor'] 7 | 8 | 9 | class ImageTransform(object): 10 | """ 11 | Preprocess an image. 12 | 1. rescale the image to expected size 13 | 2. normalize the image 14 | 3. flip the image (if needed) 15 | 4. pad the image (if needed) 16 | 5. 
transpose to (c, h, w) 17 | """ 18 | 19 | def __init__(self, 20 | mean=(0, 0, 0), 21 | std=(1, 1, 1), 22 | pixel_scale=1, 23 | to_rgb=True, 24 | size_divisor=None): 25 | self.mean = np.array(mean, dtype=np.float32) 26 | self.std = np.array(std, dtype=np.float32) 27 | self.pixel_scale = pixel_scale 28 | self.to_rgb = to_rgb 29 | self.size_divisor = size_divisor 30 | 31 | def __call__(self, img, scale, flip=False, keep_ratio=True, crop=False): 32 | if keep_ratio: 33 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 34 | else: 35 | img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True) 36 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32) 37 | img_shape = img.shape 38 | img = img * float(self.pixel_scale) 39 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 40 | if flip: 41 | img = mmcv.imflip(img) 42 | if self.size_divisor is not None: 43 | img = mmcv.impad_to_multiple(img, self.size_divisor) 44 | pad_shape = img.shape 45 | else: 46 | pad_shape = img_shape 47 | img = img.transpose(2, 0, 1) 48 | return img, img_shape, pad_shape, scale_factor 49 | 50 | class ImageTransform_cornernet(object): 51 | """Preprocess an image. 52 | 53 | 1. rescale the image to expected size 54 | 2. normalize the image 55 | 3. flip the image (if needed) 56 | 4. pad the image (if needed) 57 | 5. transpose to (c, h, w) 58 | """ 59 | 60 | def __init__(self, 61 | mean=(0, 0, 0), 62 | std=(1, 1, 1), 63 | pixel_scale=1, 64 | to_rgb=True, 65 | size_divisor=None): 66 | self.mean = np.array(mean, dtype=np.float32) 67 | self.std = np.array(std, dtype=np.float32) 68 | self.pixel_scale = pixel_scale 69 | self.to_rgb = to_rgb 70 | self.size_divisor = size_divisor 71 | 72 | def __call__(self, img, scale, flip=False, keep_ratio=True, crop=False): 73 | if crop: 74 | h, w, c = img.shape 75 | 76 | nh = int(h*scale) 77 | nw = int(w*scale) 78 | img = mmcv.imresize(img, (nw, nh)) 79 | h, w, c = img.shape 80 | 81 | inp_h = h | 127 82 | inp_w = w | 127 83 | center = np.array([h // 2, w // 2]) 84 | if flip: 85 | img = mmcv.imflip(img) 86 | img, border, offset = crop_image(img, center, [inp_h, inp_w]) 87 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 88 | img = img.transpose(2, 0, 1) 89 | 90 | return img, border, offset 91 | 92 | '''if keep_ratio: 93 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 94 | else: 95 | img, w_scale, h_scale = mmcv.imresize( 96 | img, scale, return_scale=True) 97 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 98 | dtype=np.float32) 99 | img_shape = img.shape''' 100 | #img = mmcv.imnormalize(img, np.array((0, 0, 0), dtype=np.float32), np.array((1.0/float(self.pixel_scale), 1.0/float(self.pixel_scale), 1.0/float(self.pixel_scale)), dtype=np.float32), False) 101 | #img = img * float(self.pixel_scale) 102 | h, w, _ = img.shape 103 | img = mmcv.imresize(img,(511,511)) 104 | ratio = 511.0/float(h) 105 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 106 | if flip: 107 | img = mmcv.imflip(img) 108 | '''if self.size_divisor is not None: 109 | img = mmcv.impad_to_multiple(img, self.size_divisor) 110 | pad_shape = img.shape 111 | else: 112 | pad_shape = img_shape''' 113 | img = img.transpose(2, 0, 1) 114 | #return img, (511, 511, 3), ratio#, pad_shape, scale_factor 115 | return img, (511, 511, 3), None, ratio 116 | 117 | def crop_image(image, center, size): 118 | cty, ctx = center 119 | height, width = size 120 | im_height, im_width = image.shape[0:2] 121 | cropped_image = 
np.zeros((height, width, 3), dtype=np.float32) 122 | cropped_image[:, :, 0] += 103.53 123 | cropped_image[:, :, 1] += 116.28 124 | cropped_image[:, :, 2] += 123.68 125 | 126 | x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width) 127 | y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height) 128 | 129 | left, right = ctx - x0, x1 - ctx 130 | top, bottom = cty - y0, y1 - cty 131 | 132 | cropped_cty, cropped_ctx = height // 2, width // 2 133 | y_slice = slice(cropped_cty - top, cropped_cty + bottom) 134 | x_slice = slice(cropped_ctx - left, cropped_ctx + right) 135 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 136 | 137 | border = np.array([ 138 | cropped_cty - top, 139 | cropped_cty + bottom, 140 | cropped_ctx - left, 141 | cropped_ctx + right 142 | ], dtype=np.float32) 143 | 144 | offset = np.array([ 145 | cty - height // 2, 146 | ctx - width // 2 147 | ]) 148 | 149 | return cropped_image, border, offset 150 | 151 | 152 | def bbox_flip(bboxes, img_shape): 153 | """Flip bboxes horizontally. 154 | 155 | Args: 156 | bboxes(ndarray): shape (..., 4*k) 157 | img_shape(tuple): (height, width) 158 | """ 159 | assert bboxes.shape[-1] % 4 == 0 160 | w = img_shape[1] 161 | flipped = bboxes.copy() 162 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 163 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 164 | return flipped 165 | 166 | 167 | class BboxTransform_cornernet(object): 168 | """Preprocess gt bboxes. 169 | 170 | 1. rescale bboxes according to image size 171 | 2. flip bboxes (if needed) 172 | 3. pad the first dimension to `max_num_gts` 173 | """ 174 | 175 | def __init__(self, max_num_gts=None): 176 | self.max_num_gts = max_num_gts 177 | 178 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 179 | bboxes = np.array(bboxes) 180 | gt_bboxes = bboxes * scale_factor 181 | if flip: 182 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 183 | return gt_bboxes 184 | '''if len(gt_bboxes)>0: 185 | #try: 186 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 187 | #except IndexError: 188 | # raise AssertionError(gt_bboxes) 189 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 190 | if self.max_num_gts is None: 191 | return gt_bboxes 192 | else: 193 | num_gts = gt_bboxes.shape[0] 194 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 195 | padded_bboxes[:num_gts, :] = gt_bboxes 196 | return padded_bboxes''' 197 | 198 | 199 | class BboxTransform(object): 200 | """Preprocess gt bboxes. 201 | 202 | 1. rescale bboxes according to image size 203 | 2. flip bboxes (if needed) 204 | 3. pad the first dimension to `max_num_gts` 205 | """ 206 | 207 | def __init__(self, max_num_gts=None): 208 | self.max_num_gts = max_num_gts 209 | 210 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 211 | bboxes = np.array(bboxes) 212 | gt_bboxes = bboxes * scale_factor 213 | if flip: 214 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 215 | if len(gt_bboxes)>0: 216 | #try: 217 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 218 | #except IndexError: 219 | # raise AssertionError(gt_bboxes) 220 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 221 | if self.max_num_gts is None: 222 | return gt_bboxes 223 | else: 224 | num_gts = gt_bboxes.shape[0] 225 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 226 | padded_bboxes[:num_gts, :] = gt_bboxes 227 | return padded_bboxes 228 | 229 | 230 | class MaskTransform(object): 231 | """Preprocess masks. 232 | 233 | 1. 
resize masks to expected size and stack to a single array 234 | 2. flip the masks (if needed) 235 | 3. pad the masks (if needed) 236 | """ 237 | 238 | def __call__(self, masks, pad_shape, scale_factor, flip=False): 239 | masks = [ 240 | mmcv.imrescale(mask, scale_factor, interpolation='nearest') 241 | for mask in masks 242 | ] 243 | if flip: 244 | masks = [mask[:, ::-1] for mask in masks] 245 | padded_masks = [ 246 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks 247 | ] 248 | padded_masks = np.stack(padded_masks, axis=0) 249 | return padded_masks 250 | 251 | class MaskTransform_cornernet(object): 252 | def __call__(self, masks, new_scale, flip=False): 253 | masks = [mmcv.imrescale(mask, new_scale, interpolation='nearest') 254 | for mask in masks] 255 | #print(masks[0].shape) 256 | if flip: 257 | masks = [mask[:, ::-1] for mask in masks] 258 | 259 | return masks 260 | 261 | class Numpy2Tensor(object): 262 | 263 | def __init__(self): 264 | pass 265 | 266 | def __call__(self, *args): 267 | if len(args) == 1: 268 | return torch.from_numpy(args[0]) 269 | else: 270 | return tuple([torch.from_numpy(np.array(array)) for array in args]) 271 | -------------------------------------------------------------------------------- /src/core/corner/kp_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import pdb 5 | import cv2 6 | from mmcv.runner import get_dist_info 7 | import mmcv 8 | 9 | def _gather_feat(feat, ind, mask=None): 10 | dim = feat.size(2) 11 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 12 | feat = feat.gather(1, ind) 13 | if mask is not None: 14 | mask = mask.unsqueeze(2).expand_as(feat) 15 | feat = feat[mask] 16 | feat = feat.view(-1, dim) 17 | return feat 18 | 19 | 20 | def _nms(heat, kernel=1): # kernel size is 3 in the paper 21 | pad = (kernel - 1) // 2 22 | 23 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 24 | keep = (hmax == heat).float() 25 | return heat * keep 26 | 27 | 28 | def _tranpose_and_gather_feat(feat, ind): 29 | feat = feat.permute(0, 2, 3, 1).contiguous() 30 | feat = feat.view(feat.size(0), -1, feat.size(3)) # why flatten the feature maps? 
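    # NOTE: this flatten answers the question above. After the permute the
    # tensor is (B, H, W, C); the view collapses the spatial dims to
    # (B, H*W, C) so that _gather_feat can pick one C-dim feature vector per
    # flat spatial index produced by _topk (ind encodes y * W + x). For a
    # (2, 256, 128, 128) input, feat becomes (2, 16384, 256) and gathering
    # K indices yields (2, K, 256).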
31 | feat = _gather_feat(feat, ind) 32 | return feat 33 | 34 | 35 | def _topk(scores, K=20): 36 | batch, cat, height, width = scores.size() # cat is the num of categories 37 | 38 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 39 | 40 | topk_clses = (topk_inds / (height * width)).int() 41 | 42 | topk_inds = topk_inds % (height * width) 43 | topk_ys = (topk_inds / width).int().float() 44 | topk_xs = (topk_inds % width).int().float() 45 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 46 | 47 | def _decode_center( 48 | tl_heat, br_heat, tl_off_c, br_off_c, tl_regr, br_regr, img_meta, 49 | scale_factor=None, rescale=False, obj=False, direct=False, 50 | linear_factor=8.0, K=100, kernel=3, ae_threshold=0.05, num_dets=1000 51 | ): 52 | batch, cat, height, width = tl_heat.size() 53 | _, inp_h, inp_w = img_meta['img_shape'] 54 | 55 | if not obj: 56 | tl_heat = torch.sigmoid(tl_heat) 57 | br_heat = torch.sigmoid(br_heat) 58 | 59 | # perform nms on heatmaps 60 | tl_heat = _nms(tl_heat, kernel=kernel) 61 | br_heat = _nms(br_heat, kernel=kernel) 62 | 63 | if direct: 64 | tl_off_c *= linear_factor 65 | br_off_c *= linear_factor 66 | else: 67 | tl_off_c = torch.exp(tl_off_c) 68 | br_off_c = torch.exp(br_off_c) 69 | 70 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 71 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 72 | tl_ys1 = tl_ys.view(batch, K, 1) 73 | tl_xs1 = tl_xs.view(batch, K, 1) 74 | br_ys1 = br_ys.view(batch, 1, K) 75 | br_xs1 = br_xs.view(batch, 1, K) 76 | 77 | tl_ys = tl_ys1.expand(batch, K, K) # expand for combine all possible boxes 78 | tl_xs = tl_xs1.expand(batch, K, K) 79 | br_ys = br_ys1.expand(batch, K, K) 80 | br_xs = br_xs1.expand(batch, K, K) 81 | 82 | if tl_regr is not None and br_regr is not None: 83 | tl_off_c = _tranpose_and_gather_feat(tl_off_c, tl_inds) 84 | br_off_c = _tranpose_and_gather_feat(br_off_c, br_inds) 85 | tl_off_c = tl_off_c.view(batch, K, 1, 2) 86 | br_off_c = br_off_c.view(batch, 1, K, 2) 87 | 88 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 89 | tl_regr = tl_regr.view(batch, K, 1, 2) 90 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 91 | br_regr = br_regr.view(batch, 1, K, 2) 92 | 93 | tl_cxs = tl_xs + tl_off_c[..., 0] + tl_regr[..., 0] 94 | tl_cys = tl_ys + tl_off_c[..., 1] + tl_regr[..., 1] 95 | br_cxs = br_xs - br_off_c[..., 0] + br_regr[..., 0] 96 | br_cys = br_ys - br_off_c[..., 1] + br_regr[..., 1] 97 | 98 | tl_xs = tl_xs + tl_regr[..., 0] 99 | tl_ys = tl_ys + tl_regr[..., 1] 100 | br_xs = br_xs + br_regr[..., 0] 101 | br_ys = br_ys + br_regr[..., 1] 102 | 103 | 104 | # all possible boxes based on top k corners (ignoring class) 105 | tl_xs *= (inp_w / width) 106 | tl_ys *= (inp_h / height) 107 | br_xs *= (inp_w / width) 108 | br_ys *= (inp_h / height) 109 | 110 | tl_cxs *= (inp_w / width) 111 | tl_cys *= (inp_h / height) 112 | br_cxs *= (inp_w / width) 113 | br_cys *= (inp_h / height) 114 | 115 | x_off = img_meta['border'][2] 116 | y_off = img_meta['border'][0] 117 | 118 | tl_xs -= torch.Tensor([x_off]).type_as(tl_xs) 119 | tl_ys -= torch.Tensor([y_off]).type_as(tl_ys) 120 | br_xs -= torch.Tensor([x_off]).type_as(br_xs) 121 | br_ys -= torch.Tensor([y_off]).type_as(br_ys) 122 | 123 | tl_xs *= tl_xs.gt(0.0).type_as(tl_xs) 124 | tl_ys *= tl_ys.gt(0.0).type_as(tl_ys) 125 | br_xs *= br_xs.gt(0.0).type_as(br_xs) 126 | br_ys *= br_ys.gt(0.0).type_as(br_ys) 127 | 128 | tl_cxs -= torch.Tensor([x_off]).type_as(tl_cxs) 129 | tl_cys -= torch.Tensor([y_off]).type_as(tl_cys) 130 | 
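    # NOTE: x_off/y_off come from img_meta['border'], i.e. the padding added
    # by crop_image at test time. Subtracting them here (and clamping at zero
    # with the .gt(0.0) masks) maps the corners and the centripetal-shift
    # centers from padded-crop coordinates back to the resized image frame.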
br_cxs -= torch.Tensor([x_off]).type_as(br_cxs) 131 | br_cys -= torch.Tensor([y_off]).type_as(br_cys) 132 | 133 | tl_cxs *= tl_cxs.gt(0.0).type_as(tl_cxs) 134 | tl_cys *= tl_cys.gt(0.0).type_as(tl_cys) 135 | br_cxs *= br_cxs.gt(0.0).type_as(br_cxs) 136 | br_cys *= br_cys.gt(0.0).type_as(br_cys) 137 | 138 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 139 | 140 | group_bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 141 | centers = torch.stack((tl_cxs, tl_cys, br_cxs, br_cys), dim=3) 142 | cre = torch.zeros_like(centers) 143 | area_bbox = torch.abs(br_xs - tl_xs )*torch.abs(tl_ys - br_ys ) + 1e-16 144 | 145 | ns = torch.ones_like(area_bbox)*2.1#.6 146 | l_idxs = area_bbox>3500#22500 147 | ns[l_idxs]=2.4 148 | 149 | cre[...,0] = ((ns+1)*group_bboxes[...,0] + (ns-1)*group_bboxes[...,2])/(2*ns) 150 | cre[...,1] = ((ns+1)*group_bboxes[...,1] + (ns-1)*group_bboxes[...,3])/(2*ns) 151 | cre[...,2] = ((ns-1)*group_bboxes[...,0] + (ns+1)*group_bboxes[...,2])/(2*ns) 152 | cre[...,3] = ((ns-1)*group_bboxes[...,1] + (ns+1)*group_bboxes[...,3])/(2*ns) 153 | 154 | area_center = torch.abs(br_cxs - tl_cxs)*torch.abs(tl_cys - br_cys) 155 | #area_bbox = torch.abs(br_xs - tl_xs )*torch.abs(tl_ys - br_ys ) + 1e-16 156 | area_cre = torch.abs(cre[...,0] - cre[...,2])*torch.abs(cre[...,1] - cre[...,3]) 157 | dists = area_center/area_cre#area_bbox 158 | 159 | tl_cx_inds = ((centers[...,0]<=cre[...,0]) | (centers[...,0]>=cre[...,2]))#.unsqueeze(0) 160 | tl_cy_inds = ((centers[...,1]<=cre[...,1]) | (centers[...,1]>=cre[...,3]))#.unsqueeze(0) 161 | br_cx_inds = ((centers[...,2]<=cre[...,0]) | (centers[...,2]>=cre[...,2]))#.unsqueeze(0) 162 | br_cy_inds = ((centers[...,3]<=cre[...,1]) | (centers[...,3]>=cre[...,3]))#.unsqueeze(0) 163 | 164 | ctr_inds = (tl_cx_inds | tl_cy_inds) & (br_cx_inds | br_cy_inds) 165 | 166 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 167 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 168 | scores = (tl_scores + br_scores) / 2 # scores for all possible boxes 169 | 170 | # reject boxes based on classes 171 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 172 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 173 | cls_inds = (tl_clses != br_clses) # tl and br should have the same class 174 | 175 | # reject boxes based on distances 176 | dist_inds = (dists > ae_threshold) 177 | 178 | # reject boxes based on widths and heights 179 | # tl should be upper and lefter than br 180 | width_inds = (br_xs < tl_xs) 181 | height_inds = (br_ys < tl_ys) 182 | 183 | scores[cls_inds] = -1 184 | scores[width_inds] = -1 185 | scores[height_inds] = -1 186 | scores[tl_cx_inds] = -1 187 | scores[tl_cy_inds] = -1 188 | scores[br_cx_inds] = -1 189 | scores[br_cy_inds] = -1 190 | 191 | scores = scores.view(batch, -1) 192 | scores, inds = torch.topk(scores, num_dets) 193 | scores = scores.unsqueeze(2) 194 | 195 | bboxes = bboxes.view(batch, -1, 4) 196 | bboxes = _gather_feat(bboxes, inds) 197 | 198 | clses = tl_clses.contiguous().view(batch, -1, 1) 199 | clses = _gather_feat(clses, inds).float() 200 | 201 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 202 | tl_scores = _gather_feat(tl_scores, inds).float() 203 | br_scores = br_scores.contiguous().view(batch, -1, 1) 204 | br_scores = _gather_feat(br_scores, inds).float() 205 | 206 | return bboxes, scores, clses 207 | 208 | 209 | def _neg_loss(preds, gt): 210 | pos_inds = gt.eq(1) 211 | neg_inds = gt.lt(1) 212 | # 213 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 214 | # 215 | loss = 0 
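    # NOTE: this is the penalty-reduced focal loss from CornerNet. With p the
    # predicted score and y the Gaussian-smoothed target, each location
    # contributes
    #     -(1 - p)^2 * log(p)              where y == 1
    #     -(1 - y)^4 * p^2 * log(1 - p)    where y < 1
    # and the sum is normalized by the number of positive locations.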
216 |     for pred in preds:
217 |         pos_pred = pred[pos_inds]
218 |         neg_pred = pred[neg_inds]
219 |         #
220 |         pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
221 |         neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
222 |         #
223 |         num_pos = pos_inds.float().sum()
224 |         pos_loss = pos_loss.sum()
225 |         neg_loss = neg_loss.sum()
226 |         #
227 |         # avoid division by zero when there are no positive locations
228 |         if pos_pred.nelement() == 0:
229 |             loss = loss - neg_loss
230 |         else:
231 |             loss = loss - (pos_loss + neg_loss) / num_pos
232 |     return loss
233 | 
234 | 
235 | def _sigmoid(x):
236 |     x = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
237 |     return x
238 | 
239 | 
240 | def _ae_loss(tag0, tag1, mask):  # only positive corner locations contribute to the loss
241 |     num = mask.sum(dim=1, keepdim=True).float()
242 |     tag0 = tag0.squeeze()
243 |     tag1 = tag1.squeeze()
244 |     #
245 |     tag_mean = (tag0 + tag1) / 2
246 |     #
247 |     tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4)
248 |     tag0 = tag0[mask].sum()
249 |     tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4)
250 |     tag1 = tag1[mask].sum()
251 |     pull = tag0 + tag1  # pull loss: smaller means tag0 and tag1 are more similar
252 |     #
253 |     mask = mask.unsqueeze(1) + mask.unsqueeze(2)
254 |     mask = mask.eq(2)
255 |     num = num.unsqueeze(2)
256 |     num2 = (num - 1) * num
257 |     dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2)
258 |     dist = 1 - torch.abs(dist)
259 |     dist = nn.functional.relu(dist, inplace=True)
260 |     dist = dist - 1 / (num + 1e-4)
261 |     dist = dist / (num2 + 1e-4)
262 |     dist = dist[mask]
263 |     push = dist.sum()
264 |     return pull, push
265 | 
266 | 
267 | def _regr_loss(regr, gt_regr, mask):  # regression loss
268 |     num = mask.float().sum()
269 |     mask = mask.unsqueeze(2).expand_as(gt_regr)
270 | 
271 |     regr = regr[mask]
272 |     gt_regr = gt_regr[mask]
273 | 
274 |     regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)  # summed loss; reduction='sum' in newer PyTorch
275 |     regr_loss = regr_loss / (num + 1e-4)
276 |     return regr_loss
277 | 
278 | 
279 | def gaussian2D(shape, sigma=1):
280 |     m, n = [(ss - 1.) / 2.
for ss in shape] 281 | y, x = np.ogrid[-m:m+1,-n:n+1] 282 | # 283 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 284 | 285 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 286 | return h 287 | 288 | def draw_gaussian(heatmap, center, radius, k=1): 289 | diameter = 2 * radius + 1 290 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 291 | x, y = center 292 | # 293 | height, width = heatmap.shape[0:2] 294 | #process the border 295 | left, right = min(x, radius), min(width - x, radius + 1) 296 | top, bottom = min(y, radius), min(height - y, radius + 1) 297 | # 298 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 299 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 300 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 301 | 302 | def gaussian_radius(det_size, min_overlap): 303 | height, width = det_size 304 | 305 | a1 = 1 306 | b1 = (height + width) 307 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 308 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 309 | r1 = (b1 - sq1) / (2 * a1) 310 | 311 | a2 = 4 312 | b2 = 2 * (height + width) 313 | c2 = (1 - min_overlap) * width * height 314 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 315 | r2 = (b2 - sq2) / (2 * a2) 316 | 317 | a3 = 4 * min_overlap 318 | b3 = -2 * min_overlap * (height + width) 319 | c3 = (min_overlap - 1) * width * height 320 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 321 | r3 = (b3 + sq3) / (2 * a3) 322 | return min(r1, r2, r3) 323 | 324 | -------------------------------------------------------------------------------- /src/datasets/extra_aug.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | from numpy import random 4 | import pdb 5 | 6 | from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps 7 | 8 | 9 | class PhotoMetricDistortion(object): 10 | 11 | def __init__(self, 12 | brightness_delta=32, 13 | contrast_range=(0.5, 1.5), 14 | saturation_range=(0.5, 1.5), 15 | hue_delta=18): 16 | self.brightness_delta = brightness_delta 17 | self.contrast_lower, self.contrast_upper = contrast_range 18 | self.saturation_lower, self.saturation_upper = saturation_range 19 | self.hue_delta = hue_delta 20 | 21 | def __call__(self, img, boxes, labels): 22 | # random brightness 23 | if random.randint(2): 24 | delta = random.uniform(-self.brightness_delta, 25 | self.brightness_delta) 26 | img += delta 27 | 28 | # mode == 0 --> do random contrast first 29 | # mode == 1 --> do random contrast last 30 | mode = random.randint(2) 31 | if mode == 1: 32 | if random.randint(2): 33 | alpha = random.uniform(self.contrast_lower, 34 | self.contrast_upper) 35 | img *= alpha 36 | 37 | # convert color from BGR to HSV 38 | img = mmcv.bgr2hsv(img) 39 | 40 | # random saturation 41 | if random.randint(2): 42 | img[..., 1] *= random.uniform(self.saturation_lower, 43 | self.saturation_upper) 44 | 45 | # random hue 46 | if random.randint(2): 47 | img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) 48 | img[..., 0][img[..., 0] > 360] -= 360 49 | img[..., 0][img[..., 0] < 0] += 360 50 | 51 | # convert color from HSV to BGR 52 | img = mmcv.hsv2bgr(img) 53 | 54 | # random contrast 55 | if mode == 0: 56 | if random.randint(2): 57 | alpha = random.uniform(self.contrast_lower, 58 | self.contrast_upper) 59 | img *= alpha 60 | 61 | # randomly swap channels 62 | if random.randint(2): 63 | img = img[..., random.permutation(3)] 64 | 65 | return img, boxes, labels 66 | 67 | 68 | class Expand(object): 69 
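    # NOTE: Expand pastes the image at a random position on a larger canvas
    # filled with the dataset mean and shifts the boxes by the paste offset,
    # i.e. it zooms out by a factor sampled from ratio_range.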
| 70 | def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): 71 | if to_rgb: 72 | self.mean = mean[::-1] 73 | else: 74 | self.mean = mean 75 | self.min_ratio, self.max_ratio = ratio_range 76 | 77 | def __call__(self, img, boxes, labels): 78 | if random.randint(2): 79 | return img, boxes, labels 80 | 81 | h, w, c = img.shape 82 | ratio = random.uniform(self.min_ratio, self.max_ratio) 83 | expand_img = np.full((int(h * ratio), int(w * ratio), c), 84 | self.mean).astype(img.dtype) 85 | left = int(random.uniform(0, w * ratio - w)) 86 | top = int(random.uniform(0, h * ratio - h)) 87 | expand_img[top:top + h, left:left + w] = img 88 | img = expand_img 89 | boxes += np.tile((left, top), 2) 90 | return img, boxes, labels 91 | 92 | 93 | class RandomCrop(object): 94 | 95 | def __init__(self, 96 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 97 | min_crop_size=0.3): 98 | # 1: return ori img 99 | self.sample_mode = (1, *min_ious, 0) 100 | self.min_crop_size = min_crop_size 101 | 102 | def __call__(self, img, boxes, labels): 103 | h, w, c = img.shape 104 | while True: 105 | mode = random.choice(self.sample_mode) 106 | if mode == 1: 107 | return img, boxes, labels 108 | 109 | min_iou = mode 110 | for i in range(50): 111 | new_w = random.uniform(self.min_crop_size * w, w) 112 | new_h = random.uniform(self.min_crop_size * h, h) 113 | 114 | # h / w in [0.5, 2] 115 | if new_h / new_w < 0.5 or new_h / new_w > 2: 116 | continue 117 | 118 | left = random.uniform(w - new_w) 119 | top = random.uniform(h - new_h) 120 | 121 | patch = np.array((int(left), int(top), int(left + new_w), 122 | int(top + new_h))) 123 | overlaps = bbox_overlaps( 124 | patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1) 125 | if overlaps.min() < min_iou: 126 | continue 127 | 128 | # center of boxes should inside the crop img 129 | center = (boxes[:, :2] + boxes[:, 2:]) / 2 130 | mask = (center[:, 0] > patch[0]) * ( 131 | center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * ( 132 | center[:, 1] < patch[3]) 133 | if not mask.any(): 134 | continue 135 | boxes = boxes[mask] 136 | labels = labels[mask] 137 | 138 | # adjust boxes 139 | img = img[patch[1]:patch[3], patch[0]:patch[2]] 140 | boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:]) 141 | boxes[:, :2] = boxes[:, :2].clip(min=patch[:2]) 142 | boxes -= np.tile(patch[:2], 2) 143 | 144 | return img, boxes, labels 145 | 146 | def _get_border(border, size): 147 | i = 1 148 | while size - border // i <= border // i: 149 | i *= 2 150 | return border // i 151 | 152 | class KeepRatioCrop(object): 153 | 154 | def __init__(self, 155 | random_scales=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),# 1.4), 156 | size=(511,511), border=128): 157 | self.random_scales = random_scales 158 | self.crop_size = size 159 | self.border = border 160 | 161 | def __call__(self, img, boxes, labels): 162 | h, w, c = img.shape 163 | while True: 164 | scale = random.choice(self.random_scales) 165 | new_h = int(self.crop_size[0] * scale) 166 | new_w = int(self.crop_size[1] * scale) 167 | h_border = _get_border(self.border, h) 168 | w_border = _get_border(self.border, w) 169 | 170 | for i in range(50): 171 | ctx = np.random.randint(low=w_border, high=w-w_border) 172 | cty = np.random.randint(low=h_border, high=h-h_border) 173 | 174 | x0, x1 = max(ctx - new_w // 2, 0), min(ctx + new_w // 2, w) 175 | y0, y1 = max(cty - new_h // 2, 0), min(cty + new_h // 2, h) 176 | patch = np.array((int(x0), int(y0), int(x1), int(y1))) 177 | 178 | center = (boxes[:, :2] + boxes[:, 2:]) / 2 179 | mask = (center[:, 0] > patch[0]) * ( 180 | 
center[:, 1] > patch[1]) * (center[:, 0] < patch[2]) * ( 181 | center[:, 1] < patch[3]) 182 | if not mask.any(): 183 | continue 184 | boxes = boxes[mask] 185 | labels = labels[mask] 186 | 187 | cropped_img = np.zeros((new_h, new_w, 3), dtype=img.dtype) 188 | cropped_img[:,:,0] += 103.53 189 | cropped_img[:,:,1] += 116.28 190 | cropped_img[:,:,2] += 123.68 191 | 192 | left_w, right_w = ctx - x0, x1 - ctx 193 | top_h, bottom_h = cty - y0, y1 - cty 194 | 195 | # crop image 196 | cropped_ctx, cropped_cty = new_w // 2, new_h // 2 197 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w) 198 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h) 199 | cropped_img[y_slice, x_slice, :] = img[y0:y1, x0:x1, :] 200 | 201 | # crop detections 202 | cropped_detections = boxes.copy() 203 | cropped_detections[:, 0:4:2] -= x0 204 | cropped_detections[:, 1:4:2] -= y0 205 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w 206 | cropped_detections[:, 1:4:2] += cropped_cty - top_h 207 | #print(boxes.shape,'ori') 208 | 209 | cropped_detections, labels, keep_inds = _clip_detections(cropped_img, cropped_detections, labels) 210 | #print(cropped_detections.shape) 211 | 212 | #import pdb 213 | #pdb.set_trace() 214 | crop_args = (mask, keep_inds, new_h, new_w, y_slice, x_slice, x0, y0, x1, y1) 215 | 216 | return cropped_img, cropped_detections, labels, crop_args 217 | 218 | def _clip_detections(image, detections, labels): 219 | detections = detections.copy() 220 | height, width = image.shape[0:2] 221 | 222 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1) 223 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1) 224 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \ 225 | ((detections[:, 3] - detections[:, 1]) > 0) 226 | detections = detections[keep_inds] 227 | labels = labels[keep_inds] 228 | return detections, labels, keep_inds 229 | 230 | 231 | class Noise(object): 232 | 233 | def __init__(self, mean=0, std=1, noise_ratio=0): 234 | self.mean = mean 235 | self.std = std 236 | self.noise_ratio = noise_ratio 237 | 238 | def __call__(self, img, boxes, labels): 239 | if np.random.uniform(0,1) > self.noise_ratio: 240 | return img, boxes, labels 241 | 242 | h, w, c = img.shape 243 | noise_value = np.random.normal(self.mean, self.std, img.shape) 244 | img = img + noise_value 245 | return img, boxes, labels 246 | 247 | class MaskCrop(object): 248 | def __call__(self, gt_masks, crop_args): 249 | ''' 250 | :param gt_masks: a list of gt masks(np.ararry) 251 | :param crop_args: 252 | :return: 253 | ''' 254 | keepinds1, keepinds2, new_h, new_w, y_slice, x_slice, x0, y0, x1, y1 = crop_args 255 | gt_masks = np.stack(gt_masks, 0) 256 | #print('mask shape', gt_masks.shape) 257 | #pdb.set_trace() 258 | gt_masks = gt_masks[keepinds1] 259 | gt_masks = gt_masks[keepinds2] 260 | crop_masks = np.zeros([len(gt_masks), new_h, new_w]) 261 | 262 | crop_masks[:, y_slice, x_slice] = gt_masks[:, y0:y1, x0:x1] 263 | 264 | return list(crop_masks) 265 | 266 | 267 | class ExtraAugmentation_cornernet(object): 268 | 269 | def __init__(self, 270 | photo_metric_distortion=None, 271 | expand=None, 272 | random_crop=None, 273 | noise=None): 274 | self.transforms = [] 275 | if photo_metric_distortion is not None: 276 | self.transforms.append( 277 | PhotoMetricDistortion(**photo_metric_distortion)) 278 | #if expand is not None: 279 | # self.transforms.append(Expand(**expand)) 280 | if random_crop is not None: 281 | self.transforms.append(KeepRatioCrop()) 282 | 
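            # NOTE: in cornernet mode `random_crop` acts only as an on/off
            # switch; KeepRatioCrop is built with its defaults and the
            # parameters inside the `random_crop` dict are ignored.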
#self.transforms.append(RandomCrop(**random_crop)) 283 | if noise is not None: 284 | self.transforms.append(Noise(**noise)) 285 | 286 | def __call__(self, img, boxes, labels): 287 | img = img.astype(np.float32) 288 | for transform in self.transforms: 289 | if isinstance(transform, KeepRatioCrop): 290 | img, boxes, labels, crop_args = transform(img, boxes, labels) 291 | else: 292 | img, boxes, labels = transform(img, boxes, labels) 293 | return img, boxes, labels, crop_args 294 | 295 | 296 | class ExtraAugmentation(object): 297 | 298 | def __init__(self, 299 | photo_metric_distortion=None, 300 | expand=None, 301 | random_crop=None, 302 | noise=None): 303 | self.transforms = [] 304 | if photo_metric_distortion is not None: 305 | self.transforms.append( 306 | PhotoMetricDistortion(**photo_metric_distortion)) 307 | if expand is not None: 308 | self.transforms.append(Expand(**expand)) 309 | if random_crop is not None: 310 | #self.transforms.append(KeepRatioCrop()) 311 | self.transforms.append(RandomCrop(**random_crop)) 312 | if noise is not None: 313 | self.transforms.append(Noise(**noise)) 314 | 315 | def __call__(self, img, boxes, labels): 316 | img = img.astype(np.float32) 317 | for transform in self.transforms: 318 | img, boxes, labels = transform(img, boxes, labels) 319 | return img, boxes, labels 320 | -------------------------------------------------------------------------------- /src/datasets/custom.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import cv2 3 | 4 | import os.path as osp 5 | 6 | import mmcv 7 | import numpy as np 8 | from mmcv.parallel import DataContainer as DC 9 | from torch.utils.data import Dataset 10 | 11 | from .transforms import (ImageTransform, BboxTransform, MaskTransform, MaskTransform_cornernet, 12 | Numpy2Tensor, ImageTransform_cornernet, BboxTransform_cornernet) 13 | from .utils import to_tensor, random_scale 14 | from .extra_aug import ExtraAugmentation, ExtraAugmentation_cornernet, MaskCrop 15 | 16 | import cv2 17 | import random 18 | 19 | class CustomDataset(Dataset): 20 | """Custom dataset for detection. 21 | 22 | Annotation format: 23 | [ 24 | { 25 | 'filename': 'a.jpg', 26 | 'width': 1280, 27 | 'height': 720, 28 | 'ann': { 29 | 'bboxes': (n, 4), 30 | 'labels': (n, ), 31 | 'bboxes_ignore': (k, 4), 32 | 'labels_ignore': (k, 4) (optional field) 33 | } 34 | }, 35 | ... 36 | ] 37 | 38 | The `ann` field is optional for testing. 
39 | """ 40 | 41 | CLASSES = None 42 | 43 | def __init__(self, 44 | ann_file, 45 | img_prefix, 46 | img_scale, 47 | img_norm_cfg, 48 | size_divisor=None, 49 | proposal_file=None, 50 | num_max_proposals=1000, 51 | flip_ratio=0, 52 | with_mask=False, 53 | with_crowd=True, 54 | with_label=True, 55 | with_triple_grey=False, #default no triple-grey op 56 | mixup=False, 57 | mixup_sampler=np.random.beta, 58 | mixup_args=[0.4,0.4], 59 | extra_aug=None, 60 | resize_keep_ratio=True, 61 | test_mode=False, 62 | cornernet_mode=False, 63 | with_maskhead=False, 64 | **kwargs): 65 | # prefix of images path 66 | self.img_prefix = img_prefix 67 | 68 | # load annotations (and proposals) 69 | self.img_infos = self.load_annotations(ann_file) 70 | if proposal_file is not None: 71 | self.proposals = self.load_proposals(proposal_file) 72 | else: 73 | self.proposals = None 74 | # filter images with no annotation during training 75 | if not test_mode: 76 | valid_inds = self._filter_imgs() 77 | self.img_infos = [self.img_infos[i] for i in valid_inds] 78 | if self.proposals is not None: 79 | self.proposals = [self.proposals[i] for i in valid_inds] 80 | 81 | # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...] 82 | self.img_scales = img_scale if isinstance(img_scale, 83 | list) else [img_scale] 84 | assert mmcv.is_list_of(self.img_scales, tuple) 85 | # normalization configs 86 | self.img_norm_cfg = img_norm_cfg 87 | 88 | # max proposals per image 89 | self.num_max_proposals = num_max_proposals 90 | # flip ratio 91 | self.flip_ratio = flip_ratio 92 | assert flip_ratio >= 0 and flip_ratio <= 1 93 | # padding border to ensure the image size can be divided by 94 | # size_divisor (used for FPN) 95 | self.size_divisor = size_divisor 96 | 97 | # with mask or not (reserved field, takes no effect) 98 | self.with_mask = with_mask 99 | # some datasets provide bbox annotations as ignore/crowd/difficult, 100 | # if `with_crowd` is True, then these info is returned. 
101 | self.with_crowd = with_crowd 102 | # with label is False for RPN 103 | self.with_label = with_label 104 | # in test mode or not 105 | self.test_mode = test_mode 106 | # if apply triple grey op on training imgs 107 | self.with_triple_grey=with_triple_grey 108 | # if apply mixup op on training imgs 109 | self.mixup=mixup 110 | self.mixup_sampler=mixup_sampler 111 | self.mixup_args=mixup_args 112 | 113 | # set group flag for the sampler 114 | if not self.test_mode: 115 | self._set_group_flag() 116 | # transforms 117 | self.cornernet_mode = cornernet_mode 118 | self.with_maskhead = with_maskhead 119 | 120 | if self.cornernet_mode: 121 | self.img_transform = ImageTransform_cornernet( 122 | size_divisor=self.size_divisor, **self.img_norm_cfg) 123 | self.bbox_transform = BboxTransform_cornernet() 124 | self.mask_transform = MaskTransform_cornernet() 125 | else: 126 | self.img_transform = ImageTransform( 127 | size_divisor=self.size_divisor, **self.img_norm_cfg) 128 | self.bbox_transform = BboxTransform() 129 | self.mask_transform = MaskTransform() 130 | 131 | #self.mask_transform = MaskTransform() 132 | self.numpy2tensor = Numpy2Tensor() 133 | 134 | # if use extra augmentation 135 | if extra_aug is not None: 136 | if self.cornernet_mode: 137 | self.extra_aug = ExtraAugmentation_cornernet(**extra_aug) 138 | self.mask_crop = MaskCrop() 139 | else: 140 | self.extra_aug = ExtraAugmentation(**extra_aug) 141 | else: 142 | self.extra_aug = None 143 | 144 | # image rescale if keep ratio 145 | self.resize_keep_ratio = resize_keep_ratio 146 | 147 | def __len__(self): 148 | return len(self.img_infos) 149 | 150 | def load_annotations(self, ann_file): 151 | return mmcv.load(ann_file) 152 | 153 | def load_proposals(self, proposal_file): 154 | return mmcv.load(proposal_file) 155 | 156 | def get_ann_info(self, idx): 157 | return self.img_infos[idx]['ann'] 158 | 159 | def _filter_imgs(self, min_size=32): 160 | """Filter images too small.""" 161 | valid_inds = [] 162 | for i, img_info in enumerate(self.img_infos): 163 | if min(img_info['width'], img_info['height']) >= min_size: 164 | valid_inds.append(i) 165 | return valid_inds 166 | 167 | def _set_group_flag(self): 168 | """Set flag according to image aspect ratio. 169 | 170 | Images with aspect ratio greater than 1 will be set as group 1, 171 | otherwise group 0. 172 | """ 173 | self.flag = np.zeros(len(self), dtype=np.uint8) 174 | for i in range(len(self)): 175 | img_info = self.img_infos[i] 176 | if img_info['width'] / img_info['height'] > 1: 177 | self.flag[i] = 1 178 | 179 | def _rand_another(self, idx): 180 | pool = np.where(self.flag == self.flag[idx])[0] 181 | return np.random.choice(pool) 182 | 183 | def __getitem__(self, idx): 184 | if self.test_mode: 185 | return self.prepare_test_img(idx) 186 | while True: 187 | data = self.prepare_train_img(idx) 188 | if data is None: 189 | idx = self._rand_another(idx) 190 | continue 191 | return data 192 | def prepare_train_img_(self,idx): 193 | img_info = self.img_infos[idx] 194 | if 'COCO_val2014_' in img_info['filename']: 195 | s = 13 196 | elif 'COCO_train2014_' in img_info['filename']: 197 | s = 15 198 | else: 199 | s = 0 200 | # load image 201 | img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'][s:])) 202 | # load proposals if necessary 203 | if self.proposals is not None: 204 | proposals = self.proposals[idx][:self.num_max_proposals] 205 | # TODO: Handle empty proposals properly. 
Currently images with 206 | # no proposals are just ignored, but they can be used for 207 | # training in concept. 208 | if len(proposals) == 0: 209 | return None 210 | if not (proposals.shape[1] == 4 or proposals.shape[1] == 5): 211 | raise AssertionError( 212 | 'proposals should have shapes (n, 4) or (n, 5), ' 213 | 'but found {}'.format(proposals.shape)) 214 | if proposals.shape[1] == 5: 215 | scores = proposals[:, 4, None] 216 | proposals = proposals[:, :4] 217 | else: 218 | scores = None 219 | else: 220 | proposals = None 221 | scores = None 222 | 223 | ann = self.get_ann_info(idx) 224 | gt_bboxes = ann['bboxes'] 225 | gt_labels = ann['labels'] 226 | if self.with_crowd: 227 | gt_bboxes_ignore = ann['bboxes_ignore'] 228 | else: 229 | gt_bboxes_ignore = None 230 | # skip the image if there is no valid gt bbox 231 | if len(gt_bboxes) == 0: 232 | #official version: 233 | #return None 234 | 235 | gt_bboxes = [[0,0,0,0]] 236 | if self.with_label: 237 | gt_labels = [0] 238 | if self.extra_aug is not None: 239 | if self.cornernet_mode: 240 | img, gt_bboxes, gt_labels, crop_args = self.extra_aug(img, gt_bboxes, gt_labels) 241 | else: 242 | img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, gt_labels) 243 | # apply transforms 244 | #first step of transform: convert color 245 | if self.with_triple_grey: 246 | if random.random()>=0.5: 247 | gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 248 | img = cv2.merge([gray,gray,gray]) 249 | else: 250 | pass 251 | 252 | #after color convert,test img color 253 | 254 | flip = True if np.random.rand() < self.flip_ratio else False 255 | img_scale = random_scale(self.img_scales) # sample a scale 256 | if self.with_mask: 257 | gt_masks = ann['masks'] 258 | if self.cornernet_mode: 259 | gt_masks = self.mask_crop(gt_masks, crop_args) 260 | 261 | else: 262 | gt_masks = None 263 | 264 | return img_info, img, proposals, scores, gt_bboxes, gt_labels, flip, img_scale, gt_bboxes_ignore, gt_masks 265 | 266 | def prepare_train_img(self, idx): 267 | img_info, img, proposals, scores, gt_bboxes, gt_labels, flip, img_scale, gt_bboxes_ignore, gt_masks = self.prepare_train_img_(idx) 268 | if self.mixup: 269 | idx_ = self._rand_another(idx) 270 | img_info_, img_, proposals_, scores_, gt_bboxes_, gt_labels_, flip_, img_scale_, gt_bboxes_ignore_, gt_masks_ = self.prepare_train_img_(idx_) 271 | lambd = max(0, min(1, self.mixup_sampler(*self.mixup_args))) 272 | height = max(img_info['height'], img_info_['height']) 273 | width = max(img_info['width'], img_info_['width']) 274 | mix_img = np.zeros(shape=(height, width, 3), dtype='float32') 275 | mix_img[:img.shape[0], :img.shape[1], :] = img.astype('float32') * lambd 276 | mix_img[:img_.shape[0], :img_.shape[1], :] += img_.astype('float32') * (1. 
- lambd) 277 | 278 | gt_bboxes = np.vstack((gt_bboxes,gt_bboxes_)) 279 | if self.with_label: 280 | gt_labels = np.hstack((gt_labels,gt_labels_)) 281 | if self.with_crowd: 282 | gt_bboxes_ignore = np.vstack((gt_bboxes_ignore, gt_bboxes_ignore_)) 283 | if self.with_mask: 284 | gt_masks = np.vstack((gt_masks, gt_masks_)) 285 | img = mix_img 286 | 287 | img, img_shape, pad_shape, scale_factor = self.img_transform(img, img_scale, flip, keep_ratio=self.resize_keep_ratio) 288 | 289 | img = img.copy() 290 | 291 | gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip) 292 | 293 | if self.with_crowd: 294 | gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape, 295 | scale_factor, flip) 296 | if self.with_mask: 297 | if self.cornernet_mode: 298 | if self.with_maskhead: 299 | gt_masks = self.mask_transform(gt_masks, img_shape[:2], flip) 300 | else: 301 | gt_masks = self.mask_transform(gt_masks, (128, 128), flip) 302 | else: 303 | gt_masks = self.mask_transform(gt_masks, pad_shape, scale_factor, flip) 304 | 305 | if self.mixup: 306 | ori_shape = (height, width, 3) 307 | else: 308 | ori_shape = (img_info['height'], img_info['width'], 3) 309 | img_meta = dict( 310 | ori_shape=ori_shape, 311 | img_shape=img_shape, 312 | pad_shape=pad_shape, 313 | scale_factor=scale_factor, 314 | flip=flip) 315 | data = dict( 316 | img=DC(to_tensor(img), stack=True), 317 | img_meta=DC(img_meta, cpu_only=True), 318 | gt_bboxes=DC(to_tensor(gt_bboxes))) 319 | if self.proposals is not None: 320 | data['proposals'] = DC(to_tensor(proposals)) 321 | if self.with_label: 322 | data['gt_labels'] = DC(to_tensor(gt_labels)) 323 | if self.with_crowd: 324 | data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) 325 | if self.with_mask: 326 | data['gt_masks'] = DC(gt_masks, cpu_only=True) 327 | return data 328 | 329 | 330 | def prepare_test_img(self, idx, gt=True):#keep ratio and padding to desired size 331 | """Prepare an image for testing (multi-scale and flipping)""" 332 | img_info = self.img_infos[idx] 333 | img = mmcv.imread(osp.join(self.img_prefix, img_info['filename'])) 334 | 335 | if gt: 336 | ann = self.get_ann_info(idx) 337 | gt_bboxes = ann['bboxes'] 338 | gt_labels = ann['labels'] 339 | if self.with_mask: 340 | gt_masks = ann['masks'] 341 | 342 | def prepare_single(img, scale, flip): 343 | _img, border, offset = self.img_transform( 344 | img, scale, flip, keep_ratio=self.resize_keep_ratio, crop=True) 345 | _img_meta = dict( 346 | ori_shape=(img_info['height'], img_info['width'], 3), 347 | img_shape=_img.shape, scale=scale, 348 | border=border, offset=offset, 349 | flip=flip) 350 | _img = to_tensor(_img) 351 | 352 | return _img, _img_meta 353 | 354 | imgs = [] 355 | img_metas = [] 356 | 357 | for scale in [1.0]: 358 | _img, _img_meta, = prepare_single(img, scale, False) 359 | imgs.append(_img) 360 | img_metas.append(DC(_img_meta, cpu_only=True)) 361 | 362 | if self.flip_ratio > 0: 363 | _img, _img_meta= prepare_single( 364 | img, scale, True) 365 | imgs.append(_img) 366 | img_metas.append(DC(_img_meta, cpu_only=True)) 367 | data = dict(img=imgs, img_meta=img_metas) 368 | if not self.with_mask: 369 | h, w = _img.shape[0:2] 370 | gt_masks = [np.zeros([h, w])] 371 | 372 | if len(gt_labels)==0: 373 | gt_labels = np.array([-1]) 374 | h,w=_img.shape[0:2] 375 | gt_masks = [np.zeros([h,w])] 376 | gt_bboxes = np.array([[0,0,0,0]]) 377 | if gt: 378 | data['gt_bboxes'] = gt_bboxes 379 | data['gt_labels'] = gt_labels 380 | data['gt_masks'] = gt_masks 381 | data['idx'] = idx 382 | return data 383 | 
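# NOTE: a minimal, self-contained sketch of the mixup step performed in
# prepare_train_img above, assuming numpy only; `mixup_pair` is a
# hypothetical helper name, not part of this codebase.
import numpy as np

def mixup_pair(img_a, boxes_a, img_b, boxes_b, alpha=0.4):
    # Sample the mixing coefficient from Beta(alpha, alpha) and clamp it to
    # [0, 1], mirroring the default mixup_sampler / mixup_args above.
    lambd = float(np.clip(np.random.beta(alpha, alpha), 0.0, 1.0))
    # Blend both images on a canvas large enough to hold either of them.
    h = max(img_a.shape[0], img_b.shape[0])
    w = max(img_a.shape[1], img_b.shape[1])
    canvas = np.zeros((h, w, 3), dtype=np.float32)
    canvas[:img_a.shape[0], :img_a.shape[1]] = img_a.astype(np.float32) * lambd
    canvas[:img_b.shape[0], :img_b.shape[1]] += img_b.astype(np.float32) * (1.0 - lambd)
    # Ground truth from both images is kept: the boxes are concatenated.
    return canvas, np.vstack((boxes_a, boxes_b)), lambd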
-------------------------------------------------------------------------------- /src/models/bbox_heads/centripetal_mask.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from mmdet.core.corner.corner_target import corner_target 9 | from mmcv.cnn import normal_init 10 | 11 | from mmdet.ops import soft_nms, DeformConv, TopPool, BottomPool, LeftPool, RightPool 12 | from mmdet.core import smooth_l1_loss 13 | 14 | from mmdet.core.corner.kp_utils import _decode_center 15 | 16 | from ..registry import HEADS 17 | from ..utils import ConvModule 18 | 19 | 20 | @HEADS.register_module 21 | class Centripetal_mask(nn.Module): 22 | 23 | def __init__(self, 24 | num_classes, 25 | in_channels, with_mask=False): 26 | super(Centripetal_mask, self).__init__() 27 | self.num_classes = num_classes - 1 28 | self.in_channels = in_channels 29 | 30 | self.tl_out_channels = self.num_classes + 2 + 2 # 2 is the dim for offset map, as there are 2 coordinates, x,y 31 | self.br_out_channels = self.num_classes + 2 + 2 32 | 33 | self.convs = nn.ModuleList() 34 | self.mid_convs = nn.ModuleList() 35 | 36 | self.with_mask = with_mask 37 | 38 | self._init_layers() 39 | 40 | def _init_layers(self): 41 | 42 | self.tl_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 43 | self.br_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 44 | self.mid_tl_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 45 | self.mid_br_fadp = DeformConv(self.in_channels, self.in_channels, 3, 1, 1) 46 | 47 | self.tl_offset = nn.Conv2d(2, 18, 1, bias=False) 48 | self.br_offset = nn.Conv2d(2, 18, 1, bias=False) 49 | self.mid_tl_offset = nn.Conv2d(2, 18, 1, bias=False) 50 | self.mid_br_offset = nn.Conv2d(2, 18, 1, bias=False) 51 | 52 | self.tl_pool = TopLeftPool(self.in_channels) 53 | self.br_pool = BottomRightPool(self.in_channels) 54 | self.mid_tl_pool = TopLeftPool(self.in_channels) 55 | self.mid_br_pool = BottomRightPool(self.in_channels) 56 | 57 | self.tl_heat = make_kp_layer(out_dim=self.num_classes) 58 | self.br_heat = make_kp_layer(out_dim=self.num_classes) 59 | 60 | self.tl_off_c = make_kp_layer(out_dim=2) 61 | self.br_off_c = make_kp_layer(out_dim=2) 62 | 63 | self.tl_off_c_2 = make_kp_layer(out_dim=2) 64 | self.br_off_c_2 = make_kp_layer(out_dim=2) 65 | 66 | self.tl_off = make_kp_layer(out_dim=2) 67 | self.br_off = make_kp_layer(out_dim=2) 68 | 69 | # middle supervision 70 | 71 | self.mid_tl_heat = make_kp_layer(out_dim=self.num_classes) 72 | self.mid_br_heat = make_kp_layer(out_dim=self.num_classes) 73 | 74 | self.mid_tl_off_c = make_kp_layer(out_dim=2) 75 | self.mid_br_off_c = make_kp_layer(out_dim=2) 76 | 77 | self.mid_tl_off_c_2 = make_kp_layer(out_dim=2) 78 | self.mid_br_off_c_2 = make_kp_layer(out_dim=2) 79 | 80 | self.mid_tl_off = make_kp_layer(out_dim=2) 81 | self.mid_br_off = make_kp_layer(out_dim=2) 82 | 83 | if self.with_mask: 84 | for i in range(4): 85 | self.convs.append( 86 | ConvModule(self.in_channels, self.in_channels, 3, padding=1) 87 | ) 88 | self.mid_convs.append( 89 | ConvModule(self.in_channels, self.in_channels, 3, padding=1) 90 | ) 91 | 92 | self.conv_logits = nn.Conv2d(self.in_channels, 81, 1) 93 | self.mid_conv_logits = nn.Conv2d(self.in_channels, 81, 1) 94 | 95 | def init_weights(self): 96 | """ 97 | TODO: weight init method 98 | """ 99 | self.tl_heat[-1].bias.data.fill_(-2.19) 100 | 
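        # NOTE: -2.19 here and in the three fills below is roughly
        # log(0.1 / 0.9), so the heatmap logits start at a sigmoid prior of
        # about 0.1; this keeps the focal loss from being swamped by the huge
        # negative background early in training (the RetinaNet bias-init trick).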
self.br_heat[-1].bias.data.fill_(-2.19) 101 | self.mid_tl_heat[-1].bias.data.fill_(-2.19) 102 | self.mid_br_heat[-1].bias.data.fill_(-2.19) 103 | normal_init(self.tl_offset, std=0.1) 104 | normal_init(self.tl_fadp , std=0.01) 105 | normal_init(self.br_offset, std=0.1) 106 | normal_init(self.br_fadp , std=0.01) 107 | normal_init(self.mid_tl_offset, std=0.1) 108 | normal_init(self.mid_tl_fadp , std=0.01) 109 | normal_init(self.mid_br_offset, std=0.1) 110 | normal_init(self.mid_br_fadp , std=0.01) 111 | 112 | 113 | def forward_single(self, feats): 114 | '''tl_result = self.tl_branch(x) 115 | br_result = self.br_branch(x)''' 116 | x = feats[-1] 117 | mask = None 118 | mask_mid = None 119 | if self.with_mask: 120 | mask = x 121 | for conv in self.convs: 122 | mask = conv(mask) 123 | mask = self.conv_logits(mask) 124 | 125 | tl_pool = self.tl_pool(x) 126 | tl_heat = self.tl_heat(tl_pool) 127 | tl_off_c = self.tl_off_c(tl_pool) 128 | tl_off = self.tl_off(tl_pool) 129 | tl_offmap = self.tl_offset(tl_off_c.detach()) 130 | x_tl_fadp = self.tl_fadp(tl_pool, tl_offmap) 131 | tl_off_c_2= self.tl_off_c_2(x_tl_fadp) 132 | 133 | 134 | br_pool = self.br_pool(x) 135 | br_heat = self.br_heat(br_pool) 136 | br_off_c = self.br_off_c(br_pool) 137 | br_off = self.br_off(br_pool) 138 | br_offmap = self.br_offset(br_off_c.detach()) 139 | x_br_fadp = self.br_fadp(br_pool, br_offmap) 140 | br_off_c_2= self.br_off_c_2(x_br_fadp) 141 | 142 | tl_result = torch.cat([tl_heat, tl_off_c, tl_off_c_2, tl_off], 1) 143 | br_result = torch.cat([br_heat, br_off_c, br_off_c_2, br_off], 1) 144 | 145 | x = feats[0] 146 | 147 | if self.with_mask: 148 | mask_mid = x 149 | for conv in self.mid_convs: 150 | mask_mid = conv(mask_mid) 151 | mask_mid = self.mid_conv_logits(mask_mid) 152 | 153 | tl_pool_mid = self.mid_tl_pool(x) 154 | tl_heat_mid = self.mid_tl_heat(tl_pool_mid) 155 | tl_off_c_mid = self.mid_tl_off_c(tl_pool_mid) 156 | tl_off_mid = self.mid_tl_off(tl_pool_mid) 157 | tl_offmap_mid = self.mid_tl_offset(tl_off_c_mid.detach()) 158 | x_tl_fadp_mid = self.mid_tl_fadp(tl_pool_mid, tl_offmap_mid) 159 | tl_off_c_2_mid= self.mid_tl_off_c_2(x_tl_fadp_mid) 160 | 161 | br_pool_mid = self.mid_br_pool(x) 162 | br_heat_mid = self.mid_br_heat(br_pool_mid) 163 | br_off_c_mid = self.mid_br_off_c(br_pool_mid) 164 | br_off_mid = self.mid_br_off(br_pool_mid) 165 | br_offmap_mid = self.mid_br_offset(br_off_c_mid.detach()) 166 | x_br_fadp_mid = self.mid_br_fadp(br_pool_mid, br_offmap_mid) 167 | br_off_c_2_mid= self.mid_br_off_c_2(x_br_fadp_mid) 168 | 169 | tl_result_mid = torch.cat([tl_heat_mid, tl_off_c_mid, tl_off_c_2_mid, tl_off_mid], 1) 170 | br_result_mid = torch.cat([br_heat_mid, br_off_c_mid, br_off_c_2_mid, br_off_mid], 1) 171 | 172 | if self.with_mask: 173 | return tl_result, br_result, mask, tl_result_mid, br_result_mid, mask_mid 174 | else: 175 | return tl_result, br_result, None, tl_result_mid, br_result_mid, None 176 | 177 | def forward(self, feats): 178 | """ 179 | :param feats: different layer's feature 180 | :return: the raw results 181 | """ 182 | feat = feats # [-1]# we only use the feature of the last layer 183 | return self.forward_single(feat) 184 | 185 | def loss(self, tl_result, br_result, mask, mid_tl_result, mid_br_result, mid_mask, gt_bboxes, gt_labels, gt_masks, img_metas, cfg, imgscale): 186 | gt_tl_heatmap, gt_br_heatmap, gt_tl_offsets, gt_br_offsets, gt_tl_off_c, gt_br_off_c,\ 187 | gt_tl_off_c2, gt_br_off_c2 = corner_target(gt_bboxes=gt_bboxes, gt_labels=gt_labels, feats=tl_result, imgscale=imgscale, direct=True, 
scale=1.0, dcn=True) 188 | # pred_tl_heatmap = _sigmoid(tl_result[:, :self.num_classes, :, :]) 189 | pred_tl_heatmap = tl_result[:, :self.num_classes, :, :].sigmoid() 190 | pred_tl_off_c = tl_result[:, self.num_classes:self.num_classes + 2, :, :] 191 | pred_tl_off_c2 = tl_result[:, self.num_classes+2:self.num_classes+4, :, :] 192 | pred_tl_offsets = tl_result[:, -2:, :, :] 193 | # pred_br_heatmap = _sigmoid(br_result[:, :self.num_classes, :, :]) 194 | pred_br_heatmap = br_result[:, :self.num_classes, :, :].sigmoid() 195 | pred_br_off_c = br_result[:, self.num_classes:self.num_classes + 2, :, :] 196 | pred_br_off_c2 = br_result[:, self.num_classes+2:self.num_classes+4, :, :] 197 | pred_br_offsets = br_result[:, -2:, :, :] 198 | 199 | # mid_pred_tl_heatmap = _sigmoid(mid_tl_result[:, :self.num_classes, :, :]) 200 | mid_pred_tl_heatmap = mid_tl_result[:, :self.num_classes, :, :].sigmoid() 201 | mid_pred_tl_off_c = mid_tl_result[:, self.num_classes:self.num_classes + 2, :, :] 202 | mid_pred_tl_off_c2 = mid_tl_result[:, self.num_classes+2:self.num_classes+4, :, :] 203 | mid_pred_tl_offsets = mid_tl_result[:, -2:, :, :] 204 | # mid_pred_br_heatmap = _sigmoid(mid_br_result[:, :self.num_classes, :, :]) 205 | mid_pred_br_heatmap = mid_br_result[:, :self.num_classes, :, :].sigmoid() 206 | mid_pred_br_off_c = mid_br_result[:, self.num_classes:self.num_classes + 2, :, :] 207 | mid_pred_br_off_c2 = mid_br_result[:, self.num_classes+2:self.num_classes+4, :, :] 208 | mid_pred_br_offsets = mid_br_result[:, -2:, :, :] 209 | 210 | tl_det_loss = det_loss_(pred_tl_heatmap, gt_tl_heatmap) + det_loss_(mid_pred_tl_heatmap, gt_tl_heatmap) 211 | br_det_loss = det_loss_(pred_br_heatmap, gt_br_heatmap) + det_loss_(mid_pred_br_heatmap, gt_br_heatmap) 212 | # tl_det_loss = _neg_loss([pred_tl_heatmap, mid_pred_tl_heatmap], gt_tl_heatmap) 213 | # br_det_loss = _neg_loss([pred_br_heatmap, mid_pred_br_heatmap], gt_br_heatmap) 214 | 215 | det_loss = (tl_det_loss + br_det_loss) / 2.0 216 | 217 | tl_off_mask = gt_tl_heatmap.eq(1).type_as(gt_tl_heatmap) 218 | br_off_mask = gt_br_heatmap.eq(1).type_as(gt_br_heatmap) 219 | 220 | 221 | tl_off_c_loss = off_loss_(pred_tl_off_c, gt_tl_off_c, mask=tl_off_mask) + off_loss_(mid_pred_tl_off_c, gt_tl_off_c,mask=tl_off_mask) 222 | br_off_c_loss = off_loss_(pred_br_off_c, gt_br_off_c, mask=br_off_mask) + off_loss_(mid_pred_br_off_c, gt_br_off_c,mask=br_off_mask) 223 | off_c_loss = tl_off_c_loss.sum() / tl_off_mask.sum() + br_off_c_loss.sum() / br_off_mask.sum() 224 | off_c_loss /= 2.0 225 | off_c_loss *= 0.05 226 | 227 | tl_off_c2_loss = off_loss_(pred_tl_off_c2, gt_tl_off_c2, mask=tl_off_mask) + off_loss_(mid_pred_tl_off_c2, gt_tl_off_c2,mask=tl_off_mask) 228 | br_off_c2_loss = off_loss_(pred_br_off_c2, gt_br_off_c2, mask=br_off_mask) + off_loss_(mid_pred_br_off_c2, gt_br_off_c2,mask=br_off_mask) 229 | off_c2_loss = tl_off_c2_loss.sum() / tl_off_mask.sum() + br_off_c2_loss.sum() / br_off_mask.sum() 230 | off_c2_loss /= 2.0 231 | 232 | tl_off_loss = off_loss_(pred_tl_offsets, gt_tl_offsets, mask=tl_off_mask) + off_loss_(mid_pred_tl_offsets, gt_tl_offsets,mask=tl_off_mask) 233 | br_off_loss = off_loss_(pred_br_offsets, gt_br_offsets, mask=br_off_mask) + off_loss_(mid_pred_br_offsets, gt_br_offsets,mask=br_off_mask) 234 | off_loss = tl_off_loss.sum() / tl_off_mask.sum() + br_off_loss.sum() / br_off_mask.sum() 235 | off_loss /= 2.0 236 | 237 | mask_loss = 0 238 | if self.with_mask: 239 | for b_id in range(len(gt_labels)): 240 | for mask_id in range(len(gt_labels[b_id])): 241 | mask_label = 
gt_labels[b_id][mask_id] 242 | m_pred = mask[b_id][mask_label] 243 | mid_m_pred = mid_mask[b_id][mask_label] 244 | m_gt = torch.from_numpy(gt_masks[b_id][mask_id]).float().cuda() 245 | mask_loss += F.binary_cross_entropy_with_logits(m_pred, m_gt) 246 | mask_loss += F.binary_cross_entropy_with_logits(mid_m_pred, m_gt) 247 | mask_loss /= mask.size(0) 248 | mask_loss /= 2.0 249 | 250 | # return dict(det_loss=det_loss, ae_loss=ae_loss, off_loss=off_loss) 251 | if self.with_mask: 252 | return dict(det_loss=det_loss, off_c_loss=off_c_loss, off_c2_loss=off_c2_loss, off_loss=off_loss, mask_loss=mask_loss) 253 | else: 254 | return dict(det_loss=det_loss, off_c_loss=off_c_loss, off_c2_loss=off_c2_loss, off_loss=off_loss) 255 | 256 | def get_bboxes(self, tl_result, br_result, mask, mid_tl_result, mid_br_result, mid_mask, img_metas, cfg, rescale=False): 257 | tl_heat = tl_result[:, :self.num_classes, :, :] 258 | tl_off_c= tl_result[:, self.num_classes+2:self.num_classes+4, :, :] 259 | tl_regr = tl_result[:, -2:, :, :] 260 | br_heat = br_result[:, :self.num_classes, :, :] 261 | br_off_c= br_result[:, self.num_classes+2:self.num_classes+4, :, :] 262 | br_regr = br_result[:, -2:, :, :] 263 | #pdb.set_trace() 264 | if len(tl_heat) == 2: 265 | img_metas = img_metas[0] 266 | 267 | if isinstance(img_metas, list): 268 | img_metas_1 = img_metas[0] 269 | else: 270 | img_metas_1 = img_metas 271 | 272 | batch_bboxes, batch_scores, batch_clses = _decode_center(tl_heat=tl_heat, br_heat=br_heat, tl_off_c=tl_off_c, br_off_c=br_off_c, tl_regr=tl_regr, br_regr=br_regr, img_meta=img_metas_1)#[0] 273 | h, w, _ = img_metas_1['ori_shape'] 274 | #h, w, _ = img_metas[0]['ori_shape'] 275 | 276 | scale = img_metas_1['scale'] 277 | #batch_bboxes /= scale 278 | 279 | if len(batch_bboxes) == 2: 280 | # print('flip') 281 | batch_bboxes[1, :, [0, 2]] = w - batch_bboxes[1, :, [2, 0]] 282 | 283 | 284 | batch_bboxes = batch_bboxes.view([-1, 4]).unsqueeze(0) 285 | batch_scores = batch_scores.view([-1, 1]).unsqueeze(0) 286 | batch_clses = batch_clses.view([-1, 1]).unsqueeze(0) 287 | # pdb.set_trace() 288 | # assert len(img_metas)==len(batch_bboxes) 289 | result_list = [] 290 | for img_id in range(len(img_metas)): 291 | # pdb.set_trace() 292 | bboxes = batch_bboxes[img_id] 293 | scores = batch_scores[img_id] 294 | clses = batch_clses[img_id] 295 | 296 | scores_n = scores.cpu().numpy() 297 | idx = scores_n.argsort(0)[::-1] 298 | idx = torch.Tensor(idx.astype(float)).long() 299 | 300 | bboxes = bboxes[idx].squeeze() 301 | scores = scores[idx].view(-1) 302 | clses = clses[idx].view(-1) 303 | 304 | det_num = len(bboxes) 305 | 306 | # img_h, img_w, _ = img_metas[img_id]['img_shape'] 307 | # ori_h, ori_w, _ = img_metas[img_id]['ori_shape'] 308 | # h_scale = float(ori_h) / float(img_h) 309 | # w_scale = float(ori_w) / float(img_w) 310 | 311 | # bboxes[:,0::2] *= w_scale 312 | # bboxes[:,1::2] *= h_scale 313 | 314 | '''clses_idx = (clses + 1).long() 315 | det_idx = torch.Tensor(np.arange(det_num)).long() 316 | scores_81 = -1*torch.ones(det_num, self.num_classes + 1).type_as(scores) 317 | scores_81[det_idx, clses_idx] = scores 318 | 319 | bboxes_scores = torch.cat([bboxes, scores.unsqueeze(-1)], 1) 320 | nms_bboxes, _ = nms(bboxes_scores, 0.5) 321 | #nms_bboxes, nms_labels = multiclass_nms(bboxes, scores_81, 0.5, cfg.nms, cfg.max_per_img) 322 | 323 | result_list.append((nms_bboxes, nms_labels))''' 324 | detections = torch.cat([bboxes, scores.unsqueeze(-1)], -1) 325 | keepinds = (detections[:, -1] > -0.1) # 0.05 326 | detections = 
class pool(nn.Module):
    def __init__(self, dim, pool1, pool2):  # pool1, pool2 are module classes, not instances
        super(pool, self).__init__()
        self.p1_conv1 = convolution(3, dim, 128)
        self.p2_conv1 = convolution(3, dim, 128)

        self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
        self.p_bn1 = nn.BatchNorm2d(dim)

        self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = convolution(3, dim, dim)

        self.pool1 = pool1()
        self.pool2 = pool2()

    def forward(self, x):
        # pool 1
        p1_conv1 = self.p1_conv1(x)
        pool1 = self.pool1(p1_conv1)

        # pool 2
        p2_conv1 = self.p2_conv1(x)
        pool2 = self.pool2(p2_conv1)

        # pool 1 + pool 2, with a residual 1x1 branch from the input
        p_conv1 = self.p_conv1(pool1 + pool2)
        p_bn1 = self.p_bn1(p_conv1)

        conv1 = self.conv1(x)
        bn1 = self.bn1(conv1)
        relu1 = self.relu1(p_bn1 + bn1)

        conv2 = self.conv2(relu1)
        return conv2


class pool_new(nn.Module):
    def __init__(self, dim, pool1, pool2):
        super(pool_new, self).__init__()
        self.p1_conv1 = convolution(3, dim, 128)
        self.p2_conv1 = convolution(3, dim, 128)

        self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
        self.p_bn1 = nn.BatchNorm2d(dim)

        self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(dim)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = convolution(3, dim, dim)

        self.pool1 = pool1()
        self.pool2 = pool2()

        # extra "look" branches: each pooling direction first looks along the
        # other direction before pooling
        self.look_conv1 = convolution(3, dim, 128)
        self.look_conv2 = convolution(3, dim, 128)
        self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)
        self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)

    def forward(self, x):
        # pool 1: look along direction 2 first, then pool along direction 1
        look_conv1 = self.look_conv1(x)
        p1_conv1 = self.p1_conv1(x)
        look_right = self.pool2(look_conv1)
        P1_look_conv = self.P1_look_conv(p1_conv1 + look_right)
        pool1 = self.pool1(P1_look_conv)

        # pool 2: look along direction 1 first, then pool along direction 2
        look_conv2 = self.look_conv2(x)
        p2_conv1 = self.p2_conv1(x)
        look_down = self.pool1(look_conv2)
        P2_look_conv = self.P2_look_conv(p2_conv1 + look_down)
        pool2 = self.pool2(P2_look_conv)

        # pool 1 + pool 2, with a residual 1x1 branch from the input
        p_conv1 = self.p_conv1(pool1 + pool2)
        p_bn1 = self.p_bn1(p_conv1)

        conv1 = self.conv1(x)
        bn1 = self.bn1(conv1)
        relu1 = self.relu1(p_bn1 + bn1)

        conv2 = self.conv2(relu1)
        return conv2
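
# --- Illustrative sketch (not part of the original code) ---
# Minimal usage of the pooling blocks above, assuming the compiled
# TopPool/LeftPool ops from src/ops/_cpools are built and imported in this
# module (as TopLeftPool below assumes):
def _demo_corner_pool():
    import torch
    feat = torch.randn(1, 256, 64, 64)
    tl_pool = pool(256, TopPool, LeftPool)  # same wiring as TopLeftPool
    out = tl_pool(feat)                     # spatial shape is preserved
    return out.shape                        # torch.Size([1, 256, 64, 64])
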
class TopLeftPool(pool):
    def __init__(self, dim):
        super(TopLeftPool, self).__init__(dim, TopPool, LeftPool)


class BottomRightPool(pool):
    def __init__(self, dim):
        super(BottomRightPool, self).__init__(dim, BottomPool, RightPool)


class convolution(nn.Module):
    """Conv-BN-ReLU block; the conv bias is disabled whenever BN is used."""

    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
        super(convolution, self).__init__()

        pad = (k - 1) // 2
        self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad),
                              stride=(stride, stride), bias=not with_bn)
        self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        conv = self.conv(x)
        bn = self.bn(conv)
        relu = self.relu(bn)
        return relu


# Pure-PyTorch reference implementations of the four corner-pooling
# directions (the compiled ops in src/ops/_cpools provide the fast versions).

def top_pool(x):  # scans from right to left
    """
    :param x: feature map, a 4-D tensor (N, C, H, W)
    :return: feature map of the same size, where each column holds the max
        over itself and all columns to its right
    """
    x_p = torch.zeros_like(x)
    _, _, h, w = x.size()
    for col in range(w - 1, -1, -1):
        x_p[:, :, :, col] = x[:, :, :, col:].max(-1)[0]

    return x_p


def left_pool(x):  # scans from bottom to top
    x_p = torch.zeros_like(x)
    _, _, h, w = x.size()
    for row in range(h - 1, -1, -1):
        x_p[:, :, row, :] = x[:, :, row:, :].max(-2)[0]

    return x_p


def bottom_pool(x):  # scans from left to right
    x_p = torch.zeros_like(x)
    x_p[:, :, :, 0] = x[:, :, :, 0]
    _, _, h, w = x.size()
    for col in range(1, w):
        x_p[:, :, :, col] = x[:, :, :, 0:col + 1].max(-1)[0]

    return x_p


def right_pool(x):  # scans from top to bottom
    x_p = torch.zeros_like(x)
    x_p[:, :, 0, :] = x[:, :, 0, :]
    _, _, h, w = x.size()
    for row in range(1, h):
        x_p[:, :, row, :] = x[:, :, 0:row + 1, :].max(-2)[0]

    return x_p
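
# --- Illustrative sketch (not part of the original code) ---
# The reference loops above can be expressed without Python loops, assuming
# torch.cummax is available (PyTorch >= 1.5). For example, top_pool takes, at
# each column, the max over itself and all columns to its right, which is a
# reversed cumulative max along the width axis:
def _demo_top_pool_cummax(x):
    # equivalent to top_pool(x)
    return x.flip(-1).cummax(-1)[0].flip(-1)
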
def det_loss_(preds, gt, Epsilon=1e-12):
    """Focal-style detection loss on Gaussian-smoothed ground-truth heatmaps."""
    # TODO: add Gaussian to gt_heatmap
    pos_weights = (gt == 1.0).type_as(gt)
    neg_weights = torch.pow(1 - gt, 4).type_as(gt)
    pos_loss = -torch.log(preds + Epsilon) * torch.pow(1 - preds, 2) * pos_weights
    neg_loss = -torch.log(1 - preds + Epsilon) * torch.pow(preds, 2) * neg_weights
    obj_num = pos_weights.sum()
    # normalise by the number of positives; fall back to the plain negative
    # sum when the image contains no positive corner
    if obj_num < 1:
        loss = neg_loss.sum()
    else:
        loss = (pos_loss + neg_loss).sum() / obj_num

    return loss


def _neg_loss(preds, gt, Epsilon=1e-12):
    pos_inds = gt.eq(1)
    neg_inds = gt.lt(1)

    neg_weights = torch.pow(1 - gt[neg_inds], 4)

    loss = 0
    for pred in preds:
        pos_pred = pred[pos_inds]
        neg_pred = pred[neg_inds]

        pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
        neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights

        num_pos = pos_inds.float().sum()
        pos_loss = pos_loss.sum()
        neg_loss = neg_loss.sum()

        # avoid division by zero when there are no positives
        if pos_pred.nelement() == 0:
            loss = loss - neg_loss
        else:
            loss = loss - (pos_loss + neg_loss) / num_pos
    return loss


def off_loss_(preds, target, mask):
    """
    :param preds: predicted offsets
    :param target: ground-truth offsets
    :param mask: marks the positions of the ground-truth corners
    :return: element-wise smooth L1 loss of the offsets
    """
    mask = (mask.sum(1) > 0).unsqueeze(1).type_as(preds)
    # avoid in-place multiplication: preds is still needed by autograd
    preds = preds * mask
    target = target * mask

    return smooth_l1_loss(preds, target, reduction='none')


def ae_loss_(tl_preds, br_preds, match):
    """
    :param tl_preds: predicted top-left embeddings
    :param br_preds: predicted bottom-right embeddings
    :param match: per-image list of ((tl_y, tl_x), (br_y, br_x)) corner pairs
    :return: pull loss and push loss, summed over the batch
    """
    b = tl_preds.size(0)

    pull = 0
    push = 0
    for i in range(b):
        loss = ae_loss_per_image(tl_preds[i], br_preds[i], match[i])
        pull += loss[0]
        push += loss[1]
    return pull, push


def ae_loss_per_image(tl_preds, br_preds, match, pull_weight=0.25, push_weight=0.25):
    # gather the embeddings of each matched top-left/bottom-right pair
    tl_list = torch.Tensor([]).type_as(tl_preds)
    br_list = torch.Tensor([]).type_as(tl_preds)
    me_list = torch.Tensor([]).type_as(tl_preds)
    for m in match:
        tl_y, tl_x = m[0][0], m[0][1]
        br_y, br_x = m[1][0], m[1][1]
        tl_e = tl_preds[:, tl_y, tl_x]
        br_e = br_preds[:, br_y, br_x]
        tl_list = torch.cat([tl_list, tl_e])
        br_list = torch.cat([br_list, br_e])
        me_list = torch.cat([me_list, (tl_e + br_e) / 2.0])

    assert tl_list.size() == br_list.size()

    N = tl_list.size(0)

    # pull: the two corners of one object should share the same embedding
    if N > 0:
        pull_loss = (torch.pow(tl_list - me_list, 2) + torch.pow(br_list - me_list, 2)).sum() / N
    else:
        pull_loss = 0

    # push: mean embeddings of different objects should be at least
    # `margin` apart
    margin = 1
    push_loss = 0
    for i in range(N):
        mask = torch.ones(N, device=tl_preds.device)
        mask[i] = 0
        push_loss += (mask * F.relu(margin - abs(me_list[i] - me_list))).sum()

    if N > 1:
        push_loss /= (N * (N - 1))

    return pull_weight * pull_loss, push_weight * push_loss
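
# --- Illustrative sketch (not part of the original code) ---
# Toy check of the associative-embedding loss above: corners of one object
# with identical embeddings give zero pull loss, and the push term is zero
# once the object means are further apart than the margin of 1.
def _demo_ae_loss():
    import torch
    tl = torch.zeros(1, 4, 4)   # embedding dim 1, 4x4 feature map
    br = torch.zeros(1, 4, 4)
    tl[0, 2, 2] = 2.0           # object B's top-left embedding
    br[0, 3, 3] = 2.0           # object B's bottom-right embedding
    match = [((0, 0), (1, 1)),  # object A: embeddings 0.0 / 0.0
             ((2, 2), (3, 3))]  # object B: embeddings 2.0 / 2.0
    pull, push = ae_loss_per_image(tl, br, match)
    # pull == 0: each corner equals its pair's mean embedding
    # push == 0: the two object means differ by 2.0 > margin
    return pull, push
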
def make_kp_layer(out_dim, cnv_dim=256, curr_dim=256):
    # 3x3 conv (no BN) followed by a 1x1 projection to the output channels
    return nn.Sequential(
        convolution(3, cnv_dim, curr_dim, with_bn=False),
        nn.Conv2d(curr_dim, out_dim, (1, 1))
    )


def _sigmoid(x):
    # clamp away from 0 and 1 so the focal-loss logs stay finite
    x = torch.clamp(torch.sigmoid(x), min=1e-4, max=1 - 1e-4)
    return x

--------------------------------------------------------------------------------