├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── config └── __init__.py ├── data ├── aug │ ├── compose.py │ ├── func.py │ └── ops │ │ ├── __init__.py │ │ ├── ops_det.py │ │ └── ops_img.py └── dataset │ ├── __init__.py │ ├── dataset.py │ ├── dota.py │ ├── hrsc2016.py │ └── usca_aod.py ├── demo ├── graphical-abstract.png └── result.png ├── model ├── backbone │ ├── __init__.py │ ├── darknet.py │ └── resnet │ │ ├── __init__.py │ │ ├── resnet.py │ │ └── splat.py └── rdd │ ├── __init__.py │ ├── rdd.py │ └── utils │ ├── detect.py │ ├── loss.py │ ├── modules.py │ └── priorbox.py ├── requirement.txt ├── run ├── dota │ ├── evaluate.py │ ├── prepare.py │ ├── train-dist.py │ └── train.py ├── hrsc2016 │ ├── evaluate.py │ ├── prepare.py │ ├── train-dist.py │ └── train.py └── ucas-aod │ ├── evaluate.py │ ├── prepare.py │ └── train.py ├── utils ├── adjust_lr.py ├── box │ ├── bbox.py │ ├── bbox_np.py │ ├── ext │ │ ├── rbbox_overlap_cpu │ │ │ ├── __init__.py │ │ │ ├── rbbox_overlap.h │ │ │ ├── rbbox_overlap.pyx │ │ │ └── setup.py │ │ └── rbbox_overlap_gpu │ │ │ ├── __init__.py │ │ │ ├── rbbox_overlap.cpp │ │ │ ├── rbbox_overlap.hpp │ │ │ ├── rbbox_overlap.pyx │ │ │ ├── rbbox_overlap_kernel.cu │ │ │ └── setup.py │ ├── metric.py │ ├── rbbox.py │ └── rbbox_np.py ├── crop_image.py ├── image.py ├── init.py ├── misc.py └── parallel │ ├── __init__.py │ ├── data_parallel.py │ └── sync_batchnorm │ ├── __init__.py │ ├── batchnorm.py │ ├── comm.py │ └── replicate.py └── xtorch ├── README.md └── xnn ├── __init__.py ├── containers.py └── layers.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .dmypy.json 122 | dmypy.json 123 | 124 | # Pyre type checker 125 | .pyre/ 126 | 127 | .idea/ 128 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Capino512 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Single-Stage Rotation-Decoupled Detector for Oriented Object 2 | 3 | This is the repository of paper **Single-Stage Rotation-Decoupled Detector for Oriented Object**. [[Paper]](https://www.mdpi.com/2072-4292/12/19/3262/htm) [[PDF]](https://www.mdpi.com/2072-4292/12/19/3262/pdf) 4 | 5 | **Update:** Updated the code for training on the DOTA, HRSC2016 and UCAS-AOD datasets. Uploaded the weights trained on these datasets. 6 | 7 | Graphical Abstract 8 | 9 | 10 | 11 | ## Introduction 12 | 13 | We optimized the anchor-based oriented object detection method by decoupling the matching of the oriented bounding box and the oriented anchor into the matching of the horizontal bounding box and the horizontal anchor. 
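The idea can be illustrated with a small, self-contained sketch (a rough illustration only, not the code used in this repository; see `utils/box` and `model/rdd/utils/loss.py` for the actual implementation, and note that the helper names `obb_to_hbb` and `hbb_iou` are made up for this example): an oriented ground-truth box is reduced to its axis-aligned bounding rectangle, which is then matched against ordinary horizontal anchors by IoU, while the orientation is left to the regression branch.

```
# Hedged sketch of rotation-decoupled matching: match the axis-aligned bounding
# rectangle of an oriented box against horizontal anchors. Illustrative only.
import numpy as np

def obb_to_hbb(cx, cy, w, h, angle_deg):
    """Axis-aligned bounding rectangle (x1, y1, x2, y2) of an oriented box."""
    a = np.deg2rad(angle_deg)
    dx = (abs(w * np.cos(a)) + abs(h * np.sin(a))) / 2  # half-extent along x
    dy = (abs(w * np.sin(a)) + abs(h * np.cos(a))) / 2  # half-extent along y
    return np.array([cx - dx, cy - dy, cx + dx, cy + dy])

def hbb_iou(box, anchors):
    """IoU between one horizontal box and N horizontal anchors, all (x1, y1, x2, y2)."""
    lt = np.maximum(box[:2], anchors[:, :2])
    rb = np.minimum(box[2:], anchors[:, 2:])
    wh = np.clip(rb - lt, 0, None)
    inter = wh[:, 0] * wh[:, 1]
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    area_anchor = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    return inter / (area_box + area_anchor - inter)

gt_hbb = obb_to_hbb(cx=50, cy=40, w=60, h=20, angle_deg=30)
anchors = np.array([[20, 20, 80, 60], [0, 0, 30, 30]], dtype=np.float32)
print(hbb_iou(gt_hbb, anchors))  # the first anchor matches well, the second barely overlaps
```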
14 | 15 | ## Performance 16 | 17 | ### DOTA1.0 (Task1) 18 | 19 | Reported in our paper: 20 | 21 | | backbone | MS | mAP | PL | BD | BR | GTF | SV | LV | SH | TC | BC | ST | SBF | RA | HA | SP | HC | 22 | | --------- | ---- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | 23 | | ResNet101 | × | 75.52 | 89.7 | 84.33 | 46.35 | 68.62 | 73.89 | 73.19 | 86.92 | 90.41 | 86.46 | 84.3 | 64.22 | 64.95 | 73.55 | 72.59 | 73.31 | 24 | | ResNet101 | √ | 77.75 | 89.15 | 83.92 | 52.51 | 73.06 | 77.81 | 79 | 87.08 | 90.62 | 86.72 | 87.15 | 63.96 | 70.29 | 76.98 | 75.79 | 72.15 | 25 | 26 | Retested with the **original** weights and the **newly** released code: 27 | 28 | | backbone | MS | mAP | PL | BD | BR | GTF | SV | LV | SH | TC | BC | ST | SBF | RA | HA | SP | HC | 29 | | --------- | ---- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | 30 | | ResNet101 | × | 75.02 | 89.61 | 82.01 | 43.35 | 64.79 | 74.10 | 77.54 | 87.11 | 90.84 | 87.15 | 84.80 | 61.52 | 62.22 | 74.49 | 72.57 | 73.13 | 31 | | ResNet101 | √ | 77.87 | 89.21 | 84.80 | 53.40 | 73.17 | 78.11 | 79.44 | 87.28 | 90.78 | 86.46 | 87.43 | 63.46 | 69.91 | 77.52 | 76.00 | 71.06 | 32 | 33 | Checkpoint: 34 | 35 | - [Baidu Drive (l07k)](https://pan.baidu.com/s/1KppCwFpGT-x_BakpIw4KfA) (resnet101, original) 36 | - [Baidu Drive (v9lc)](https://pan.baidu.com/s/1YrsWWbmk9M6fokDWbfSy3A) (resnet101, newly) 37 | 38 | - [Google Drive](https://drive.google.com/file/d/1_QfLJMusMIsdDUpQzqwUCUyBT6N15HvJ/view?usp=sharing) (resnet101, original) 39 | - [Google Drive](https://drive.google.com/drive/folders/1aJ0lfPo-Zqh4g5kf8sXTNIzTLyjVsdLO?usp=sharing) (resnet101, newly) 40 | 41 | ### HRSC2016 42 | 43 | Reported in our paper: 44 | 45 | | backbone | AP(12) | 46 | | --------- | ------ | 47 | | ResNet101 | 94.29 | 48 | | ResNet152 | 94.61 | 49 | 50 | **Update:** Added the test results obtained with the VOC 07 11-point method. Retested with the original weights and the newly released code: 51 | 52 | | backbone | AP(12) | AP(07) | 53 | | --------- | ------ | ------ | 54 | | ResNet101 | 94.26 | 88.19 | 55 | | ResNet152 | 94.71 | 89.00 | 56 | 57 | AP(07) and AP(12) denote the AP computed with the VOC 07 and VOC 12 evaluation metrics, respectively.
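As background for the AP(07)/AP(12) distinction above: the two metrics differ only in how the precision-recall curve is summarized. VOC 07 averages the interpolated precision at 11 fixed recall points, while VOC 12 integrates the area under the full interpolated curve. Below is a minimal sketch written only for illustration; the evaluation actually used by this repository lives in `utils/box/metric.py` and may differ in details.

```
# Hedged sketch of the VOC 07 (11-point) vs VOC 12 (all-point) AP summaries.
# Illustrative only; the repository's own evaluation is in utils/box/metric.py.
import numpy as np

def voc_ap(recall, precision, use_07_metric=False):
    """AP from a precision-recall curve given as equal-length arrays sorted by recall."""
    recall, precision = np.asarray(recall, float), np.asarray(precision, float)
    if use_07_metric:
        # VOC 07: average interpolated precision at recall = 0.0, 0.1, ..., 1.0
        ap = 0.0
        for t in np.arange(0.0, 1.1, 0.1):
            p = precision[recall >= t].max() if np.any(recall >= t) else 0.0
            ap += p / 11.0
        return ap
    # VOC 12: area under the monotonically interpolated precision-recall curve
    r = np.concatenate(([0.0], recall, [1.0]))
    p = np.concatenate(([0.0], precision, [0.0]))
    for i in range(p.size - 1, 0, -1):  # make precision non-increasing from right to left
        p[i - 1] = max(p[i - 1], p[i])
    idx = np.where(r[1:] != r[:-1])[0]
    return float(np.sum((r[idx + 1] - r[idx]) * p[idx + 1]))

rec, prec = [0.1, 0.4, 0.7, 0.9], [1.0, 0.8, 0.6, 0.5]
print(voc_ap(rec, prec, use_07_metric=True), voc_ap(rec, prec, use_07_metric=False))
```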
58 | 59 | Checkpoint: 60 | 61 | - [Baidu Drive (ka71)](https://pan.baidu.com/s/1TE98ntQFivqLpSi7PlofHw) (resnet101, original) 62 | - [Baidu Drive (kin2)](https://pan.baidu.com/s/1Sp5PQoHLUT0jD5yUMDo73g) (resnet152, original) 63 | - [Baidu Drive (9vjf)](https://pan.baidu.com/s/11gv3KZKMB4ZBkOaSygD3GA) (resnet101, newly) 64 | 65 | - [Google Drive](https://drive.google.com/file/d/1EqH8Ipb9Haa8Rcn0dlZCP3klKaWG990y/view?usp=sharing) (resnet101, original) 66 | - [Google Drive](https://drive.google.com/file/d/1it-7n7oGZeFHuUa8GZqbljLxLqUvgaSb/view?usp=sharing) (resnet152, original) 67 | - [Google Drive](https://drive.google.com/drive/folders/1i2rHVYQswf4c-iUiIjfj3WKUcDhJS7qS?usp=sharing) (resnet101, newly) 68 | 69 | ### UCAS-AOD 70 | 71 | Reported in our paper: 72 | 73 | | backbone | plane | car | mAP | 74 | | --------- | ----- | ----- | ----- | 75 | | ResNet101 | 98.86 | 94.96 | 96.86 | 76 | | ResNet152 | 98.85 | 95.18 | 97.01 | 77 | 78 | Retested with the original weights and the newly released code: 79 | 80 | | backbone | plane | car | mAP | 81 | | --------- | ----- | ----- | ----- | 82 | | ResNet101 | 98.86 | 94.96 | 96.91 | 83 | | ResNet152 | 98.93 | 95.14 | 97.03 | 84 | 85 | Checkpoint: 86 | 87 | - [Baidu Drive (2adc)](https://pan.baidu.com/s/1-w2QRXa_hhHPBkUSELbxPg) (resnet101, original) 88 | - [Baidu Drive (oxbo)](https://pan.baidu.com/s/1cmkV40p0POBwsr0f1HZEBw) (resnet152, original) 89 | - [Baidu Drive (1l2q)](https://pan.baidu.com/s/1sS5lc65F99lz7SmPMAw1uw) (resnet101, newly) 90 | 91 | - [Google Drive](https://drive.google.com/file/d/1HjdK5tXhRQ-wAkBmdJ5KaJRhoqmCELoB/view?usp=sharing) (resnet101, original) 92 | - [Google Drive](https://drive.google.com/file/d/1zr5tFDHxHIIU7vlsCvBgVmV8IkH591O5/view?usp=sharing) (resnet152, original) 93 | - [Google Drive](https://drive.google.com/drive/folders/1OBNjd-1I9AO0alKtYnoR_4rlDse3lVj4?usp=sharing) (resnet101, newly) 94 | 95 | ## Visualization 96 | 97 | ![Result](demo/result.png) 98 | 99 | ## Run 100 | 101 | ### Requirements 102 | 103 | ``` 104 | tqdm 105 | numpy 106 | pillow 107 | cython 108 | beautifulsoup4 109 | opencv-python 110 | pytorch>=1.2 111 | torchvision>=0.4 112 | tensorboard>=2.2 113 | ``` 114 | 115 | ### Compile 116 | 117 | ``` 118 | # 'rbbox_batched_nms' will be used as post-processing in the inference stage 119 | # use gpu, for Linux only 120 | cd $PATH_ROOT/utils/box/ext/rbbox_overlap_gpu 121 | python setup.py build_ext --inplace 122 | 123 | # alternative, use cpu, for Windows and Linux 124 | cd $PATH_ROOT/utils/box/ext/rbbox_overlap_cpu 125 | python setup.py build_ext --inplace 126 | ``` 127 | 128 | ### Pre-trained Weights 129 | 130 | Download the pre-trained weight files. 131 | 132 | - [Baidu Drive (4m2c)](https://pan.baidu.com/s/1u9i3giU5Q-7XAF_rkyL8Bw) 133 | 134 | - [Google Drive](https://drive.google.com/drive/folders/1ll4DxqkIa77tBq3H59cdv_ziAfYO8ek7?usp=sharing) 135 | 136 | Set `DIR_WEIGHT` defined in `config/__init__.py` to the directory where the weight files are placed. 137 | 138 | ``` 139 | DIR_WEIGHT = '/.../pre-training-weights' 140 | ``` 141 | 142 | ### Train on DOTA 143 | 144 | #### Data Preprocessing 145 | 146 | Download the [DOTA](https://captain-whu.github.io/DOTA/index.html) dataset, and organize the files as follows: 147 | 148 | ``` 149 | $PATH_ROOT/images 150 | ----------/labelTxt-v1.0-obb 151 | $PATH_ROOT/images/train/P0000.png 152 | -----------------/train/... 153 | -----------------/val/... 154 | -----------------/test/... 155 | 156 | $PATH_ROOT/labelTxt/train/P0000.txt 157 | -------------------/train/...
158 | -------------------/val/... 159 | ``` 160 | 161 | Modify `dir_dataset` and `dir_save` defined in `run/dota/prepare.py`, `run/dota/train.py`, `run/dota/evaluate.py` to your local paths. 162 | 163 | ``` 164 | dir_dataset = '/.../PATH_ROOT' # The directory where the dataset is located 165 | dir_save = '...' # Output directory 166 | ``` 167 | 168 | Then run the provided code: 169 | 170 | ``` 171 | REPO_ROOT$ python run/dota/prepare.py 172 | ``` 173 | 174 | #### Start Training 175 | 176 | ``` 177 | REPO_ROOT$ python run/dota/train.py 178 | ``` 179 | 180 | #### Evaluate 181 | 182 | ``` 183 | REPO_ROOT$ python run/dota/evaluate.py 184 | ``` 185 | 186 | ### Train on HRSC2016 187 | 188 | The steps are similar to those for the DOTA dataset; the corresponding code is provided in `run/hrsc2016`. 189 | 190 | ### Train on UCAS-AOD 191 | 192 | The steps are similar to those for the DOTA dataset; the corresponding code is provided in `run/ucas-aod`. 193 | 194 | ## To Do 195 | 196 | Update the code used for detection. 197 | 198 | ## Citation 199 | 200 | ``` 201 | @article{rdd, 202 | title={Single-Stage Rotation-Decoupled Detector for Oriented Object}, 203 | author={Zhong, Bo and Ao, Kai}, 204 | journal={Remote Sensing}, 205 | year={2020} 206 | } 207 | ``` -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | DIR_WEIGHT = '' 4 | 5 | CATEGORY_DOTA_V10 = ['baseball-diamond', 'basketball-court', 'bridge', 'ground-track-field', 'harbor', 'helicopter', 6 | 'large-vehicle', 'plane', 'roundabout', 'ship', 'small-vehicle', 'soccer-ball-field', 7 | 'storage-tank', 'swimming-pool', 'tennis-court'] 8 | 9 | CATEGORY_UCAS_AOD = ['car', 'plane'] 10 | 11 | CATEGORY_HRSC2016 = ['ship'] 12 | -------------------------------------------------------------------------------- /data/aug/compose.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | 6 | class Compose: 7 | def __init__(self, ops): 8 | self.ops = ops 9 | 10 | def __call__(self, *args): 11 | for op in self.ops: 12 | args = op(*args) 13 | return args 14 | 15 | 16 | class RandomSelect: 17 | def __init__(self, ops): 18 | self.ops = ops 19 | 20 | def __call__(self, *args): 21 | op = np.random.choice(self.ops) 22 | return op(*args) 23 | -------------------------------------------------------------------------------- /data/aug/func.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import cv2 as cv 4 | import numpy as np 5 | 6 | from utils.misc import containerize 7 | 8 | 9 | __all__ = ['hflip', 'vflip', 'rgb2gray', 'resize', 'rotate90', 'pad'] 10 | 11 | 12 | INTER_MODE = {'NEAREST': cv.INTER_NEAREST, 'BILINEAR': cv.INTER_LINEAR, 'BICUBIC': cv.INTER_CUBIC} 13 | 14 | 15 | def hflip(img): 16 | return np.ascontiguousarray(np.fliplr(img)) 17 | 18 | 19 | def vflip(img): 20 | return np.ascontiguousarray(np.flipud(img)) 21 | 22 | 23 | def rgb2gray(img): 24 | return cv.cvtColor(cv.cvtColor(img, cv.COLOR_RGB2GRAY), cv.COLOR_GRAY2RGB) 25 | 26 | 27 | def resize(img, size, interpolate='BILINEAR'): 28 | w, h = containerize(size, 2) 29 | ih, iw = img.shape[:2] 30 | if ih != h or iw != w: 31 | img = cv.resize(img, (w, h), interpolation=INTER_MODE[interpolate]) 32 | return img 33 | 34 | 35 | def rotate90(img, k): # CLOCKWISE k=0, 1, 2, 3 36 | if k % 4 != 0: 37 | img = np.ascontiguousarray(np.rot90(img, -k)) 38 | return img 39 | 40 | 41 | def pad(img, padding, mode='constant',
**kwargs): 42 | if isinstance(padding, int): 43 | padding = [[padding, padding], [padding, padding]] 44 | else: 45 | padding = [containerize(p, 2) for p in padding] 46 | if img.ndim == 3 and len(padding) == 2: 47 | padding.append([0, 0]) 48 | return np.pad(img, padding, mode, **kwargs) 49 | -------------------------------------------------------------------------------- /data/aug/ops/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .ops_img import * 4 | from .ops_det import * 5 | -------------------------------------------------------------------------------- /data/aug/ops/ops_det.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : ops_det.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 10:44 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import random 12 | import cv2 as cv 13 | import numpy as np 14 | 15 | from utils.misc import containerize 16 | from utils.box.bbox_np import xy42xywha, xywha2xy4 17 | 18 | from ..func import * 19 | 20 | 21 | __all__ = ['RandomHFlip', 'RandomVFlip', 'Resize', 'ResizeJitter', 'ResizeLong', 'ResizeBase', 'Pad', 'RandomPad', 22 | 'PadSize', 'PadSquare', 'PadBase', 'Rotate', 'RandomRotate', 'RandomRotate90', 'RandomCrop', 'BBoxFilter'] 23 | 24 | 25 | class RandomHFlip: 26 | def __call__(self, img, anno=None): 27 | if np.random.randint(2): 28 | if anno: 29 | ih, iw = img.shape[:2] 30 | anno['bboxes'][:, :, 0] = iw - 1 - anno['bboxes'][:, :, 0] 31 | img = hflip(img) 32 | return img, anno 33 | 34 | 35 | class RandomVFlip: 36 | def __call__(self, img, anno=None): 37 | if np.random.randint(2): 38 | if anno: 39 | ih, iw = img.shape[:2] 40 | anno['bboxes'][:, :, 1] = ih - 1 - anno['bboxes'][:, :, 1] 41 | img = vflip(img) 42 | return img, anno 43 | 44 | 45 | class Resize: 46 | def __init__(self, size, interpolate='BILINEAR'): 47 | self.size = containerize(size, 2) 48 | self.interpolate = interpolate 49 | 50 | def __call__(self, img, anno=None): 51 | if anno: 52 | ih, iw = img.shape[:2] 53 | rw, rh = self.size 54 | bboxes = anno['bboxes'] * [rw / iw, rh / ih] 55 | # Refit each scaled quadrilateral to a rectangle via minAreaRect; boxes that are already rectangles are barely affected 56 | anno['bboxes'] = np.array([cv.boxPoints(cv.minAreaRect(bbox)) for bbox in bboxes.astype(np.float32)]) 57 | img = resize(img, self.size, self.interpolate) 58 | return img, anno 59 | 60 | 61 | class ResizeJitter: 62 | def __init__(self, scale=(0.8, 1.2)): 63 | self.scale = scale 64 | 65 | def __call__(self, img, anno=None): 66 | ih, iw = img.shape[:2] 67 | rh, rw = [ih, iw] * np.random.uniform(*self.scale, 2) 68 | img, anno = Resize((int(rw), int(rh)))(img, anno) 69 | return img, anno 70 | 71 | 72 | class ResizeLong: 73 | def __init__(self, length, interpolate='BILINEAR'): 74 | self.length = length 75 | self.interpolate = interpolate 76 | 77 | def __call__(self, img, anno=None): 78 | ih, iw = img.shape[:2] 79 | if ih > iw: 80 | size = (int(iw / ih * self.length), self.length) 81 | else: 82 | size = (self.length, int(ih / iw * self.length)) 83 | return Resize(size, self.interpolate)(img, anno) 84 | 85 | 86 | class ResizeBase: 87 | def __init__(self, base, scale=1., interpolate='BILINEAR'): 88 | self.base = base 89 | self.scale = scale 90 | self.interpolate = interpolate 91 | 92 | def __call__(self, img, anno=None): 93 | ih, iw = img.shape[:2] 94 | rh, rw
= int(ih * self.scale), int(iw * self.scale) 95 | rh = (rh - rh % self.base + self.base) if rh % self.base else rh 96 | rw = (rw - rw % self.base + self.base) if rw % self.base else rw 97 | return Resize((rw, rh), self.interpolate)(img, anno) 98 | 99 | 100 | class _Pad: 101 | def get_padding(self, img): 102 | raise NotImplementedError 103 | 104 | def __call__(self, img, anno=None): 105 | padding = self.get_padding(img) 106 | if anno: 107 | anno['bboxes'] += [padding[1][0], padding[0][0]] 108 | img = pad(img, padding) 109 | return img, anno 110 | 111 | 112 | class Pad(_Pad): 113 | def __init__(self, padding): 114 | if isinstance(padding, (int, float)): 115 | padding = [[padding, padding], [padding, padding]] 116 | else: 117 | padding = [containerize(p, 2) for p in padding] 118 | self.padding = padding 119 | 120 | def get_padding(self, img): 121 | (ph1, ph2), (pw1, pw2) = self.padding 122 | ih, iw = img.shape[:2] 123 | ph1 = ph1 if isinstance(ph1, int) else int(ph1 * ih) 124 | ph2 = ph2 if isinstance(ph2, int) else int(ph2 * ih) 125 | pw1 = pw1 if isinstance(pw1, int) else int(pw1 * iw) 126 | pw2 = pw2 if isinstance(pw2, int) else int(pw2 * iw) 127 | padding = [[ph1, ph2], [pw1, pw2]] 128 | return padding 129 | 130 | 131 | class RandomPad: 132 | def __init__(self, padding): 133 | self.padding = padding 134 | 135 | def __call__(self, img, anno=None): 136 | ih, iw = img.shape[:2] 137 | if isinstance(self.padding, float): 138 | ph = pw = int(max(ih, iw) * np.random.uniform(0, self.padding)) 139 | else: 140 | ph = pw = random.randint(0, self.padding) 141 | ph1 = random.randint(0, ph) 142 | pw1 = random.randint(0, pw) 143 | return Pad([[ph1, ph - ph1], [pw1, pw - pw1]])(img, anno) 144 | 145 | 146 | class PadSize(_Pad): 147 | def __init__(self, size, check_size=False): 148 | self.size = containerize(size, 2) 149 | self.check_size = check_size 150 | 151 | def get_padding(self, img): 152 | pw, ph = self.size 153 | ih, iw = img.shape[:2] 154 | if self.check_size: 155 | assert ih <= ph and iw <= pw 156 | padding = (max(0, ph - ih) // 2), max(0, (pw - iw) // 2) 157 | padding = [[padding[0], max(0, ph - ih - padding[0])], [padding[1], max(0, pw - iw - padding[1])]] 158 | return padding 159 | 160 | 161 | class PadSquare: 162 | def __call__(self, img, anno=None): 163 | ih, iw = img.shape[:2] 164 | pw = ph = max(ih, iw) 165 | return PadSize([pw, ph])(img, anno) 166 | 167 | 168 | class PadBase: 169 | def __init__(self, base): 170 | self.base = base 171 | 172 | def __call__(self, img, anno=None): 173 | ih, iw = img.shape[:2] 174 | ph = (ih - ih % self.base + self.base) if ih % self.base else ih 175 | pw = (iw - iw % self.base + self.base) if iw % self.base else iw 176 | return PadSize((pw, ph))(img, anno) 177 | 178 | 179 | class Rotate: 180 | def __init__(self, angle, scale=1, expand=False, shift=False): 181 | self.angle = angle 182 | self.scale = scale 183 | self.expand = expand 184 | self.shift = shift 185 | 186 | def __call__(self, img, anno=None): 187 | nh, nw = ih, iw = img.shape[:2] 188 | center = ((iw - 1) / 2, (ih - 1) / 2) 189 | m = cv.getRotationMatrix2D(center, angle=-self.angle, scale=self.scale) 190 | if self.expand or self.shift: 191 | corner = np.array([[0, 0, 1], [iw - 1, 0, 1], [iw - 1, ih - 1, 1], [0, ih - 1, 1]], dtype=np.float32) 192 | corner = np.matmul(m, corner.T).T 193 | left, top = np.min(corner, axis=0) 194 | right, bottom = np.max(corner, axis=0) 195 | dx = (right - left - iw) / 2 196 | dy = (bottom - top - ih) / 2 197 | if self.expand: 198 | nw = int(np.ceil(right - left)) 199 | 
nh = int(np.ceil(bottom - top)) 200 | shiftX = dx 201 | shiftY = dy 202 | else: 203 | shiftX = np.random.uniform(-dx, dx) if dx > 0 else 0 204 | shiftY = np.random.uniform(-dy, dy) if dy > 0 else 0 205 | m[0, 2] += shiftX 206 | m[1, 2] += shiftY 207 | if anno: 208 | bound = (nw / 2, nh / 2), (nw, nh), 0 209 | bboxes, labels = [], [] 210 | for bbox, label in zip(anno['bboxes'], anno['labels']): 211 | corner = np.matmul(m, np.c_[bbox, np.ones((4, 1))].T).T 212 | if not self.expand: 213 | x, y, w, h, a = xy42xywha(corner) 214 | inter_points = cv.rotatedRectangleIntersection(bound, ((x, y), (w, h), a))[1] 215 | if inter_points is not None: 216 | order_points = cv.convexHull(inter_points, returnPoints=True) 217 | inter_area = cv.contourArea(order_points) 218 | iou = inter_area / (w * h) 219 | if iou >= 0.5: 220 | corner = cv.boxPoints(cv.minAreaRect(order_points)) 221 | else: 222 | continue 223 | bboxes.append(corner) 224 | labels.append(label) 225 | if bboxes: 226 | anno['bboxes'] = np.stack(bboxes) 227 | anno['labels'] = np.stack(labels) 228 | else: 229 | anno = None 230 | img = cv.warpAffine(img, m, (nw, nh)) 231 | return img, anno 232 | 233 | 234 | class RandomRotate: 235 | def __init__(self, angle=180, scale=1, expand=False, shift=False): 236 | self.angle = (-angle, angle) if isinstance(angle, (int, float)) else angle 237 | self.scale = containerize(scale, 2) 238 | self.expand = expand 239 | self.shift = shift 240 | 241 | def __call__(self, img, anno=None): 242 | angle = np.random.uniform(*self.angle) 243 | scale = np.random.uniform(*self.scale) 244 | return Rotate(angle, scale, self.expand, self.shift)(img, anno) 245 | 246 | 247 | class RandomRotate90: 248 | def __init__(self, k=(0, 1, 2, 3)): # CLOCKWISE 249 | self.k = k 250 | 251 | def __call__(self, img, anno=None): 252 | k = np.random.choice(self.k) 253 | ih, iw = img.shape[:2] 254 | if anno: 255 | if k == 1: 256 | anno['bboxes'][:, :, 1] = ih - 1 - anno['bboxes'][:, :, 1] 257 | anno['bboxes'] = anno['bboxes'][:, :, [1, 0]] 258 | if k == 2: 259 | anno['bboxes'] = ([iw - 1, ih - 1] - anno['bboxes']) 260 | if k == 3: 261 | anno['bboxes'][:, :, 0] = iw - 1 - anno['bboxes'][:, :, 0] 262 | anno['bboxes'] = anno['bboxes'][:, :, [1, 0]] 263 | img = rotate90(img, k) 264 | return img, anno 265 | 266 | 267 | class RandomCrop: 268 | def __init__(self, size, max_aspect=1.0, iou_thresh=0.5, max_try=100, nonempty=True): 269 | self.size = size 270 | self.max_aspect = max_aspect 271 | self.iou_thresh = iou_thresh 272 | self.max_try = max_try 273 | self.nonempty = nonempty 274 | 275 | def __call__(self, img, anno=None): 276 | ih, iw = img.shape[:2] 277 | polygons = [] 278 | if anno: 279 | for bbox in anno['bboxes']: 280 | x, y, w, h, a = xy42xywha(bbox) 281 | polygons.append(((x, y), (w, h), a)) 282 | for count in range(self.max_try): 283 | if isinstance(self.size, int): 284 | nh = nw = min(ih, iw, self.size) 285 | else: 286 | if self.max_aspect == 1: 287 | nh = nw = random.randint(min(ih, iw, self.size[0]), min(ih, iw, self.size[1])) 288 | else: 289 | nh = random.randint(min(ih, self.size[0]), min(ih, self.size[1])) 290 | nw = random.randint(min(iw, self.size[0]), min(iw, self.size[1])) 291 | if max(nh / nw, nw / nh) > self.max_aspect: 292 | continue 293 | oh = random.randint(0, ih - nh) 294 | ow = random.randint(0, iw - nw) 295 | a = np.random.uniform(0, 360) 296 | src = xywha2xy4([ow + nw / 2, oh + nh / 2, nw, nh, a]) 297 | dst = np.array([[0, 0], [nw, 0], [nw, nh]], dtype=np.float32) 298 | m = cv.getAffineTransform(src.astype(np.float32)[:3], dst) 
299 | if anno: 300 | bound = (ow + nw / 2, oh + nh / 2), (nw, nh), a 301 | iou, intersections = [], [] 302 | for polygon in polygons: 303 | inter_points = cv.rotatedRectangleIntersection(bound, polygon)[1] 304 | if inter_points is None: 305 | iou.append(0) 306 | intersections.append(None) 307 | else: 308 | order_points = cv.convexHull(inter_points, returnPoints=True) 309 | inter_area = cv.contourArea(order_points) 310 | iou.append(inter_area / (polygon[1][0] * polygon[1][1])) 311 | intersections.append(cv.boxPoints(cv.minAreaRect(order_points))) 312 | iou = np.array(iou) 313 | if isinstance(self.iou_thresh, float): 314 | mask = iou >= self.iou_thresh 315 | else: 316 | mask = (iou > self.iou_thresh[0]) & (iou < self.iou_thresh[1]) 317 | if np.any(mask): 318 | continue 319 | mask = iou >= self.iou_thresh[1] 320 | if np.any(mask): 321 | bboxes = np.array([inter for inter, m in zip(intersections, mask) if m]) 322 | bboxes = np.concatenate([bboxes, np.ones_like(bboxes[:, :, [0]])], axis=-1) 323 | bboxes = np.matmul(m, bboxes.transpose([0, 2, 1])).transpose([0, 2, 1]) 324 | anno['bboxes'] = bboxes 325 | anno['labels'] = anno['labels'][mask] 326 | else: 327 | if self.nonempty: 328 | continue 329 | else: 330 | anno = None 331 | img = cv.warpAffine(img, m, (nw, nh)) 332 | break 333 | return img, anno 334 | 335 | 336 | class BBoxFilter: 337 | def __init__(self, min_area): 338 | self.min_area = min_area 339 | 340 | def __call__(self, img, anno=None): 341 | if anno: 342 | wh = np.stack([xy42xywha(bbox)[2:4] for bbox in anno['bboxes']]) 343 | area = wh[:, 0] * wh[:, 1] 344 | mask = area >= self.min_area 345 | if np.any(mask): 346 | anno['bboxes'] = anno['bboxes'][mask] 347 | anno['labels'] = anno['labels'][mask] 348 | else: 349 | anno.clear() 350 | return img, anno 351 | -------------------------------------------------------------------------------- /data/aug/ops/ops_img.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import cv2 as cv 4 | import numpy as np 5 | 6 | from ..func import * 7 | from ..compose import Compose 8 | 9 | 10 | __all__ = ['ToFloat', 'Normalize', 'ConvertColor', 'RandomGray', 'RandomBrightness', 'RandomContrast', 11 | 'RandomLightingNoise', 'RandomHue', 'RandomSaturation', 'PhotometricDistort'] 12 | 13 | 14 | class ToFloat: 15 | def __call__(self, img, anno=None): 16 | img = img.astype(np.float32) 17 | return img, anno 18 | 19 | 20 | class Normalize: 21 | def __init__(self, mean, std): 22 | self.mean = mean 23 | self.std = std 24 | 25 | def __call__(self, img, anno=None): 26 | img = (img - self.mean) / self.std 27 | return img, anno 28 | 29 | 30 | class ConvertColor: 31 | def __init__(self, current='RGB', transform='HSV'): 32 | self.transform = transform 33 | self.current = current 34 | 35 | def __call__(self, img, anno=None): 36 | if self.current == 'RGB' and self.transform == 'HSV': 37 | img = cv.cvtColor(img, cv.COLOR_RGB2HSV) 38 | elif self.current == 'HSV' and self.transform == 'RGB': 39 | img = cv.cvtColor(img, cv.COLOR_HSV2RGB) 40 | else: 41 | raise NotImplementedError 42 | return img, anno 43 | 44 | 45 | class RandomGray: # RGB 46 | def __call__(self, img, anno=None): 47 | if np.random.randint(2): 48 | img = rgb2gray(img) 49 | return img, anno 50 | 51 | 52 | class RandomBrightness: # RGB 53 | def __init__(self, delta=32): 54 | assert 0 <= delta <= 255 55 | self.delta = delta 56 | 57 | def __call__(self, img, anno=None): 58 | if np.random.randint(2): 59 | delta = np.random.uniform(-self.delta, self.delta) 60 | img = 
np.clip(img + delta, 0, 255) 61 | return img, anno 62 | 63 | 64 | class RandomContrast: # RGB 65 | def __init__(self, lower=0.5, upper=1.5): 66 | assert 0 < lower < upper 67 | self.lower = lower 68 | self.upper = upper 69 | 70 | def __call__(self, img, anno=None): 71 | if np.random.randint(2): 72 | alpha = np.random.uniform(self.lower, self.upper) 73 | img = np.clip(alpha * img, 0, 255) 74 | return img, anno 75 | 76 | 77 | class RandomLightingNoise: # RGB 78 | def __call__(self, img, anno=None): 79 | if np.random.randint(2): 80 | indexes = [0, 1, 2] 81 | np.random.shuffle(indexes) 82 | img = img[..., indexes] 83 | return img, anno 84 | 85 | 86 | class RandomHue: # HSV 87 | def __init__(self, delta=18.0): 88 | assert 0 <= delta <= 360 89 | self.delta = delta 90 | 91 | def __call__(self, img, anno=None): 92 | if np.random.randint(2): 93 | delta = np.random.uniform(-self.delta, self.delta) 94 | img[:, :, 0] = (img[:, :, 0] + delta) % 360 95 | return img, anno 96 | 97 | 98 | class RandomSaturation: # HSV 99 | def __init__(self, lower=0.5, upper=1.5): 100 | assert 0 < lower < upper 101 | self.lower = lower 102 | self.upper = upper 103 | 104 | def __call__(self, img, anno=None): 105 | if np.random.randint(2): 106 | alpha = np.random.uniform(self.lower, self.upper) 107 | img[:, :, 1] = np.clip(alpha * img[:, :, 1], 0, 1) 108 | return img, anno 109 | 110 | 111 | class PhotometricDistort: 112 | def __init__(self, prob_light_noise=0.2, prob_gray=0.2): 113 | self.prob_light_noise = prob_light_noise 114 | self.prob_gray = prob_gray 115 | self.pd = [ 116 | RandomContrast(), 117 | ConvertColor(current='RGB', transform='HSV'), 118 | RandomSaturation(), 119 | RandomHue(), 120 | ConvertColor(current='HSV', transform='RGB'), 121 | RandomContrast() 122 | ] 123 | self.rand_brightness = RandomBrightness() 124 | self.rand_light_noise = RandomLightingNoise() 125 | self.rand_gray = RandomGray() 126 | 127 | def __call__(self, img, anno=None): 128 | img, anno = self.rand_brightness(img, anno) 129 | distort = Compose(self.pd[:-1] if np.random.randint(2) else self.pd[1:]) 130 | img, anno = distort(img, anno) 131 | if np.random.randint(2): 132 | if np.random.rand() < self.prob_light_noise: 133 | img, anno = self.rand_light_noise(img, anno) 134 | else: 135 | if np.random.rand() < self.prob_gray: 136 | img, anno = self.rand_gray(img, anno) 137 | return img, anno 138 | -------------------------------------------------------------------------------- /data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .dataset import DetDataset 4 | from .dota import DOTA 5 | from .hrsc2016 import HRSC2016 6 | from .usca_aod import UCAS_AOD 7 | -------------------------------------------------------------------------------- /data/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : dataset.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 10:44 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
10 | 11 | import os 12 | import json 13 | import torch 14 | import numpy as np 15 | 16 | from copy import deepcopy 17 | from torch.utils.data import Dataset 18 | from utils.misc import convert_path 19 | from utils.image import imread 20 | from utils.box.bbox_np import xy42xywha 21 | 22 | 23 | class DetDataset(Dataset): 24 | def __init__(self, root, image_set, names, aug=None, color_space='RGB'): 25 | self.names = names 26 | self.aug = aug 27 | self.color_space = color_space 28 | self.label2name = dict((label, name) for label, name in enumerate(self.names)) 29 | self.name2label = dict((name, label) for label, name in enumerate(self.names)) 30 | self.dataset = self.load_dataset(root, image_set) 31 | 32 | @staticmethod 33 | def load_dataset(root, image_set): 34 | image_sets = [image_set] if isinstance(image_set, str) else image_set 35 | dataset = [] 36 | for image_set in image_sets: 37 | for img, anno in json.load(open(os.path.join(root, 'image-sets', f'{image_set}.json'))): 38 | img = os.path.join(root, convert_path(img)) 39 | anno = (os.path.join(root, convert_path(anno)) if anno else None) 40 | dataset.append([img, anno]) 41 | return dataset 42 | 43 | @staticmethod 44 | def load_objs(path, name2label=None): 45 | objs = None 46 | if path: 47 | objs = json.load(open(path)) 48 | bboxes = [obj['bbox'] for obj in objs] 49 | labels = [name2label[obj['name']] if name2label else obj['name'] for obj in objs] 50 | objs = {'bboxes': np.array(bboxes, dtype=np.float32), 'labels': np.array(labels)} 51 | return objs 52 | 53 | @staticmethod 54 | def convert_objs(objs): 55 | target = dict() 56 | if objs: 57 | # Limit the angle between -45° and 45° by set flag=2 58 | target['bboxes'] = torch.from_numpy(np.stack([xy42xywha(bbox, flag=2) for bbox in objs['bboxes']])).float() 59 | target['labels'] = torch.from_numpy(objs['labels']).long() 60 | return target 61 | 62 | def __getitem__(self, index): 63 | img_path, anno_path = self.dataset[index] 64 | img = imread(img_path, self.color_space) 65 | objs = self.load_objs(anno_path, self.name2label) 66 | info = {'img_path': img_path, 'anno_path': anno_path, 'shape': img.shape, 'objs': objs} 67 | if self.aug is not None: 68 | img, objs = self.aug(img, deepcopy(objs)) 69 | return img, objs, info 70 | 71 | @staticmethod 72 | def collate(batch): 73 | images, targets, infos = [], [], [] 74 | # Ensure data balance when parallelizing 75 | batch = sorted(batch, key=lambda x: len(x[1]['labels']) if x[1] else 0) 76 | for i, (img, objs, info) in enumerate(batch): 77 | images.append(torch.from_numpy(img).reshape(*img.shape[:2], -1).float()) 78 | targets.append(DetDataset.convert_objs(objs)) 79 | infos.append(info) 80 | return torch.stack(images).permute(0, 3, 1, 2), targets, infos 81 | 82 | def __len__(self): 83 | return len(self.dataset) 84 | -------------------------------------------------------------------------------- /data/dataset/dota.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from config import CATEGORY_DOTA_V10 as NAMES 4 | 5 | from .dataset import DetDataset 6 | 7 | 8 | class DOTA(DetDataset): 9 | def __init__(self, root, image_sets, aug=None): 10 | super(DOTA, self).__init__(root, image_sets, NAMES, aug) 11 | -------------------------------------------------------------------------------- /data/dataset/hrsc2016.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from config import CATEGORY_HRSC2016 as NAMES 4 | 5 | from .dataset import DetDataset 6 | 7 | 8 | class 
HRSC2016(DetDataset): 9 | def __init__(self, root, image_sets, aug=None): 10 | super(HRSC2016, self).__init__(root, image_sets, NAMES, aug) 11 | -------------------------------------------------------------------------------- /data/dataset/usca_aod.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from config import CATEGORY_UCAS_AOD as NAMES 4 | 5 | from .dataset import DetDataset 6 | 7 | 8 | class UCAS_AOD(DetDataset): 9 | def __init__(self, root, image_sets, aug=None): 10 | super(UCAS_AOD, self).__init__(root, image_sets, NAMES, aug) 11 | -------------------------------------------------------------------------------- /demo/graphical-abstract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Capino512/pytorch-rotation-decoupled-detector/c00d7ea3e33d10f00dfd3198b33eb3e94de61239/demo/graphical-abstract.png -------------------------------------------------------------------------------- /demo/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Capino512/pytorch-rotation-decoupled-detector/c00d7ea3e33d10f00dfd3198b33eb3e94de61239/demo/result.png -------------------------------------------------------------------------------- /model/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .resnet import * 4 | from .darknet import darknet21, darknet53 5 | -------------------------------------------------------------------------------- /model/backbone/darknet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import torch 5 | 6 | from torch import nn 7 | from xtorch import xnn 8 | from config import DIR_WEIGHT 9 | from utils.init import weight_init 10 | 11 | # all pre-trained on image-net 12 | 13 | weights = { 14 | # from YOLO-v3 15 | 'darknet21': os.path.join(DIR_WEIGHT, 'darknet', 'darknet21.pth'), 16 | 'darknet53': os.path.join(DIR_WEIGHT, 'darknet', 'darknet53.pth'), 17 | } 18 | 19 | 20 | def CBR(plane, kernel_size, stride=1, padding=0): 21 | return nn.Sequential(xnn.Conv2d(plane, kernel_size, stride, padding, bias=False), 22 | xnn.BatchNorm2d(), 23 | nn.ReLU(inplace=True)) 24 | 25 | 26 | class BasicBlock(xnn.Module): 27 | def __init__(self, plane): 28 | super(BasicBlock, self).__init__() 29 | self.body = nn.Sequential(CBR(plane // 2, kernel_size=1, stride=1, padding=0), 30 | CBR(plane, kernel_size=3, stride=1, padding=1)) 31 | 32 | def forward(self, x): 33 | return x + self.body(x) 34 | 35 | 36 | class Backbone(xnn.Module): 37 | def __init__(self, layers, name=None, fetch_feature=False): 38 | super(Backbone, self).__init__() 39 | self.name = name 40 | self.fetch_feature = fetch_feature 41 | self.head = CBR(32, kernel_size=3, stride=1, padding=1) 42 | self.layers = nn.ModuleList([self._make_layer(64 * 2 ** i, blocks) for i, blocks in enumerate(layers)]) 43 | 44 | @staticmethod 45 | def _make_layer(plane, blocks): 46 | layers = [CBR(plane, kernel_size=3, stride=2, padding=1)] 47 | for i in range(0, blocks): 48 | layers.append(BasicBlock(plane)) 49 | return nn.Sequential(*layers) 50 | 51 | def init(self): 52 | if self.name in weights: 53 | print('load pre-training weights for', self.name) 54 | weight = torch.load(weights[self.name]) 55 | ret = self.load_state_dict(weight, strict=False) 56 | print(ret) 57 | else: 58 | self.apply(weight_init['normal']) 59 | 60 | def forward(self, x): 
61 | feature = self.head(x) 62 | features = [] 63 | for layer in self.layers: 64 | feature = layer(feature) 65 | if self.fetch_feature: 66 | features.append(feature) 67 | return features if self.fetch_feature else feature 68 | 69 | 70 | def darknet21(fetch_feature=False): 71 | return Backbone([1, 1, 2, 2, 1], 'darknet21', fetch_feature) 72 | 73 | 74 | def darknet53(fetch_feature=False): 75 | return Backbone([1, 2, 8, 8, 4], 'darknet53', fetch_feature) 76 | -------------------------------------------------------------------------------- /model/backbone/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .resnet import resnet18, resnet34, resnet50, resnet101, resnet152 4 | from .resnet import resnest50, resnest101, resnest200, resnest269 5 | from .resnet import resnext50_32x4d, resnext101_32x8d 6 | from .resnet import resnet18_d, resnet34_d, resnet50_d, resnet101_d, resnet152_d 7 | -------------------------------------------------------------------------------- /model/backbone/resnet/resnet.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import torch 5 | 6 | from torch import nn 7 | from xtorch import xnn 8 | from config import DIR_WEIGHT 9 | from utils.init import weight_init 10 | 11 | from .splat import SplAtConv2d 12 | 13 | # all pre-trained on image-net 14 | 15 | weights = { 16 | # from pytorch 17 | 'resnet18': os.path.join(DIR_WEIGHT, 'resnet', 'resnet18-5c106cde.pth'), 18 | 'resnet34': os.path.join(DIR_WEIGHT, 'resnet', 'resnet34-333f7ec4.pth'), 19 | 'resnet50': os.path.join(DIR_WEIGHT, 'resnet', 'resnet50-19c8e357.pth'), 20 | 'resnet101': os.path.join(DIR_WEIGHT, 'resnet', 'resnet101-5d3b4d8f.pth'), 21 | 'resnet152': os.path.join(DIR_WEIGHT, 'resnet', 'resnet152-b121ed2d.pth'), 22 | 'resnext50': os.path.join(DIR_WEIGHT, 'resnet', 'resnext50_32x4d-7cdf4587.pth'), 23 | 'resnext101': os.path.join(DIR_WEIGHT, 'resnet', 'resnext101_32x8d-8ba56ff5.pth'), 24 | 25 | # from https://github.com/zhanghang1989/ResNeSt 26 | 'resnest50': os.path.join(DIR_WEIGHT, 'resnet', 'resnest50-528c19ca.pth'), 27 | 'resnest101': os.path.join(DIR_WEIGHT, 'resnet', 'resnest101-22405ba7.pth'), 28 | 'resnest200': os.path.join(DIR_WEIGHT, 'resnet', 'resnest200-75117900.pth'), 29 | 'resnest269': os.path.join(DIR_WEIGHT, 'resnet', 'resnest269-0cc87c48.pth'), 30 | 31 | 'resnet50-d': os.path.join(DIR_WEIGHT, 'resnet', 'resnet50_v1d.pth'), 32 | 'resnet101-d': os.path.join(DIR_WEIGHT, 'resnet', 'resnet101_v1d.pth'), 33 | 'resnet152-d': os.path.join(DIR_WEIGHT, 'resnet', 'resnet152_v1d.pth'), 34 | } 35 | 36 | 37 | class BasicBlock(xnn.Module): 38 | expansion = 1 39 | 40 | def __init__(self, planes, stride=1, downsample=None, *args, **kwargs): 41 | super(BasicBlock, self).__init__() 42 | self.body = nn.Sequential(xnn.Conv2d(planes, 3, stride, 1, bias=False), 43 | xnn.BatchNorm2d(), 44 | nn.ReLU(inplace=True), 45 | xnn.Conv2d(planes, 3, 1, 1, bias=False), 46 | xnn.BatchNorm2d()) 47 | self.downsample = downsample 48 | 49 | def forward(self, x): 50 | return nn.ReLU(inplace=True)(self.body(x) + (x if self.downsample is None else self.downsample(x))) 51 | 52 | 53 | class Bottleneck(nn.Module): 54 | expansion = 4 55 | 56 | def __init__(self, planes, stride=1, downsample=None, radix=1, cardinality=1, bottleneck_width=64, avd=False, 57 | avd_first=False, dilation=1, is_first=False): 58 | super(Bottleneck, self).__init__() 59 | group_width = int(planes * (bottleneck_width / 64)) * cardinality 60 | avd = 
avd and (stride > 1 or is_first) 61 | 62 | body = [xnn.Conv2d(group_width, kernel_size=1, bias=False), xnn.BatchNorm2d(), nn.ReLU(inplace=True)] 63 | if avd: 64 | avd_layer = nn.AvgPool2d(3, stride, padding=1) 65 | stride = 1 66 | if avd_first: 67 | body.append(avd_layer) 68 | if radix > 1: 69 | body.append(SplAtConv2d(group_width, 3, stride, dilation, dilation, cardinality, bias=False, radix=radix)) 70 | else: 71 | body.append(xnn.Conv2d(group_width, 3, stride, dilation, dilation, cardinality, bias=False)) 72 | body.append(xnn.BatchNorm2d()) 73 | body.append(nn.ReLU(inplace=True)) 74 | if avd and not avd_first: 75 | body.append(avd_layer) 76 | body.append(xnn.Conv2d(planes * self.expansion, 1, bias=False)) 77 | body.append(xnn.BatchNorm2d()) 78 | self.body = nn.Sequential(*body) 79 | self.downsample = downsample 80 | 81 | def forward(self, x): 82 | return nn.ReLU(inplace=True)(self.body(x) + (x if self.downsample is None else self.downsample(x))) 83 | 84 | 85 | class Backbone(xnn.Module): 86 | def __init__(self, block, layers, name=None, fetch_feature=False, radix=1, groups=1, bottleneck_width=64, 87 | dilated=False, dilation=1, deep_stem=False, stem_width=64, avg_down=False, avd=False, avd_first=False): 88 | self.cardinality = groups 89 | self.bottleneck_width = bottleneck_width 90 | # ResNet-D params 91 | self.inplanes = stem_width * 2 if deep_stem else 64 92 | self.avg_down = avg_down 93 | # ResNeSt params 94 | self.radix = radix 95 | self.avd = avd 96 | self.avd_first = avd_first 97 | 98 | super(Backbone, self).__init__() 99 | 100 | self.name = name 101 | self.fetch_feature = fetch_feature 102 | 103 | if deep_stem: 104 | head = [xnn.Conv2d(stem_width, kernel_size=3, stride=2, padding=1, bias=False), 105 | xnn.BatchNorm2d(), 106 | nn.ReLU(inplace=True), 107 | xnn.Conv2d(stem_width, kernel_size=3, stride=1, padding=1, bias=False), 108 | xnn.BatchNorm2d(), 109 | nn.ReLU(inplace=True), 110 | xnn.Conv2d(stem_width * 2, kernel_size=3, stride=1, padding=1, bias=False)] 111 | else: 112 | head = [xnn.Conv2d(64, kernel_size=7, stride=2, padding=3, bias=False)] 113 | self.head = nn.Sequential(*head, xnn.BatchNorm2d(), nn.ReLU(inplace=True), 114 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 115 | self.layer1 = self._make_layer(block, 64, layers[0], is_first=False) 116 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 117 | if dilated or dilation == 4: 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) 119 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) 120 | elif dilation == 2: 121 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=1) 122 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2) 123 | else: 124 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 125 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 126 | 127 | def init(self): 128 | if self.name in weights: 129 | print('load pre-training weights for', self.name) 130 | weight = torch.load(weights[self.name]) 131 | ret = self.load_state_dict(weight, strict=False) 132 | print(ret) 133 | else: 134 | self.apply(weight_init['kaiming_normal']) 135 | 136 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, is_first=True): 137 | downsample = None 138 | if stride != 1 or self.inplanes != planes * block.expansion: 139 | down_layers = [] 140 | if self.avg_down: 141 | if dilation == 1: 142 | down_layers.append( 143 | nn.AvgPool2d(kernel_size=stride, stride=stride, 
ceil_mode=True, count_include_pad=False)) 144 | else: 145 | down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, ceil_mode=True, count_include_pad=False)) 146 | down_layers.append(xnn.Conv2d(planes * block.expansion, kernel_size=1, stride=1, bias=False)) 147 | else: 148 | down_layers.append(xnn.Conv2d(planes * block.expansion, kernel_size=1, stride=stride, bias=False)) 149 | down_layers.append(xnn.BatchNorm2d()) 150 | downsample = nn.Sequential(*down_layers) 151 | 152 | layers = [] 153 | if dilation == 1 or dilation == 2: 154 | layers.append( 155 | block(planes, stride, downsample, self.radix, self.cardinality, self.bottleneck_width, self.avd, 156 | self.avd_first, 1, is_first)) 157 | elif dilation == 4: 158 | layers.append( 159 | block(planes, stride, downsample, self.radix, self.cardinality, self.bottleneck_width, self.avd, 160 | self.avd_first, 2, is_first)) 161 | else: 162 | raise RuntimeError("=> unknown dilation size: {}".format(dilation)) 163 | 164 | self.inplanes = planes * block.expansion 165 | for i in range(1, blocks): 166 | layers.append( 167 | block(planes, 1, None, self.radix, self.cardinality, self.bottleneck_width, self.avd, self.avd_first, 168 | dilation)) 169 | 170 | return nn.Sequential(*layers) 171 | 172 | def forward(self, x): 173 | x = self.head(x) 174 | 175 | x1 = self.layer1(x) 176 | x2 = self.layer2(x1) 177 | x3 = self.layer3(x2) 178 | x4 = self.layer4(x3) 179 | 180 | return [x1, x2, x3, x4] if self.fetch_feature else x4 181 | 182 | 183 | def resnet18(fetch_feature=False): 184 | return Backbone(BasicBlock, (2, 2, 2, 2), 'resnet18', fetch_feature) 185 | 186 | 187 | def resnet34(fetch_feature=False): 188 | return Backbone(BasicBlock, (3, 4, 6, 3), 'resnet34', fetch_feature) 189 | 190 | 191 | def resnet50(fetch_feature=False): 192 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnet50', fetch_feature) 193 | 194 | 195 | def resnet101(fetch_feature=False): 196 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnet101', fetch_feature) 197 | 198 | 199 | def resnet152(fetch_feature=False): 200 | return Backbone(Bottleneck, (3, 8, 36, 3), 'resnet152', fetch_feature) 201 | 202 | 203 | def resnest50(fetch_feature=False): 204 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnest50', fetch_feature, radix=2, deep_stem=True, stem_width=32, 205 | avg_down=True, avd=True, avd_first=False) 206 | 207 | 208 | def resnest101(fetch_feature=False): 209 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnest101', fetch_feature, radix=2, deep_stem=True, stem_width=64, 210 | avg_down=True, avd=True, avd_first=False) 211 | 212 | 213 | def resnest200(fetch_feature=False): 214 | return Backbone(Bottleneck, (3, 24, 36, 3), 'resnest200', fetch_feature, radix=2, deep_stem=True, stem_width=64, 215 | avg_down=True, avd=True, avd_first=False) 216 | 217 | 218 | def resnest269(fetch_feature=False): 219 | return Backbone(Bottleneck, (3, 30, 48, 8), 'resnest269', fetch_feature, radix=2, deep_stem=True, stem_width=64, 220 | avg_down=True, avd=True, avd_first=False) 221 | 222 | 223 | def resnext50_32x4d(fetch_feature=False): 224 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnext50-32x4d', fetch_feature, groups=32, bottleneck_width=4) 225 | 226 | 227 | def resnext101_32x8d(fetch_feature=False): 228 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnext101-32x8d', fetch_feature, groups=32, bottleneck_width=8) 229 | 230 | 231 | def resnet18_d(fetch_feature=False): 232 | return Backbone(BasicBlock, (2, 2, 2, 2), 'resnet18-d', fetch_feature, deep_stem=True, stem_width=32) 233 | 234 | 235 | def 
resnet34_d(fetch_feature=False): 236 | return Backbone(BasicBlock, (3, 4, 6, 3), 'resnet34-d', fetch_feature, deep_stem=True, stem_width=32) 237 | 238 | 239 | def resnet50_d(fetch_feature=False): 240 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnet50-d', fetch_feature, deep_stem=True, stem_width=32) 241 | 242 | 243 | def resnet101_d(fetch_feature=False): 244 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnet101-d', fetch_feature, deep_stem=True, stem_width=32) 245 | 246 | 247 | def resnet152_d(fetch_feature=False): 248 | return Backbone(Bottleneck, (3, 8, 36, 3), 'resnet152-d', fetch_feature, deep_stem=True, stem_width=32) 249 | -------------------------------------------------------------------------------- /model/backbone/resnet/splat.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | 5 | from torch import nn 6 | from torch.nn import functional as F 7 | from xtorch import xnn 8 | 9 | 10 | class SplAtConv2d(xnn.Module): 11 | def __init__(self, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', radix=2, reduction_factor=4): 12 | super(SplAtConv2d, self).__init__() 13 | inter_channels = max(out_channels * radix // reduction_factor, 32) 14 | self.radix = radix 15 | self.conv = xnn.Conv2d(out_channels * radix, kernel_size, stride, padding, dilation, groups * radix, bias, padding_mode) 16 | self.bn0 = xnn.BatchNorm2d() 17 | self.relu = nn.ReLU(inplace=True) 18 | self.fc1 = xnn.Conv2d(inter_channels, 1, groups=groups) 19 | self.bn1 = xnn.BatchNorm2d() 20 | self.fc2 = xnn.Conv2d(out_channels * radix, 1, groups=groups) 21 | self.rsoftmax = rSoftMax(radix, groups) 22 | 23 | def forward(self, x): 24 | x = self.conv(x) 25 | x = self.bn0(x) 26 | x = self.relu(x) 27 | split = torch.chunk(x, self.radix, 1) 28 | gap = sum(split) 29 | gap = F.adaptive_avg_pool2d(gap, (1, 1)) 30 | gap = self.fc1(gap) 31 | gap = self.bn1(gap) 32 | gap = self.relu(gap) 33 | atten = self.fc2(gap) 34 | atten = self.rsoftmax(atten) 35 | atten = torch.chunk(atten, self.radix, 1) 36 | out = sum([att * split for (att, split) in zip(atten, split)]) 37 | return out 38 | 39 | 40 | class rSoftMax(xnn.Module): 41 | def __init__(self, radix, cardinality): 42 | super().__init__() 43 | self.radix = radix 44 | self.cardinality = cardinality 45 | 46 | def forward(self, x): 47 | shape = x.shape 48 | if self.radix > 1: 49 | x = x.view(x.size(0), self.cardinality, self.radix, -1).transpose(1, 2) 50 | x = F.softmax(x, dim=1) 51 | x = x.reshape(shape) 52 | else: 53 | x = torch.sigmoid(x) 54 | return x 55 | -------------------------------------------------------------------------------- /model/rdd/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .rdd import RDD 4 | -------------------------------------------------------------------------------- /model/rdd/rdd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : rdd.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 10:58 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
10 | 11 | import torch 12 | 13 | from torch import nn 14 | from xtorch import xnn 15 | from utils.init import weight_init 16 | 17 | from .utils.modules import FeaturePyramidNet, DetPredict 18 | from .utils.priorbox import LFUPriorBox 19 | from .utils.loss import calc_loss 20 | from .utils.detect import detect 21 | 22 | 23 | class RDD(xnn.Module): 24 | def __init__(self, backbone, cfg): 25 | super(RDD, self).__init__() 26 | 27 | cfg.setdefault('iou_thresh', [0.4, 0.5]) 28 | cfg.setdefault('variance', [0.1, 0.2, 0.1]) 29 | cfg.setdefault('balance', 0.5) 30 | 31 | cfg.setdefault('conf_thresh', 0.01) 32 | cfg.setdefault('nms_thresh', 0.5) 33 | cfg.setdefault('top_n', None) 34 | 35 | cfg.setdefault('extra', 0) 36 | cfg.setdefault('fpn_plane', 256) 37 | cfg.setdefault('extra_plane', 512) 38 | 39 | self.backbone = backbone 40 | self.prior_box = LFUPriorBox(cfg['prior_box']) 41 | self.num_levels = self.prior_box.num_levels 42 | self.num_classes = cfg['num_classes'] 43 | self.iou_thresh = cfg['iou_thresh'] 44 | self.variance = cfg['variance'] 45 | self.balance = cfg['balance'] 46 | 47 | self.conf_thresh = cfg['conf_thresh'] 48 | self.nms_thresh = cfg['nms_thresh'] 49 | self.top_n = cfg['top_n'] 50 | 51 | self.extra = cfg['extra'] 52 | self.fpn_plane = cfg['fpn_plane'] 53 | self.extra_plane = cfg['extra_plane'] 54 | 55 | self.fpn = FeaturePyramidNet(self.num_levels, self.fpn_plane) 56 | self.predict = DetPredict(self.num_levels, self.fpn_plane, self.prior_box.num_prior_boxes, self.num_classes, 5) 57 | 58 | if self.extra > 0: 59 | self.extra_layers = nn.ModuleList() 60 | for i in range(self.extra): 61 | self.extra_layers.append(nn.Sequential(xnn.Conv2d(self.extra_plane, 3, 2, 1, bias=False), 62 | xnn.BatchNorm2d(), 63 | nn.ReLU(inplace=True))) 64 | 65 | def init(self): 66 | self.apply(weight_init['normal']) 67 | self.backbone.init() 68 | 69 | def restore(self, path): 70 | weight = torch.load(path) 71 | self.load_state_dict(weight, strict=True) 72 | 73 | def forward(self, images, targets=None): 74 | features = list(self.backbone(images)) 75 | features = features[-(self.num_levels - self.extra):] 76 | if self.extra > 0: 77 | for layer in self.extra_layers: 78 | features.append(layer(features[-1])) 79 | features = self.fpn(features) 80 | 81 | pred_cls, pred_loc = self.predict(features) 82 | anchors = self.prior_box.get_anchors(images.shape[2:]).to(images) 83 | if self.training: 84 | if targets is not None: 85 | return calc_loss(pred_cls, pred_loc, targets, anchors, self.iou_thresh, self.variance, self.balance) 86 | else: 87 | pred_cls, pred_loc = pred_cls.detach(), pred_loc.detach() 88 | top_n = (images.size(2) // 32) * (images.size(3) // 32) if self.top_n is None else self.top_n 89 | return detect(pred_cls, pred_loc, anchors, self.variance, self.conf_thresh, self.nms_thresh, top_n) 90 | -------------------------------------------------------------------------------- /model/rdd/utils/detect.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : detect.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 10:58 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
10 | 11 | import torch 12 | 13 | from collections import Counter 14 | from utils.box.bbox import decode 15 | from utils.box.rbbox import rbbox_batched_nms as nms 16 | 17 | 18 | def detect(pred_cls, pred_loc, anchors, variance, conf_thresh, nms_thresh, top_n): 19 | scores = torch.sigmoid(pred_cls) 20 | bboxes = decode(pred_loc, anchors[None], variance) 21 | indexes_img, indexes_anchor, indexes_cls = torch.where(scores > conf_thresh) 22 | 23 | bboxes = bboxes[indexes_img, indexes_anchor] 24 | scores = scores[indexes_img, indexes_anchor, indexes_cls] 25 | labels = indexes_cls 26 | 27 | start = 0 28 | dets = [None] * pred_cls.size(0) 29 | for image_id, n in sorted(Counter(indexes_img.tolist()).items()): 30 | bboxes_ = bboxes[start: start + n] 31 | scores_ = scores[start: start + n] 32 | labels_ = labels[start: start + n] 33 | keeps = nms(bboxes_, scores_, labels_, nms_thresh)[:top_n] 34 | dets[image_id] = [bboxes_[keeps], scores_[keeps], labels_[keeps]] 35 | start += n 36 | 37 | return dets 38 | -------------------------------------------------------------------------------- /model/rdd/utils/loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : loss.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 10:59 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import torch 12 | 13 | from torch import nn 14 | from collections import OrderedDict 15 | from torch.nn.functional import one_hot 16 | from utils.box.bbox import bbox_switch, bbox_iou, encode 17 | 18 | 19 | def match(bboxes, anchors, iou_thresh, batch=16): 20 | # Reduce GPU memory usage 21 | ious = torch.cat([bbox_iou(bboxes[i: i + batch], anchors) for i in range(0, bboxes.size(0), batch)]) 22 | max_ious, bbox_indexes = torch.max(ious, dim=0) 23 | mask_neg = max_ious < iou_thresh[0] 24 | mask_pos = max_ious > iou_thresh[1] 25 | return mask_pos, mask_neg, bbox_indexes 26 | 27 | 28 | def calc_loss_v1(pred_cls, pred_loc, targets, anchors, iou_thresh, variance, balance): 29 | device = pred_cls.device 30 | num_classes = pred_cls.size(-1) 31 | weight_pos, weight_neg = 2 * balance, 2 * (1 - balance) 32 | anchors_xyxy = bbox_switch(anchors, 'xywh', 'xyxy') 33 | 34 | criterion_cls = nn.BCEWithLogitsLoss(reduction='none') 35 | criterion_loc = nn.SmoothL1Loss(reduction='sum') 36 | loss_cls, loss_loc = torch.zeros([2], dtype=torch.float, device=device, requires_grad=True) 37 | num_pos = 0 38 | for i, target in enumerate(targets): 39 | if target: 40 | bboxes = target['bboxes'].to(device) 41 | labels = target['labels'].to(device) 42 | bboxes_xyxy = bbox_switch(bboxes[:, :4], 'xywh', 'xyxy') 43 | mask_pos, mask_neg, bbox_indexes = match(bboxes_xyxy, anchors_xyxy, iou_thresh) 44 | 45 | labels = labels[bbox_indexes] 46 | indexes_pos = bbox_indexes[mask_pos] 47 | bboxes_matched = bboxes[indexes_pos] 48 | anchors_matched = anchors[mask_pos] 49 | bboxes_pred = pred_loc[i][mask_pos] 50 | gt_bboxes, det_bboxes = encode(bboxes_matched, bboxes_pred, anchors_matched, variance) 51 | 52 | labels = one_hot(labels, num_classes=num_classes).float() 53 | labels[mask_neg] = 0 54 | loss_cls_ = criterion_cls(pred_cls[i], labels) 55 | loss_cls = loss_cls + loss_cls_[mask_pos].sum() * weight_pos + loss_cls_[mask_neg].sum() * weight_neg 56 | loss_loc = loss_loc + criterion_loc(gt_bboxes, det_bboxes) 57 | num_pos += mask_pos.sum().item() 58 | else: 
59 | loss_cls = loss_cls + criterion_cls(pred_cls[i], torch.zeros_like(pred_cls[i])).sum() 60 | num_pos = max(num_pos, 1) 61 | return OrderedDict([('loss_cls', loss_cls / num_pos), ('loss_loc', loss_loc / num_pos)]) 62 | 63 | 64 | def calc_loss_v2(pred_cls, pred_loc, targets, anchors, iou_thresh, variance, balance): 65 | # Calculate the loss centrally, has only a small acceleration effect 66 | device = pred_cls.device 67 | num_classes = pred_cls.size(-1) 68 | weight_pos, weight_neg = 2 * balance, 2 * (1 - balance) 69 | criterion_cls = nn.BCEWithLogitsLoss(reduction='none') 70 | criterion_loc = nn.SmoothL1Loss(reduction='sum') 71 | 72 | num_bboxes = [target['bboxes'].size(0) if target else 0 for target in targets] 73 | bboxes = [target['bboxes'] for target in targets if target] 74 | labels = [target['labels'] for target in targets if target] 75 | if len(bboxes) > 0: 76 | bboxes = torch.cat(bboxes).to(device) 77 | labels = torch.cat(labels).to(device) 78 | else: 79 | loss_cls = criterion_cls(pred_cls, torch.zeros_like(pred_cls)).sum() 80 | return OrderedDict([('loss_cls', loss_cls), ('loss_loc', torch.tensor(0., requires_grad=True))]) 81 | 82 | # Reduce GPU memory usage 83 | batch = 16 84 | iou = torch.cat([bbox_iou(bboxes[i: i + batch, :4], anchors, 'xywh') for i in range(0, bboxes.size(0), batch)]) 85 | start = 0 86 | max_iou_merged, bbox_indexes_merged = [], [] 87 | for i, num in enumerate(num_bboxes): 88 | if num == 0: 89 | max_iou = torch.zeros_like(pred_cls[i, :, 0]) 90 | bbox_indexes = torch.zeros_like(pred_cls[i, :, 0], dtype=torch.long) 91 | else: 92 | max_iou, bbox_indexes = torch.max(iou[start: start + num], dim=0) # a 93 | max_iou_merged.append(max_iou) 94 | bbox_indexes_merged.append(bbox_indexes + start) 95 | start += num 96 | max_iou_merged = torch.stack(max_iou_merged) 97 | bbox_indexes_merged = torch.stack(bbox_indexes_merged) 98 | masks_pos = max_iou_merged > iou_thresh[1] 99 | masks_neg = max_iou_merged < iou_thresh[0] 100 | labels_matched = labels[bbox_indexes_merged] 101 | labels_matched = one_hot(labels_matched, num_classes=num_classes) 102 | labels_matched[masks_neg] = 0 103 | bboxes_matched = bboxes[bbox_indexes_merged[masks_pos]] 104 | anchors_matched = anchors[None].repeat(len(targets), 1, 1)[masks_pos] 105 | loss_cls = criterion_cls(pred_cls, labels_matched.float()) 106 | loss_cls = loss_cls[masks_pos].sum() * weight_pos + loss_cls[masks_neg].sum() * weight_neg 107 | gt_bboxes, det_bboxes = encode(bboxes_matched, pred_loc[masks_pos], anchors_matched, variance) 108 | loss_loc = criterion_loc(det_bboxes, gt_bboxes) 109 | num_pos = max(masks_pos.sum().item(), 1) 110 | return OrderedDict([('loss_cls', loss_cls / num_pos), ('loss_loc', loss_loc / num_pos)]) 111 | 112 | 113 | calc_loss = calc_loss_v1 114 | -------------------------------------------------------------------------------- /model/rdd/utils/modules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : modules.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:03 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
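# Shape sketch (illustrative): the modules below form the neck and head of RDD.
# FeaturePyramidNet projects each of `depth` backbone/extra feature maps to `plane`
# channels (256 by default in rdd.py) and fuses them top-down; DetPredict emits class
# logits and 5 localization parameters per anchor and concatenates over all levels:
#
#   # inside RDD.forward (see model/rdd/rdd.py)
#   features = self.fpn(features)                 # num_levels tensors, each (B, fpn_plane, H_i, W_i)
#   pred_cls, pred_loc = self.predict(features)   # (B, N, num_classes) and (B, N, 5),
#                                                 # N = sum_i H_i * W_i * num_prior_boxes[i]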
10 | 11 | import torch 12 | 13 | from torch import nn 14 | from xtorch import xnn 15 | 16 | 17 | class FeaturePyramidNet(xnn.Module): 18 | def __init__(self, depth, plane): 19 | super(FeaturePyramidNet, self).__init__() 20 | self.link = nn.ModuleList() 21 | self.fuse = nn.ModuleList() 22 | for i in range(depth): 23 | self.link.append(nn.Sequential(xnn.Conv2d(plane, 1, 1, 0, bias=False), 24 | xnn.BatchNorm2d())) 25 | if i != depth: 26 | self.fuse.append(nn.Sequential(nn.ReLU(inplace=True), 27 | xnn.Conv2d(plane, 3, 1, 1, bias=False), 28 | xnn.BatchNorm2d())) 29 | 30 | def forward(self, features): 31 | features = [self.link[i](feature) for i, feature in enumerate(features)] 32 | for i in range(len(features))[::-1]: 33 | if i != len(features) - 1: 34 | features[i] = self.fuse[i](features[i] + nn.Upsample(scale_factor=2)(features[i + 1])) 35 | features = [nn.ReLU(inplace=True)(feature) for feature in features] 36 | return features 37 | 38 | 39 | class PredictHead(xnn.Module): 40 | def __init__(self, plane, num_anchors, num_classes): 41 | super(PredictHead, self).__init__() 42 | self.num_classes = num_classes 43 | self.body = nn.Sequential(xnn.Conv2d(plane, 3, 1, 1, bias=False), 44 | xnn.BatchNorm2d(), 45 | nn.ReLU(inplace=True), 46 | xnn.Conv2d(num_anchors * num_classes, 3, 1, 1)) 47 | 48 | def forward(self, x): 49 | x = self.body(x) 50 | return x.permute(0, 2, 3, 1).reshape(x.size(0), -1, self.num_classes) 51 | 52 | 53 | class DetPredict(xnn.Module): 54 | def __init__(self, depth, plane, num_anchors, num_classes, num_loc_params): 55 | super(DetPredict, self).__init__() 56 | self.heads_cls = nn.ModuleList() 57 | self.heads_loc = nn.ModuleList() 58 | for i in range(depth): 59 | self.heads_cls.append(PredictHead(plane, num_anchors[i], num_classes)) 60 | self.heads_loc.append(PredictHead(plane, num_anchors[i], num_loc_params)) 61 | 62 | def forward(self, features): 63 | predict_cls, predict_loc = [], [] 64 | for i, feature in enumerate(features): 65 | predict_cls.append(self.heads_cls[i](feature)) 66 | predict_loc.append(self.heads_loc[i](feature)) 67 | predict_cls = torch.cat(predict_cls, dim=1) 68 | predict_loc = torch.cat(predict_loc, dim=1) 69 | return predict_cls, predict_loc 70 | -------------------------------------------------------------------------------- /model/rdd/utils/priorbox.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : priorbox.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:03 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
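# Usage sketch (illustrative): PriorBox / LFUPriorBox below generate the horizontal
# anchors that RDD matches against, as [cx, cy, w, h] boxes centred on feature-map
# cells; for aspect != 1 both (w, h) and (h, w) variants are added, and LFUPriorBox
# caches the result per image size. The example values mirror run/dota/train.py:
#
#   cfg = {
#       'strides': [8, 16, 32, 64, 128],
#       'sizes': [3] * 5,
#       'aspects': [[1, 2, 4, 8]] * 5,
#       'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
#   }
#   prior_box = LFUPriorBox(cfg)
#   anchors = prior_box.get_anchors((768, 768))
#   # anchors.shape == (sum over levels of (768 // stride) ** 2 * boxes_per_level, 4)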
10 | 11 | import torch 12 | 13 | from collections import OrderedDict 14 | 15 | from utils.misc import LFUCache 16 | 17 | 18 | class PriorBox: 19 | def __init__(self, cfg): 20 | self.cfg = cfg 21 | self.prior_boxes = OrderedDict() 22 | 23 | for stride, size, aspects, scales in zip(cfg['strides'], cfg['sizes'], cfg['aspects'], cfg['scales']): 24 | self.prior_boxes[stride] = self._get_prior_box(stride, size, aspects, scales, cfg.get('old_version', False)) 25 | 26 | @staticmethod 27 | def _get_prior_box(stride, size, aspects, scales, old_version=False): 28 | boxes = [] 29 | if old_version: 30 | # To be compatible with previous weights 31 | pair = [[aspect, scale] for scale in scales for aspect in aspects] 32 | else: 33 | pair = [[aspect, scale] for aspect in aspects for scale in scales] 34 | for aspect, scale in pair: 35 | length = stride * size * scale 36 | if aspect == 1: 37 | boxes.append([length, length]) 38 | else: 39 | boxes.append([length * aspect ** 0.5, length / aspect ** 0.5]) 40 | boxes.append([length / aspect ** 0.5, length * aspect ** 0.5]) 41 | return boxes 42 | 43 | @staticmethod 44 | def _get_anchors(img_size, prior_boxes): 45 | h, w = img_size 46 | anchors = [] 47 | for stride, prior_box in prior_boxes: 48 | assert w % stride == 0 and h % stride == 0 49 | fmw, fmh = w // stride, h // stride 50 | prior_box = torch.tensor(prior_box, dtype=torch.float) 51 | offset_y, offset_x = torch.meshgrid([torch.arange(fmh), torch.arange(fmw)]) 52 | offset_x = offset_x.to(prior_box) + 0.5 53 | offset_y = offset_y.to(prior_box) + 0.5 54 | offset = torch.stack([offset_x, offset_y], dim=-1) * stride 55 | offset = offset[:, :, None, :].repeat(1, 1, prior_box.size(0), 1) 56 | prior_box = prior_box[None, None, :, :].repeat(fmh, fmw, 1, 1) 57 | anchors.append(torch.cat([offset, prior_box], dim=-1).reshape(-1, 4)) 58 | anchors = torch.cat(anchors) 59 | return anchors 60 | 61 | def get_anchors(self, img_size): 62 | return self._get_anchors(img_size, self.prior_boxes.items()) 63 | 64 | 65 | class LFUPriorBox: 66 | def __init__(self, prior_box_cfg, capacity=3): 67 | self.prior_box = PriorBox(prior_box_cfg) 68 | self.num_levels = len(self.prior_box.prior_boxes) 69 | self.num_prior_boxes = [len(prior_boxes) for prior_boxes in self.prior_box.prior_boxes.values()] 70 | self.lfu_cache = LFUCache(capacity) 71 | 72 | def get_anchors(self, img_size): 73 | name = 'anchors-%d-%d' % tuple(img_size) 74 | anchors = self.lfu_cache.get(name, None) 75 | if anchors is None: 76 | anchors = self.prior_box.get_anchors(img_size) 77 | self.lfu_cache.put(name, anchors) 78 | return anchors 79 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | pillow 4 | cython 5 | beautifulsoup4 6 | opnecv-python 7 | pytorch>=1.2 8 | torchvision>=0.4 9 | tensorboard>=2.2 -------------------------------------------------------------------------------- /run/dota/evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : evaluate.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:10 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
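# How to run (sketch): this script takes no command-line arguments; fill in
# dir_dataset, dir_save, checkpoint and image_set under `if __name__ == '__main__'`
# and launch it from the repository root so that `sys.path.append('.')` resolves the
# project imports:
#
#   python run/dota/evaluate.py
#
# It expects the cropped test split produced by run/dota/prepare.py and writes DOTA
# Task1 submission files to <dir_save>/submission/.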
10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import tqdm 17 | import torch 18 | import numpy as np 19 | 20 | from collections import defaultdict 21 | from torch.utils.data import DataLoader 22 | 23 | from data.aug import ops 24 | from data.aug.compose import Compose 25 | from data.dataset import DOTA 26 | 27 | from model.rdd import RDD 28 | from model.backbone import resnet 29 | 30 | from utils.box.bbox_np import xywha2xy4, xy42xywha 31 | from utils.box.rbbox_np import rbbox_batched_nms 32 | from utils.parallel import CustomDetDataParallel 33 | 34 | 35 | @torch.no_grad() 36 | def main(): 37 | global checkpoint 38 | if checkpoint is None: 39 | dir_weight = os.path.join(dir_save, 'weight') 40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 41 | current_step = max(indexes) 42 | checkpoint = os.path.join(dir_weight, '%d.pth' % current_step) 43 | 44 | batch_size = 32 45 | num_workers = 4 46 | 47 | image_size = 768 48 | aug = Compose([ops.PadSquare(), ops.Resize(image_size)]) 49 | dataset = DOTA(dir_dataset, image_set, aug) 50 | loader = DataLoader(dataset, batch_size, num_workers=num_workers, pin_memory=True, collate_fn=dataset.collate) 51 | num_classes = len(dataset.names) 52 | 53 | prior_box = { 54 | 'strides': [8, 16, 32, 64, 128], 55 | 'sizes': [3] * 5, 56 | 'aspects': [[1, 2, 4, 8]] * 5, 57 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5, 58 | 'old_version': old_version 59 | } 60 | conf_thresh = 0.01 61 | nms_thresh = 0.45 62 | cfg = { 63 | 'prior_box': prior_box, 64 | 'num_classes': num_classes, 65 | 'extra': 2, 66 | 'conf_thresh': conf_thresh, 67 | 'nms_thresh': nms_thresh, 68 | } 69 | 70 | model = RDD(backbone(fetch_feature=True), cfg) 71 | model.build_pipe(shape=[2, 3, image_size, image_size]) 72 | model.restore(checkpoint) 73 | if len(device_ids) > 1: 74 | model = CustomDetDataParallel(model, device_ids) 75 | model.cuda() 76 | model.eval() 77 | 78 | ret_raw = defaultdict(list) 79 | for images, targets, infos in tqdm.tqdm(loader): 80 | images = images.cuda() / 255 81 | dets = model(images) 82 | for (det, info) in zip(dets, infos): 83 | if det: 84 | bboxes, scores, labels = det 85 | bboxes = bboxes.cpu().numpy() 86 | scores = scores.cpu().numpy() 87 | labels = labels.cpu().numpy() 88 | fname, x, y, w, h = os.path.splitext(os.path.basename(info['img_path']))[0].split('-')[:5] 89 | x, y, w, h = int(x), int(y), int(w), int(h) 90 | long_edge = max(w, h) 91 | pad_x, pad_y = (long_edge - w) // 2, (long_edge - h) // 2 92 | bboxes = np.stack([xywha2xy4(bbox) for bbox in bboxes]) 93 | bboxes *= long_edge / image_size 94 | bboxes -= [pad_x, pad_y] 95 | bboxes += [x, y] 96 | bboxes = np.stack([xy42xywha(bbox) for bbox in bboxes]) 97 | ret_raw[fname].append([bboxes, scores, labels]) 98 | 99 | print('merging results...') 100 | ret = [] 101 | 102 | for fname, dets in ret_raw.items(): 103 | bboxes, scores, labels = zip(*dets) 104 | bboxes = np.concatenate(list(bboxes)) 105 | scores = np.concatenate(list(scores)) 106 | labels = np.concatenate(list(labels)) 107 | keeps = rbbox_batched_nms(bboxes, scores, labels, nms_thresh) 108 | ret.append([fname, [bboxes[keeps], scores[keeps], labels[keeps]]]) 109 | 110 | print('converting to submission format...') 111 | ret_save = defaultdict(list) 112 | for fname, (bboxes, scores, labels) in ret: 113 | for bbox, score, label in zip(bboxes, scores, labels): 114 | bbox = xywha2xy4(bbox).ravel() 115 | line = '%s %.12f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f' % (fname, score, *bbox) 116 | 
ret_save[dataset.label2name[label]].append(line) 117 | 118 | print('saving...') 119 | os.makedirs(os.path.join(dir_save, 'submission'), exist_ok=True) 120 | for name, dets in ret_save.items(): 121 | with open(os.path.join(dir_save, 'submission', 'Task%d_%s.txt' % (1, name)), 'wt') as f: 122 | f.write('\n'.join(dets)) 123 | 124 | print('finished') 125 | 126 | 127 | if __name__ == '__main__': 128 | 129 | device_ids = [0] 130 | torch.cuda.set_device(device_ids[0]) 131 | 132 | dir_dataset = '' 133 | dir_save = '' 134 | 135 | backbone = resnet.resnet101 136 | checkpoint = None 137 | old_version = False # set True when using the original weights 138 | image_set = 'test' # test-768 139 | 140 | main() 141 | -------------------------------------------------------------------------------- /run/dota/prepare.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : prepare.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:10 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import json 17 | import cv2 as cv 18 | import numpy as np 19 | 20 | from utils.crop_image import Cropper 21 | 22 | 23 | def txt2json(dir_txt, dir_json): 24 | os.makedirs(dir_json, exist_ok=True) 25 | for file in os.listdir(dir_txt): 26 | objs = [] 27 | for i, line in enumerate(open(os.path.join(dir_txt, file)).readlines()): 28 | line = line.strip() 29 | line_split = line.split(' ') 30 | if len(line_split) == 10: 31 | obj = dict() 32 | coord = np.array(line_split[:8], dtype=np.float32).reshape([4, 2]) 33 | bbox = cv.boxPoints(cv.minAreaRect(coord)).astype(np.int).tolist() 34 | obj['name'] = line_split[8].lower() 35 | obj['bbox'] = bbox 36 | objs.append(obj) 37 | else: 38 | print(' %s' % line) 39 | if objs: 40 | json.dump(objs, open(os.path.join(dir_json, file.replace('txt', 'json')), 'wt'), indent=2) 41 | 42 | 43 | def main(image_set, single_scale=False): 44 | # (1) 45 | if image_set != 'test': 46 | dir_txt = os.path.join(dir_dataset, 'labelTxt', image_set) 47 | out_dir_json = os.path.join(dir_dataset, 'annotations', image_set) 48 | txt2json(dir_txt, out_dir_json) 49 | 50 | # (2) 51 | pairs = [] 52 | for filename in os.listdir(os.path.join(dir_dataset, 'images', image_set)): 53 | anno = os.path.join(dir_dataset, 'annotations', image_set, filename.replace('png', 'json')) 54 | img = os.path.join(dir_dataset, 'images', image_set, filename) 55 | if not os.path.exists(anno): 56 | anno = None 57 | pairs.append([img, anno]) 58 | 59 | overlap = 0.25 60 | sizes = [768] if single_scale else [512, 768, 1024, 1536] 61 | save_empty = image_set == 'test' 62 | image_set = f'{image_set}-{sizes[0]}' if single_scale else image_set 63 | 64 | out_dir_images = os.path.join(dir_dataset, 'images', f'{image_set}-crop') 65 | out_dir_annos = os.path.join(dir_dataset, 'annotations', f'{image_set}-crop') 66 | 67 | cropper = Cropper(sizes, overlap) 68 | cropper.crop_batch(pairs, out_dir_images, out_dir_annos, save_empty) 69 | 70 | # (3) 71 | pairs = [] 72 | for filename in os.listdir(out_dir_images): 73 | img = os.path.join('images', f'{image_set}-crop', filename) 74 | anno = None if image_set == 'test' else os.path.join('annotations', f'{image_set}-crop', filename.replace('jpg', 'json')) 75 | pairs.append([img, anno]) 76 | out_dir = os.path.join(dir_dataset, 
'image-sets') 77 | os.makedirs(out_dir, exist_ok=True) 78 | json.dump(pairs, open(os.path.join(out_dir, f'{image_set}.json'), 'wt'), indent=2) 79 | 80 | 81 | if __name__ == '__main__': 82 | 83 | # directory hierarchy 84 | 85 | # root/images/train/P0000.png 86 | # -----------/train/... 87 | # -----------/val/... 88 | # -----------/test/... 89 | 90 | # root/labelTxt/train/P0000.txt 91 | # -------------/train/... 92 | # -------------/val/... 93 | 94 | # (1) convert annotation files 95 | # (2) crop images 96 | # (3) generate image-set files 97 | 98 | dir_dataset = '' 99 | 100 | main('train') 101 | main('val') 102 | main('test') 103 | main('test', True) 104 | -------------------------------------------------------------------------------- /run/dota/train-dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : train.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2021/03/20 16:00 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | 12 | def main(batch_size, rank, world_size): 13 | 14 | import os 15 | import tqdm 16 | import torch 17 | import tempfile 18 | 19 | from torch import optim 20 | from torch import distributed as dist 21 | from torch.nn import SyncBatchNorm 22 | from torch.utils.data import DataLoader 23 | from torch.utils.tensorboard import SummaryWriter 24 | 25 | from data.aug.compose import Compose 26 | from data.aug import ops 27 | from data.dataset import DOTA 28 | 29 | from model.rdd import RDD 30 | from model.backbone import resnet 31 | 32 | from utils.adjust_lr import adjust_lr_multi_step 33 | 34 | torch.manual_seed(0) 35 | torch.backends.cudnn.benchmark = True 36 | torch.cuda.set_device(rank) 37 | dist.init_process_group("nccl", init_method='env://', rank=rank, world_size=world_size) 38 | 39 | backbone = resnet.resnet101 40 | 41 | dir_dataset = '' 42 | dir_save = '' 43 | 44 | dir_weight = os.path.join(dir_save, 'weight') 45 | dir_log = os.path.join(dir_save, 'log') 46 | os.makedirs(dir_weight, exist_ok=True) 47 | if rank == 0: 48 | writer = SummaryWriter(dir_log) 49 | 50 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 51 | current_step = max(indexes) if indexes else 0 52 | 53 | image_size = 768 54 | lr = 1e-3 55 | batch_size //= world_size 56 | num_workers = 4 57 | 58 | max_step = 250000 59 | lr_cfg = [[100000, lr], [200000, lr / 10], [max_step, lr / 50]] 60 | warm_up = [1000, lr / 50, lr] 61 | save_interval = 1000 62 | 63 | aug = Compose([ 64 | ops.ToFloat(), 65 | ops.PhotometricDistort(), 66 | ops.RandomHFlip(), 67 | ops.RandomVFlip(), 68 | ops.RandomRotate90(), 69 | ops.ResizeJitter([0.8, 1.2]), 70 | ops.PadSquare(), 71 | ops.Resize(image_size), 72 | ops.BBoxFilter(24 * 24 * 0.4) 73 | ]) 74 | dataset = DOTA(dir_dataset, ['train', 'val'], aug) 75 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset, world_size, rank) 76 | batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True) 77 | loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=dataset.collate) 78 | num_classes = len(dataset.names) 79 | 80 | prior_box = { 81 | 'strides': [8, 16, 32, 64, 128], 82 | 'sizes': [3] * 5, 83 | 'aspects': [[1, 2, 4, 8]] * 5, 84 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5, 85 | } 86 | 87 | cfg = { 88 | 'prior_box': prior_box, 89 | 
'num_classes': num_classes, 90 | 'extra': 2, 91 | } 92 | device = torch.device(f'cuda:{rank}') 93 | model = RDD(backbone(fetch_feature=True), cfg) 94 | model.build_pipe(shape=[2, 3, image_size, image_size]) 95 | model = SyncBatchNorm.convert_sync_batchnorm(model) 96 | model.to(device) 97 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank]) 98 | if current_step: 99 | model.module.load_state_dict(torch.load(os.path.join(dir_weight, '%d.pth' % current_step), map_location=device)) 100 | else: 101 | checkpoint = os.path.join(tempfile.gettempdir(), "initial-weights.pth") 102 | if rank == 0: 103 | model.module.init() 104 | torch.save(model.module.state_dict(), checkpoint) 105 | dist.barrier() 106 | if rank > 0: 107 | model.module.load_state_dict(torch.load(checkpoint, map_location=device)) 108 | dist.barrier() 109 | if rank == 0: 110 | os.remove(checkpoint) 111 | 112 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) 113 | training = True 114 | while training and current_step < max_step: 115 | tqdm_loader = tqdm.tqdm(loader) if rank == 0 else loader 116 | for images, targets, infos in tqdm_loader: 117 | current_step += 1 118 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up) 119 | 120 | images = images.cuda() / 255 121 | losses = model(images, targets) 122 | loss = sum(losses.values()) 123 | loss.backward() 124 | optimizer.step() 125 | optimizer.zero_grad() 126 | 127 | if rank == 0: 128 | for key, val in list(losses.items()): 129 | losses[key] = val.item() 130 | writer.add_scalar(key, val, global_step=current_step) 131 | writer.flush() 132 | tqdm_loader.set_postfix(losses) 133 | tqdm_loader.set_description(f'<{current_step}/{max_step}>') 134 | 135 | if current_step % save_interval == 0: 136 | save_path = os.path.join(dir_weight, '%d.pth' % current_step) 137 | state_dict = model.module.state_dict() 138 | torch.save(state_dict, save_path) 139 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval)) 140 | if os.path.exists(cache_file): 141 | os.remove(cache_file) 142 | 143 | if current_step >= max_step: 144 | training = False 145 | if rank == 0: 146 | writer.close() 147 | break 148 | 149 | 150 | if __name__ == "__main__": 151 | 152 | import os 153 | import sys 154 | import argparse 155 | import multiprocessing 156 | 157 | sys.path.append('.') 158 | multiprocessing.set_start_method('spawn') 159 | 160 | parser = argparse.ArgumentParser() 161 | parser.add_argument('--batch_size', default=12, type=int) 162 | parser.add_argument('--device_ids', default='0,1', type=str) 163 | parser.add_argument('--local_rank', default=0, type=int) 164 | 165 | args = parser.parse_args() 166 | os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids 167 | device_ids = list(range(len(args.device_ids.split(',')))) 168 | 169 | processes = [] 170 | for device_id in device_ids: 171 | p = multiprocessing.Process(target=main, args=(args.batch_size, device_id, len(device_ids))) 172 | p.start() 173 | processes.append(p) 174 | for p in processes: 175 | p.join() 176 | 177 | # python -m torch.distributed.launch run/dota/train-dist.py --batch_size=12 --device_ids=0,1 178 | -------------------------------------------------------------------------------- /run/dota/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : train.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:10 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import tqdm 17 | import torch 18 | 19 | from torch import optim 20 | from torch.utils.data import DataLoader 21 | from torch.utils.tensorboard import SummaryWriter 22 | 23 | from data.aug.compose import Compose 24 | from data.aug import ops 25 | from data.dataset import DOTA 26 | 27 | from model.rdd import RDD 28 | from model.backbone import resnet 29 | 30 | from utils.adjust_lr import adjust_lr_multi_step 31 | from utils.parallel import convert_model, CustomDetDataParallel 32 | 33 | 34 | def main(): 35 | dir_weight = os.path.join(dir_save, 'weight') 36 | dir_log = os.path.join(dir_save, 'log') 37 | os.makedirs(dir_weight, exist_ok=True) 38 | writer = SummaryWriter(dir_log) 39 | 40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 41 | current_step = max(indexes) if indexes else 0 42 | 43 | image_size = 768 44 | lr = 1e-3 45 | batch_size = 12 46 | num_workers = 4 47 | 48 | max_step = 250000 49 | lr_cfg = [[100000, lr], [200000, lr / 10], [max_step, lr / 50]] 50 | warm_up = [1000, lr / 50, lr] 51 | save_interval = 1000 52 | 53 | aug = Compose([ 54 | ops.ToFloat(), 55 | ops.PhotometricDistort(), 56 | ops.RandomHFlip(), 57 | ops.RandomVFlip(), 58 | ops.RandomRotate90(), 59 | ops.ResizeJitter([0.8, 1.2]), 60 | ops.PadSquare(), 61 | ops.Resize(image_size), 62 | ops.BBoxFilter(24 * 24 * 0.4) 63 | ]) 64 | dataset = DOTA(dir_dataset, ['train', 'val'], aug) 65 | loader = DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True, 66 | collate_fn=dataset.collate) 67 | num_classes = len(dataset.names) 68 | 69 | prior_box = { 70 | 'strides': [8, 16, 32, 64, 128], 71 | 'sizes': [3] * 5, 72 | 'aspects': [[1, 2, 4, 8]] * 5, 73 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5, 74 | } 75 | 76 | cfg = { 77 | 'prior_box': prior_box, 78 | 'num_classes': num_classes, 79 | 'extra': 2, 80 | } 81 | 82 | model = RDD(backbone(fetch_feature=True), cfg) 83 | model.build_pipe(shape=[2, 3, image_size, image_size]) 84 | if current_step: 85 | model.restore(os.path.join(dir_weight, '%d.pth' % current_step)) 86 | else: 87 | model.init() 88 | if len(device_ids) > 1: 89 | model = convert_model(model) 90 | model = CustomDetDataParallel(model, device_ids) 91 | model.cuda() 92 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) 93 | training = True 94 | while training and current_step < max_step: 95 | tqdm_loader = tqdm.tqdm(loader) 96 | for images, targets, infos in tqdm_loader: 97 | current_step += 1 98 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up) 99 | 100 | images = images.cuda() / 255 101 | losses = model(images, targets) 102 | loss = sum(losses.values()) 103 | loss.backward() 104 | optimizer.step() 105 | optimizer.zero_grad() 106 | 107 | for key, val in list(losses.items()): 108 | losses[key] = val.item() 109 | writer.add_scalar(key, val, global_step=current_step) 110 | writer.flush() 111 | tqdm_loader.set_postfix(losses) 112 | tqdm_loader.set_description(f'<{current_step}/{max_step}>') 113 | 114 | if current_step % save_interval == 0: 115 | save_path = os.path.join(dir_weight, '%d.pth' % current_step) 116 | state_dict = model.state_dict() if len(device_ids) == 1 else model.module.state_dict() 117 | torch.save(state_dict, save_path) 118 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - 
save_interval)) 119 | if os.path.exists(cache_file): 120 | os.remove(cache_file) 121 | 122 | if current_step >= max_step: 123 | training = False 124 | writer.close() 125 | break 126 | 127 | 128 | if __name__ == '__main__': 129 | 130 | torch.manual_seed(0) 131 | torch.backends.cudnn.benchmark = True 132 | 133 | device_ids = [0, 1] 134 | torch.cuda.set_device(device_ids[0]) 135 | backbone = resnet.resnet101 136 | 137 | dir_dataset = '' 138 | dir_save = '' 139 | 140 | main() 141 | -------------------------------------------------------------------------------- /run/hrsc2016/evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : evaluate.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:10 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import tqdm 17 | import torch 18 | import cv2 as cv 19 | import numpy as np 20 | 21 | from torch.utils.data import DataLoader 22 | 23 | from data.aug import ops 24 | from data.dataset import HRSC2016 25 | 26 | from model.rdd import RDD 27 | from model.backbone import resnet 28 | 29 | from utils.box.bbox_np import xy42xywha, xywha2xy4 30 | from utils.box.metric import get_det_aps 31 | from utils.parallel import CustomDetDataParallel 32 | 33 | 34 | @torch.no_grad() 35 | def main(): 36 | global checkpoint 37 | if checkpoint is None: 38 | dir_weight = os.path.join(dir_save, 'weight') 39 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 40 | current_step = max(indexes) 41 | checkpoint = os.path.join(dir_weight, '%d.pth' % current_step) 42 | 43 | image_size = 768 44 | batch_size = 32 45 | num_workers = 4 46 | 47 | aug = ops.Resize(image_size) 48 | dataset = HRSC2016(dir_dataset, 'test', aug) 49 | loader = DataLoader(dataset, batch_size, num_workers=num_workers, pin_memory=True, collate_fn=dataset.collate) 50 | num_classes = len(dataset.names) 51 | 52 | prior_box = { 53 | 'strides': [8, 16, 32, 64, 128], 54 | 'sizes': [3] * 5, 55 | 'aspects': [[1.5, 3, 5, 8]] * 5, 56 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5, 57 | 'old_version': old_version 58 | } 59 | conf_thresh = 0.01 60 | nms_thresh = 0.45 61 | cfg = { 62 | 'prior_box': prior_box, 63 | 'num_classes': num_classes, 64 | 'extra': 2, 65 | 'conf_thresh': conf_thresh, 66 | 'nms_thresh': nms_thresh, 67 | } 68 | 69 | model = RDD(backbone(fetch_feature=True), cfg) 70 | model.build_pipe(shape=[2, 3, image_size, image_size]) 71 | model.restore(checkpoint) 72 | if len(device_ids) > 1: 73 | model = CustomDetDataParallel(model, device_ids) 74 | model.cuda() 75 | model.eval() 76 | 77 | count = 0 78 | gt_list, det_list = [], [] 79 | for images, targets, infos in tqdm.tqdm(loader): 80 | images = images.cuda() / 255 81 | dets = model(images) 82 | for target, det, info in zip(targets, dets, infos): 83 | if target: 84 | bboxes = np.stack([xy42xywha(bbox) for bbox in info['objs']['bboxes']]) 85 | labels = info['objs']['labels'] 86 | gt_list.extend([count, bbox, 1, label] for bbox, label in zip(bboxes, labels)) 87 | if det: 88 | ih, iw = info['shape'][:2] 89 | bboxes, scores, labels = list(map(lambda x: x.cpu().numpy(), det)) 90 | bboxes = np.stack([xywha2xy4(bbox) for bbox in bboxes]) 91 | bboxes_ = bboxes * [iw / image_size, ih / image_size] 92 | # bboxes = np.stack([xy42xywha(bbox) 
for bbox in bboxes_]) 93 | bboxes = [] 94 | for bbox in bboxes_.astype(np.float32): 95 | (x, y), (w, h), a = cv.minAreaRect(bbox) 96 | bboxes.append([x, y, w, h, a]) 97 | bboxes = np.array(bboxes) 98 | det_list.extend([count, bbox, score, label] for bbox, score, label in zip(bboxes, scores, labels)) 99 | count += 1 100 | APs = get_det_aps(det_list, gt_list, num_classes, use_07_metric=use_07_metric) 101 | mAP = sum(APs) / len(APs) 102 | print('AP') 103 | for label in range(num_classes): 104 | print(f'{dataset.label2name[label]}: {APs[label]}') 105 | print(f'mAP: {mAP}') 106 | 107 | 108 | if __name__ == '__main__': 109 | 110 | device_ids = [0] 111 | torch.cuda.set_device(device_ids[0]) 112 | 113 | dir_dataset = '' 114 | dir_save = '' 115 | 116 | backbone = resnet.resnet101 117 | checkpoint = None 118 | use_07_metric = False 119 | old_version = False # set True when using the original weights 120 | 121 | main() 122 | -------------------------------------------------------------------------------- /run/hrsc2016/prepare.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : prepare.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:11 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import json 17 | import numpy as np 18 | 19 | from bs4 import BeautifulSoup as bs 20 | from utils.box.bbox_np import xywha2xy4 21 | 22 | 23 | def xml2json(dir_xml, dir_json): 24 | os.makedirs(dir_json, exist_ok=True) 25 | for xml in os.listdir(dir_xml): 26 | objs = [] 27 | name = os.path.splitext(xml)[0] 28 | for obj in bs(open(os.path.join(dir_xml, xml)), "html.parser").findAll('hrsc_object'): 29 | xywha = [] 30 | xywha.append(float(obj.select_one('mbox_cx').text)) 31 | xywha.append(float(obj.select_one('mbox_cy').text)) 32 | xywha.append(float(obj.select_one('mbox_w').text)) 33 | xywha.append(float(obj.select_one('mbox_h').text)) 34 | xywha.append(np.rad2deg(float(obj.select_one('mbox_ang').text))) 35 | obj = dict() 36 | obj['name'] = 'ship' 37 | obj['bbox'] = xywha2xy4(xywha).tolist() 38 | objs.append(obj) 39 | if objs: 40 | json.dump(objs, open(os.path.join(dir_json, name + '.json'), 'wt'), indent=2) 41 | 42 | 43 | def main(): 44 | # (1) 45 | dir_xml = os.path.join(dir_dataset, 'labelXml') 46 | out_dir_json = os.path.join(dir_dataset, 'annotations') 47 | xml2json(dir_xml, out_dir_json) 48 | 49 | # (2) 50 | out_dir = os.path.join(dir_dataset, 'image-sets') 51 | os.makedirs(out_dir, exist_ok=True) 52 | for image_set in ['trainval', 'test']: 53 | pairs = [] 54 | with open(os.path.join(dir_dataset, 'ImageSets', f'{image_set}.txt')) as f: 55 | for line in f.readlines(): 56 | line = line.strip() 57 | img = os.path.join('images', f'{line}.bmp') 58 | anno = os.path.join('annotations', f'{line}.json') 59 | if not os.path.exists(os.path.join(dir_dataset, anno)): 60 | anno = None 61 | pairs.append([img, anno]) 62 | json.dump(pairs, open(os.path.join(out_dir, f'{image_set}.json'), 'wt'), indent=2) 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | # directory hierarchy 68 | 69 | # root/images/100000001.bmp 70 | # ----/images/... 71 | 72 | # root/labelXml/100000001.xml 73 | # ----/labelXml/... 
74 | 75 | # (1) convert annotation files 76 | # (2) generate image-set files 77 | 78 | dir_dataset = '' 79 | 80 | main() 81 | -------------------------------------------------------------------------------- /run/hrsc2016/train-dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : train.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2021/03/20 16:00 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | 12 | def main(batch_size, rank, world_size): 13 | 14 | import os 15 | import tqdm 16 | import torch 17 | import tempfile 18 | 19 | from torch import optim 20 | from torch import distributed as dist 21 | from torch.nn import SyncBatchNorm 22 | from torch.utils.data import DataLoader 23 | from torch.utils.tensorboard import SummaryWriter 24 | 25 | from data.aug.compose import Compose 26 | from data.aug import ops 27 | from data.dataset import HRSC2016 28 | 29 | from model.rdd import RDD 30 | from model.backbone import resnet 31 | 32 | from utils.adjust_lr import adjust_lr_multi_step 33 | 34 | torch.manual_seed(0) 35 | torch.backends.cudnn.benchmark = True 36 | torch.cuda.set_device(rank) 37 | dist.init_process_group("nccl", init_method='env://', rank=rank, world_size=world_size) 38 | 39 | backbone = resnet.resnet101 40 | 41 | dir_dataset = '' 42 | dir_save = '' 43 | 44 | dir_weight = os.path.join(dir_save, 'weight') 45 | dir_log = os.path.join(dir_save, 'log') 46 | os.makedirs(dir_weight, exist_ok=True) 47 | if rank == 0: 48 | writer = SummaryWriter(dir_log) 49 | 50 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 51 | current_step = max(indexes) if indexes else 0 52 | 53 | image_size = 768 54 | lr = 1e-3 55 | batch_size //= world_size 56 | num_workers = 4 57 | 58 | max_step = 12000 59 | lr_cfg = [[7500, lr], [max_step, lr / 10]] 60 | warm_up = [500, lr / 50, lr] 61 | save_interval = 1000 62 | 63 | aug = Compose([ 64 | ops.ToFloat(), 65 | ops.PhotometricDistort(), 66 | ops.RandomHFlip(), 67 | ops.RandomVFlip(), 68 | ops.RandomRotate90(), 69 | ops.ResizeJitter([0.8, 1.2]), 70 | ops.PadSquare(), 71 | ops.Resize(image_size), 72 | ]) 73 | dataset = HRSC2016(dir_dataset, ['trainval'], aug) 74 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset, world_size, rank) 75 | batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True) 76 | loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=dataset.collate) 77 | num_classes = len(dataset.names) 78 | 79 | prior_box = { 80 | 'strides': [8, 16, 32, 64, 128], 81 | 'sizes': [3] * 5, 82 | 'aspects': [[1.5, 3, 5, 8]] * 5, 83 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5, 84 | } 85 | 86 | cfg = { 87 | 'prior_box': prior_box, 88 | 'num_classes': num_classes, 89 | 'extra': 2, 90 | } 91 | device = torch.device(f'cuda:{rank}') 92 | model = RDD(backbone(fetch_feature=True), cfg) 93 | model.build_pipe(shape=[2, 3, image_size, image_size]) 94 | model = SyncBatchNorm.convert_sync_batchnorm(model) 95 | model.to(device) 96 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank]) 97 | if current_step: 98 | model.module.load_state_dict(torch.load(os.path.join(dir_weight, '%d.pth' % current_step), map_location=device)) 99 | else: 100 | checkpoint = os.path.join(tempfile.gettempdir(), "initial-weights.pth") 
101 | if rank == 0: 102 | model.module.init() 103 | torch.save(model.module.state_dict(), checkpoint) 104 | dist.barrier() 105 | if rank > 0: 106 | model.module.load_state_dict(torch.load(checkpoint, map_location=device)) 107 | dist.barrier() 108 | if rank == 0: 109 | os.remove(checkpoint) 110 | 111 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) 112 | training = True 113 | while training and current_step < max_step: 114 | tqdm_loader = tqdm.tqdm(loader) if rank == 0 else loader 115 | for images, targets, infos in tqdm_loader: 116 | current_step += 1 117 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up) 118 | 119 | images = images.cuda() / 255 120 | losses = model(images, targets) 121 | loss = sum(losses.values()) 122 | loss.backward() 123 | optimizer.step() 124 | optimizer.zero_grad() 125 | 126 | if rank == 0: 127 | for key, val in list(losses.items()): 128 | losses[key] = val.item() 129 | writer.add_scalar(key, val, global_step=current_step) 130 | writer.flush() 131 | tqdm_loader.set_postfix(losses) 132 | tqdm_loader.set_description(f'<{current_step}/{max_step}>') 133 | 134 | if current_step % save_interval == 0: 135 | save_path = os.path.join(dir_weight, '%d.pth' % current_step) 136 | state_dict = model.module.state_dict() 137 | torch.save(state_dict, save_path) 138 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval)) 139 | if os.path.exists(cache_file): 140 | os.remove(cache_file) 141 | 142 | if current_step >= max_step: 143 | training = False 144 | if rank == 0: 145 | writer.close() 146 | break 147 | 148 | 149 | if __name__ == "__main__": 150 | 151 | import os 152 | import sys 153 | import argparse 154 | import multiprocessing 155 | 156 | sys.path.append('.') 157 | multiprocessing.set_start_method('spawn') 158 | 159 | parser = argparse.ArgumentParser() 160 | parser.add_argument('--batch_size', default=12, type=int) 161 | parser.add_argument('--device_ids', default='0,1', type=str) 162 | parser.add_argument('--local_rank', default=0, type=int) 163 | 164 | args = parser.parse_args() 165 | os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids 166 | device_ids = list(range(len(args.device_ids.split(',')))) 167 | 168 | processes = [] 169 | for device_id in device_ids: 170 | p = multiprocessing.Process(target=main, args=(args.batch_size, device_id, len(device_ids))) 171 | p.start() 172 | processes.append(p) 173 | for p in processes: 174 | p.join() 175 | 176 | # python -m torch.distributed.launch run/hrsc2016/train-dist.py --batch_size=12 --device_ids=0,1 177 | -------------------------------------------------------------------------------- /run/hrsc2016/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : train.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:11 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
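# How to run (sketch): convert the annotations once with run/hrsc2016/prepare.py,
# then fill in dir_dataset / dir_save and device_ids below and launch from the
# repository root:
#
#   python run/hrsc2016/train.py
#
# A checkpoint is written to <dir_save>/weight/<step>.pth every `save_interval`
# steps (the previous one is removed), and training resumes from the newest
# checkpoint automatically when the weight directory is not empty.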
10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import tqdm 17 | import torch 18 | 19 | from torch import optim 20 | from torch.utils.data import DataLoader 21 | from torch.utils.tensorboard import SummaryWriter 22 | 23 | from data.aug.compose import Compose 24 | from data.aug import ops 25 | from data.dataset import HRSC2016 26 | 27 | from model.rdd import RDD 28 | from model.backbone import resnet 29 | 30 | from utils.adjust_lr import adjust_lr_multi_step 31 | from utils.parallel import convert_model, CustomDetDataParallel 32 | 33 | 34 | def main(): 35 | dir_weight = os.path.join(dir_save, 'weight') 36 | dir_log = os.path.join(dir_save, 'log') 37 | os.makedirs(dir_weight, exist_ok=True) 38 | writer = SummaryWriter(dir_log) 39 | 40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 41 | current_step = max(indexes) if indexes else 0 42 | 43 | image_size = 768 44 | lr = 1e-3 45 | batch_size = 12 46 | num_workers = 4 47 | 48 | max_step = 12000 49 | lr_cfg = [[7500, lr], [max_step, lr / 10]] 50 | warm_up = [500, lr / 50, lr] 51 | save_interval = 1000 52 | 53 | aug = Compose([ 54 | ops.ToFloat(), 55 | ops.PhotometricDistort(), 56 | ops.RandomHFlip(), 57 | ops.RandomVFlip(), 58 | ops.RandomRotate90(), 59 | ops.ResizeJitter([0.8, 1.2]), 60 | ops.PadSquare(), 61 | ops.Resize(image_size), 62 | ]) 63 | dataset = HRSC2016(dir_dataset, ['trainval'], aug) 64 | loader = DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True, 65 | collate_fn=dataset.collate) 66 | num_classes = len(dataset.names) 67 | 68 | prior_box = { 69 | 'strides': [8, 16, 32, 64, 128], 70 | 'sizes': [3] * 5, 71 | 'aspects': [[1.5, 3, 5, 8]] * 5, 72 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5, 73 | } 74 | 75 | cfg = { 76 | 'prior_box': prior_box, 77 | 'num_classes': num_classes, 78 | 'extra': 2, 79 | } 80 | 81 | model = RDD(backbone(fetch_feature=True), cfg) 82 | model.build_pipe(shape=[2, 3, image_size, image_size]) 83 | if current_step: 84 | model.restore(os.path.join(dir_weight, '%d.pth' % current_step)) 85 | else: 86 | model.init() 87 | if len(device_ids) > 1: 88 | model = convert_model(model) 89 | model = CustomDetDataParallel(model, device_ids) 90 | model = model.cuda() 91 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) 92 | training = True 93 | 94 | while training and current_step < max_step: 95 | tqdm_loader = tqdm.tqdm(loader) 96 | for images, targets, infos in tqdm_loader: 97 | current_step += 1 98 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up) 99 | 100 | images = images.cuda() / 255 101 | losses = model(images, targets) 102 | loss = sum(losses.values()) 103 | loss.backward() 104 | optimizer.step() 105 | optimizer.zero_grad() 106 | 107 | for key, val in list(losses.items()): 108 | losses[key] = val.item() 109 | writer.add_scalar(key, val, global_step=current_step) 110 | writer.flush() 111 | tqdm_loader.set_postfix(losses) 112 | tqdm_loader.set_description(f'<{current_step}/{max_step}>') 113 | 114 | if current_step % save_interval == 0: 115 | save_path = os.path.join(dir_weight, '%d.pth' % current_step) 116 | state_dict = model.state_dict() if len(device_ids) == 1 else model.module.state_dict() 117 | torch.save(state_dict, save_path) 118 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval)) 119 | if os.path.exists(cache_file): 120 | os.remove(cache_file) 121 | 122 | if current_step >= max_step: 123 | training = False 124 | 
writer.close() 125 | break 126 | 127 | 128 | if __name__ == '__main__': 129 | 130 | torch.manual_seed(0) 131 | torch.backends.cudnn.benchmark = True 132 | 133 | device_ids = [0, 1] 134 | torch.cuda.set_device(device_ids[0]) 135 | backbone = resnet.resnet101 136 | 137 | dir_dataset = '' 138 | dir_save = '' 139 | 140 | main() 141 | -------------------------------------------------------------------------------- /run/ucas-aod/evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : evaluate.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:11 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import tqdm 17 | import torch 18 | import cv2 as cv 19 | import numpy as np 20 | 21 | from torch.utils.data import DataLoader 22 | 23 | from data.aug import ops 24 | from data.dataset import UCAS_AOD 25 | 26 | from model.rdd import RDD 27 | from model.backbone import resnet 28 | 29 | from utils.box.bbox_np import xy42xywha, xywha2xy4 30 | from utils.box.metric import get_det_aps 31 | 32 | 33 | @torch.no_grad() 34 | def main(): 35 | global checkpoint 36 | if checkpoint is None: 37 | dir_weight = os.path.join(dir_save, 'weight') 38 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 39 | current_step = max(indexes) 40 | checkpoint = os.path.join(dir_weight, '%d.pth' % current_step) 41 | 42 | image_size = 768 43 | batch_size = 1 44 | num_workers = 4 45 | 46 | aug = ops.ResizeBase(64) 47 | dataset = UCAS_AOD(dir_dataset, 'test', aug) 48 | loader = DataLoader(dataset, batch_size, num_workers=num_workers, pin_memory=True, collate_fn=dataset.collate) 49 | num_classes = len(dataset.names) 50 | 51 | prior_box = { 52 | 'strides': [8, 16, 32, 64], 53 | 'sizes': [3] * 4, 54 | 'aspects': [[1, 2]] * 4, 55 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 4, 56 | 'old_version': old_version 57 | } 58 | conf_thresh = 0.01 59 | nms_thresh = 0.45 60 | cfg = { 61 | 'prior_box': prior_box, 62 | 'num_classes': num_classes, 63 | 'extra': 1, 64 | 'conf_thresh': conf_thresh, 65 | 'nms_thresh': nms_thresh, 66 | } 67 | 68 | model = RDD(backbone(fetch_feature=True), cfg) 69 | model.build_pipe(shape=[2, 3, image_size, image_size]) 70 | model.restore(checkpoint) 71 | model.cuda() 72 | model.eval() 73 | 74 | count = 0 75 | gt_list, det_list = [], [] 76 | for images, targets, infos in tqdm.tqdm(loader): 77 | images = images.cuda() / 255 78 | rh, rw = images.shape[2:] 79 | dets = model(images) 80 | for target, det, info in zip(targets, dets, infos): 81 | if target: 82 | bboxes = np.stack([xy42xywha(bbox) for bbox in info['objs']['bboxes']]) 83 | labels = info['objs']['labels'] 84 | gt_list.extend([count, bbox, 1, label] for bbox, label in zip(bboxes, labels)) 85 | if det: 86 | ih, iw = info['shape'][:2] 87 | bboxes, scores, labels = list(map(lambda x: x.cpu().numpy(), det)) 88 | bboxes = np.stack([xywha2xy4(bbox) for bbox in bboxes]) 89 | bboxes_ = bboxes * [iw / rw, ih / rh] 90 | # bboxes = np.stack([xy42xywha(bbox) for bbox in bboxes_]) 91 | bboxes = [] 92 | for bbox in bboxes_.astype(np.float32): 93 | (x, y), (w, h), a = cv.minAreaRect(bbox) 94 | bboxes.append([x, y, w, h, a]) 95 | bboxes = np.array(bboxes) 96 | det_list.extend([count, bbox, score, label] for bbox, score, label in zip(bboxes, scores, 
labels)) 97 | count += 1 98 | APs = get_det_aps(det_list, gt_list, num_classes) 99 | mAP = sum(APs) / len(APs) 100 | print('AP') 101 | for label in range(num_classes): 102 | print(f'{dataset.label2name[label]}: {APs[label]}') 103 | print(f'mAP: {mAP}') 104 | 105 | 106 | if __name__ == '__main__': 107 | 108 | device_id = 0 109 | torch.cuda.set_device(device_id) 110 | 111 | dir_dataset = '' 112 | dir_save = '' 113 | 114 | backbone = resnet.resnet101 115 | checkpoint = None 116 | old_version = False # set True when using the original weights 117 | 118 | main() 119 | -------------------------------------------------------------------------------- /run/ucas-aod/prepare.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : prepare.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:11 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import json 17 | import cv2 as cv 18 | import numpy as np 19 | 20 | from utils.crop_image import Cropper 21 | 22 | 23 | def txt2json(dir_txt, dir_json, category): 24 | os.makedirs(dir_json, exist_ok=True) 25 | for txt in os.listdir(dir_txt): 26 | objs = [] 27 | name = os.path.splitext(txt)[0] 28 | for line in open(os.path.join(dir_txt, txt)).readlines(): 29 | bbox = line.strip().split('\t')[:8] 30 | bbox = np.array(bbox, dtype=np.float32).reshape([4, 2]) 31 | bbox = cv.boxPoints(cv.minAreaRect(bbox)) 32 | bbox = bbox.tolist() 33 | obj = dict() 34 | obj['name'] = category 35 | obj['bbox'] = bbox 36 | objs.append(obj) 37 | if objs: 38 | json.dump(objs, open(os.path.join(dir_json, name + '.json'), 'wt'), indent=2) 39 | 40 | 41 | def main(): 42 | # (1) 43 | dir_txt = os.path.join(dir_dataset, 'labelTxt', 'car') 44 | dir_anno_car = os.path.join(dir_dataset, 'annotations', 'car') 45 | txt2json(dir_txt, dir_anno_car, 'car') 46 | 47 | dir_txt = os.path.join(dir_dataset, 'labelTxt', 'plane') 48 | dir_anno_plane = os.path.join(dir_dataset, 'annotations', 'plane') 49 | txt2json(dir_txt, dir_anno_plane, 'plane') 50 | 51 | # (2) 52 | dir_img_car = os.path.join(dir_dataset, 'images', 'car') 53 | dir_img_plane = os.path.join(dir_dataset, 'images', 'plane') 54 | num_car = len(os.listdir(dir_img_car)) 55 | num_plane = len(os.listdir(dir_img_plane)) 56 | num_test = 400 57 | indexes_test = np.linspace(1, num_car + num_plane, num_test, endpoint=False, dtype=np.int).tolist() 58 | 59 | size = 768 60 | overlap = 0 61 | save_empty = False 62 | 63 | cropper = Cropper(size, overlap) 64 | 65 | pair_train, pair_test = [], [] 66 | for category, dir_img, dir_anno in [['car', dir_img_car, dir_anno_car], ['plane', dir_img_plane, dir_anno_plane]]: 67 | pair_train_ = [] 68 | for filename in os.listdir(dir_img): 69 | index = int(filename[1: -4]) 70 | if index + (0 if category == 'car' else num_car) in indexes_test: 71 | img = os.path.join('images', category, filename) 72 | anno = os.path.join('annotations', category, filename.replace('png', 'json')) 73 | pair_test.append([img, anno]) 74 | else: 75 | img = os.path.join(dir_img, filename) 76 | anno = os.path.join(dir_anno, filename.replace('png', 'json')) 77 | pair_train_.append([img, anno]) 78 | 79 | out_dir_images = os.path.join(dir_dataset, 'images', f'{category}-crop') 80 | out_dir_annos = os.path.join(dir_dataset, 'annotations', f'{category}-crop') 81 
| cropper.crop_batch(pair_train_, out_dir_images, out_dir_annos, save_empty) 82 | 83 | for filename in os.listdir(out_dir_images): 84 | img = os.path.join('images', f'{category}-crop', filename) 85 | anno = os.path.join('annotations', f'{category}-crop', filename.replace('jpg', 'json')) 86 | pair_train.append([img, anno]) 87 | 88 | # (3) 89 | out_dir = os.path.join(dir_dataset, 'image-sets') 90 | os.makedirs(out_dir, exist_ok=True) 91 | json.dump(pair_train, open(os.path.join(out_dir, 'train.json'), 'wt'), indent=2) 92 | json.dump(pair_test, open(os.path.join(out_dir, 'test.json'), 'wt'), indent=2) 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | # directory hierarchy 98 | 99 | # dir_dataset/images/car/P0001.png 100 | # -----------/car/... 101 | # -----------/plane/... 102 | 103 | # dir_dataset/labelTxt/car/P0001.txt 104 | # -------------/car/... 105 | # -------------/plane/... 106 | 107 | # (1) convert annotation files 108 | # (2) crop images 109 | # (3) generate image-set files 110 | 111 | dir_dataset = '' 112 | 113 | main() 114 | -------------------------------------------------------------------------------- /run/ucas-aod/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : train.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:11 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import sys 12 | 13 | sys.path.append('.') 14 | 15 | import os 16 | import tqdm 17 | import torch 18 | 19 | from torch import optim 20 | from torch.utils.data import DataLoader 21 | from torch.utils.tensorboard import SummaryWriter 22 | 23 | from data.aug.compose import Compose 24 | from data.aug import ops 25 | from data.dataset import UCAS_AOD 26 | 27 | from model.rdd import RDD 28 | from model.backbone import resnet 29 | 30 | from utils.adjust_lr import adjust_lr_multi_step 31 | from utils.parallel import convert_model, CustomDetDataParallel 32 | 33 | 34 | def main(): 35 | dir_weight = os.path.join(dir_save, 'weight') 36 | dir_log = os.path.join(dir_save, 'log') 37 | os.makedirs(dir_weight, exist_ok=True) 38 | writer = SummaryWriter(dir_log) 39 | 40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)] 41 | current_step = max(indexes) if indexes else 0 42 | 43 | image_size = 768 44 | lr = 1e-3 45 | batch_size = 12 46 | num_workers = 4 47 | 48 | max_step = 30000 49 | lr_cfg = [[15000, lr], [24000, lr / 10], [max_step, lr / 50]] 50 | warm_up = [500, lr / 50, lr] 51 | save_interval = 1000 52 | 53 | aug = Compose([ 54 | ops.ToFloat(), 55 | ops.PhotometricDistort(), 56 | ops.RandomHFlip(), 57 | ops.RandomVFlip(), 58 | ops.RandomRotate90(), 59 | ops.ResizeJitter([0.8, 1.2]), 60 | ops.PadSquare(), 61 | ops.Resize(image_size), 62 | ]) 63 | dataset = UCAS_AOD(dir_dataset, ['train'], aug) 64 | loader = DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True, 65 | collate_fn=dataset.collate) 66 | num_classes = len(dataset.names) 67 | 68 | prior_box = { 69 | 'strides': [8, 16, 32, 64], 70 | 'sizes': [3] * 4, 71 | 'aspects': [[1, 2]] * 4, 72 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 4, 73 | } 74 | 75 | cfg = { 76 | 'prior_box': prior_box, 77 | 'num_classes': num_classes, 78 | 'extra': 1, 79 | } 80 | 81 | model = RDD(backbone(fetch_feature=True), cfg) 82 | model.build_pipe(shape=[2, 3, image_size, 
image_size]) 83 | if current_step: 84 | model.restore(os.path.join(dir_weight, '%d.pth' % current_step)) 85 | else: 86 | model.init() 87 | if len(device_ids) > 1: 88 | model = convert_model(model) 89 | model = CustomDetDataParallel(model, device_ids) 90 | model = model.cuda() 91 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) 92 | training = True 93 | 94 | while training and current_step < max_step: 95 | tqdm_loader = tqdm.tqdm(loader) 96 | for images, targets, infos in tqdm_loader: 97 | current_step += 1 98 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up) 99 | 100 | images = images.cuda() / 255 101 | losses = model(images, targets) 102 | loss = sum(losses.values()) 103 | loss.backward() 104 | optimizer.step() 105 | optimizer.zero_grad() 106 | 107 | for key, val in list(losses.items()): 108 | losses[key] = val.item() 109 | writer.add_scalar(key, val, global_step=current_step) 110 | writer.flush() 111 | tqdm_loader.set_postfix(losses) 112 | tqdm_loader.set_description(f'<{current_step}/{max_step}>') 113 | 114 | if current_step % save_interval == 0: 115 | save_path = os.path.join(dir_weight, '%d.pth' % current_step) 116 | state_dict = model.state_dict() if len(device_ids) == 1 else model.module.state_dict() 117 | torch.save(state_dict, save_path) 118 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval)) 119 | if os.path.exists(cache_file): 120 | os.remove(cache_file) 121 | 122 | if current_step >= max_step: 123 | training = False 124 | writer.close() 125 | break 126 | 127 | 128 | if __name__ == '__main__': 129 | 130 | torch.manual_seed(0) 131 | torch.backends.cudnn.benchmark = True 132 | 133 | device_ids = [0, 1] 134 | torch.cuda.set_device(device_ids[0]) 135 | backbone = resnet.resnet101 136 | 137 | dir_dataset = '' 138 | dir_save = '' 139 | 140 | main() 141 | -------------------------------------------------------------------------------- /utils/adjust_lr.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def adjust_lr_multi_step(optimizer, step, cfg, warm_up=None): 4 | for param_group in optimizer.param_groups: 5 | if warm_up is not None and step <= warm_up[0]: 6 | param_group['lr'] = warm_up[1] + step / warm_up[0] * (warm_up[2] - warm_up[1]) 7 | else: 8 | for s, lr in cfg: 9 | if s is None or step <= s: 10 | param_group['lr'] = lr 11 | break 12 | -------------------------------------------------------------------------------- /utils/box/bbox.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : bbox.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:08 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
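A minimal sketch, not part of the repository, of how the warm-up and multi-step schedule in utils/adjust_lr.py behaves with the lr_cfg and warm_up values from run/ucas-aod/train.py above; it assumes the repository root is on the Python path:

import torch
from torch import optim
from utils.adjust_lr import adjust_lr_multi_step

lr = 1e-3
lr_cfg = [[15000, lr], [24000, lr / 10], [30000, lr / 50]]
warm_up = [500, lr / 50, lr]

optimizer = optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=lr)
for step in (1, 250, 500, 1000, 20000, 29000):
    adjust_lr_multi_step(optimizer, step, lr_cfg, warm_up)
    print(step, optimizer.param_groups[0]['lr'])
# linear warm-up from lr/50 to lr over the first 500 steps, then 1e-3 up to
# step 15000, 1e-4 up to step 24000, and 2e-5 up to max_step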
10 | 11 | import torch 12 | 13 | from torchvision.ops.boxes import nms, batched_nms, box_iou 14 | 15 | 16 | def bbox_switch(bbox, in_type, out_type): # 'xyxy', 'xywh' 17 | if in_type == 'xyxy' and out_type == 'xywh': 18 | bbox = torch.cat([(bbox[..., 0: 2] + bbox[..., 2: 4]) / 2, bbox[..., 2: 4] - bbox[..., 0: 2]], dim=-1) 19 | elif in_type == 'xywh' and out_type == 'xyxy': 20 | bbox = torch.cat([bbox[..., 0: 2] - bbox[..., 2: 4] / 2, bbox[..., 0: 2] + bbox[..., 2: 4] / 2], dim=-1) 21 | return bbox 22 | 23 | 24 | def bbox_iou(bbox1, bbox2, bbox_type='xyxy'): # nx4, mx4 -> nxm 25 | bbox1 = bbox_switch(bbox1, bbox_type, 'xyxy') 26 | bbox2 = bbox_switch(bbox2, bbox_type, 'xyxy') 27 | return box_iou(bbox1, bbox2) 28 | 29 | 30 | def bbox_nms(bboxes, scores, iou_thresh): 31 | return nms(bboxes, scores, iou_thresh) 32 | 33 | 34 | def bbox_batched_nms(bboxes, scores, labels, iou_thresh): 35 | return batched_nms(bboxes, scores, labels, iou_thresh) 36 | 37 | 38 | def encode(gt_bbox, det_bbox, anchor, variance): 39 | xy = (gt_bbox[..., 0: 2] - anchor[..., 0: 2]) / anchor[..., 2: 4] / variance[0] 40 | wh = torch.log(gt_bbox[..., 2: 4] / anchor[..., 2: 4]) / variance[1] 41 | a = gt_bbox[..., [4]] / 45 / variance[2] 42 | gt_bbox = torch.cat([xy, wh, a], dim=-1) 43 | det_bbox = torch.cat([det_bbox[..., :4], torch.tanh(det_bbox[..., [4]]) / variance[2]], dim=-1) 44 | return gt_bbox, det_bbox 45 | 46 | 47 | def decode(det_bbox, anchor, variance): 48 | xy = det_bbox[..., 0: 2] * variance[0] * anchor[..., 2: 4] + anchor[..., 0: 2] 49 | wh = torch.exp(det_bbox[..., 2: 4] * variance[1]) * anchor[..., 2: 4] 50 | a = torch.tanh(det_bbox[..., [4]]) * 45 51 | return torch.cat([xy, wh, a], dim=-1) 52 | -------------------------------------------------------------------------------- /utils/box/bbox_np.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : bbox_np.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:08 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
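Illustrative only, not in the repository: converting between the 'xywh' and 'xyxy' layouts and computing horizontal IoU with the helpers from utils/box/bbox.py above, assuming torch and torchvision are installed:

import torch
from utils.box.bbox import bbox_switch, bbox_iou

boxes_xywh = torch.tensor([[50., 50., 20., 10.],
                           [52., 50., 20., 10.]])
boxes_xyxy = bbox_switch(boxes_xywh, 'xywh', 'xyxy')  # [cx, cy, w, h] -> [x1, y1, x2, y2]
print(bbox_iou(boxes_xyxy, boxes_xyxy))               # 2x2 pairwise IoU matrix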
10 | 11 | import numpy as np 12 | 13 | 14 | def bbox_switch(bbox, in_type, out_type): # 'xyxy', 'xywh' 15 | if in_type == 'xyxy' and out_type == 'xywh': 16 | bbox = np.concatenate([(bbox[..., 0: 2] + bbox[..., 2: 4]) / 2, bbox[..., 2: 4] - bbox[..., 0: 2]], axis=-1) 17 | elif in_type == 'xywh' and out_type == 'xyxy': 18 | bbox = np.concatenate([bbox[..., 0: 2] - bbox[..., 2: 4] / 2, bbox[..., 0: 2] + bbox[..., 2: 4] / 2], axis=-1) 19 | return bbox 20 | 21 | 22 | def xywha2xy4(xywha): # a represents the angle(degree), clockwise, a=0 along the X axis 23 | x, y, w, h, a = xywha 24 | corner = np.array([[-w / 2, -h / 2], [w / 2, -h / 2], [w / 2, h / 2], [-w / 2, h / 2]]) 25 | a = np.deg2rad(a) 26 | transform = np.array([[np.cos(a), -np.sin(a)], [np.sin(a), np.cos(a)]]) 27 | return transform.dot(corner.T).T + [x, y] 28 | 29 | 30 | def xy42xywha(xy4, flag=0): # bbox(4x2) represents a rectangle 31 | # flag=0, 0 <= a < 180 32 | # flag=1, 0 <= a < 180, w >= h 33 | # flag=2, -45 <= a < 45 34 | x, y = np.mean(xy4, axis=0) 35 | diff01 = xy4[0] - xy4[1] 36 | diff03 = xy4[0] - xy4[3] 37 | w = np.sqrt(np.square(diff01).sum()) 38 | h = np.sqrt(np.square(diff03).sum()) 39 | if w >= h: 40 | a = np.rad2deg(np.arctan2(diff01[1], diff01[0])) 41 | else: 42 | a = np.rad2deg(np.arctan2(diff03[1], diff03[0])) + 90 43 | if flag > 0: 44 | if w < h: 45 | w, h = h, w 46 | a += 90 47 | a = (a % 180 + 180) % 180 48 | if flag > 1: 49 | if 45 <= a < 135: 50 | w, h = h, w 51 | a -= 90 52 | elif a >= 135: 53 | a -= 180 54 | return np.stack([x, y, w, h, a]) 55 | -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_cpu/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .rbbox_overlap import rbbox_iou 4 | from .rbbox_overlap import rbbox_iou_1x1 5 | from .rbbox_overlap import rbbox_iou_nxn 6 | from .rbbox_overlap import rbbox_nms -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_cpu/rbbox_overlap.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include <math.h> 4 | 5 | 6 | # define PI 3.14159265358979323846 7 | 8 | 9 | double trangle_area(double * a, double * b, double * c) { 10 | return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0]))/2.0; 11 | } 12 | 13 | 14 | double area(double * int_pts, int num_of_inter) { 15 | 16 | double area = 0; 17 | for(int i = 0;i < num_of_inter - 2;i++) { 18 | area += fabs(trangle_area(int_pts, int_pts + 2 * i + 2, int_pts + 2 * i + 4)); 19 | } 20 | return area; 21 | } 22 | 23 | 24 | void reorder_pts(double * int_pts, int num_of_inter) { 25 | 26 | if(num_of_inter > 0) { 27 | 28 | double center[2]; 29 | 30 | center[0] = 0.0; 31 | center[1] = 0.0; 32 | 33 | for(int i = 0;i < num_of_inter;i++) { 34 | center[0] += int_pts[2 * i]; 35 | center[1] += int_pts[2 * i + 1]; 36 | } 37 | center[0] /= num_of_inter; 38 | center[1] /= num_of_inter; 39 | 40 | double vs[16]; 41 | double v[2]; 42 | double d; 43 | for(int i = 0;i < num_of_inter;i++) { 44 | v[0] = int_pts[2 * i]-center[0]; 45 | v[1] = int_pts[2 * i + 1]-center[1]; 46 | d = sqrt(v[0] * v[0] + v[1] * v[1]); 47 | v[0] = v[0] / d; 48 | v[1] = v[1] / d; 49 | if(v[1] < 0) { 50 | v[0]= - 2 - v[0]; 51 | } 52 | vs[i] = v[0]; 53 | } 54 | 55 | double temp,tx,ty; 56 | int j; 57 | for(int i=1;i<num_of_inter;i++){ 58 | if(vs[i-1]>vs[i]){ 59 | temp = vs[i]; 60 | tx = int_pts[2*i]; 61 | ty = int_pts[2*i+1]; 62 | j=i; 63 | while(j>0&&vs[j-1]>temp){ 64 | vs[j] = vs[j-1]; 65 |
int_pts[j*2] = int_pts[j*2-2]; 66 | int_pts[j*2+1] = int_pts[j*2-1]; 67 | j--; 68 | } 69 | vs[j] = temp; 70 | int_pts[j*2] = tx; 71 | int_pts[j*2+1] = ty; 72 | } 73 | } 74 | } 75 | } 76 | 77 | bool inter2line(double * pts1, double *pts2, int i, int j, double * temp_pts) { 78 | 79 | double a[2]; 80 | double b[2]; 81 | double c[2]; 82 | double d[2]; 83 | 84 | double area_abc, area_abd, area_cda, area_cdb; 85 | 86 | a[0] = pts1[2 * i]; 87 | a[1] = pts1[2 * i + 1]; 88 | 89 | b[0] = pts1[2 * ((i + 1) % 4)]; 90 | b[1] = pts1[2 * ((i + 1) % 4) + 1]; 91 | 92 | c[0] = pts2[2 * j]; 93 | c[1] = pts2[2 * j + 1]; 94 | 95 | d[0] = pts2[2 * ((j + 1) % 4)]; 96 | d[1] = pts2[2 * ((j + 1) % 4) + 1]; 97 | 98 | area_abc = trangle_area(a, b, c); 99 | area_abd = trangle_area(a, b, d); 100 | 101 | if(area_abc * area_abd >= 0) { 102 | return false; 103 | } 104 | 105 | area_cda = trangle_area(c, d, a); 106 | area_cdb = area_cda + area_abc - area_abd; 107 | 108 | if (area_cda * area_cdb >= 0) { 109 | return false; 110 | } 111 | double t = area_cda / (area_abd - area_abc); 112 | 113 | double dx = t * (b[0] - a[0]); 114 | double dy = t * (b[1] - a[1]); 115 | temp_pts[0] = a[0] + dx; 116 | temp_pts[1] = a[1] + dy; 117 | 118 | return true; 119 | } 120 | 121 | bool in_rect(double pt_x, double pt_y, double * pts) { 122 | 123 | double ab[2]; 124 | double ad[2]; 125 | double ap[2]; 126 | 127 | double abab; 128 | double abap; 129 | double adad; 130 | double adap; 131 | 132 | ab[0] = pts[2] - pts[0]; 133 | ab[1] = pts[3] - pts[1]; 134 | 135 | ad[0] = pts[6] - pts[0]; 136 | ad[1] = pts[7] - pts[1]; 137 | 138 | ap[0] = pt_x - pts[0]; 139 | ap[1] = pt_y - pts[1]; 140 | 141 | abab = ab[0] * ab[0] + ab[1] * ab[1]; 142 | abap = ab[0] * ap[0] + ab[1] * ap[1]; 143 | adad = ad[0] * ad[0] + ad[1] * ad[1]; 144 | adap = ad[0] * ap[0] + ad[1] * ap[1]; 145 | 146 | return abab >= abap && abap >= 0 && adad >= adap && adap >= 0; 147 | } 148 | 149 | int inter_pts(double * pts1, double * pts2, double * int_pts) { 150 | 151 | int num_of_inter = 0; 152 | 153 | for(int i = 0;i < 4;i++) { 154 | if(in_rect(pts1[2 * i], pts1[2 * i + 1], pts2)) { 155 | int_pts[num_of_inter * 2] = pts1[2 * i]; 156 | int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]; 157 | num_of_inter++; 158 | } 159 | if(in_rect(pts2[2 * i], pts2[2 * i + 1], pts1)) { 160 | int_pts[num_of_inter * 2] = pts2[2 * i]; 161 | int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]; 162 | num_of_inter++; 163 | } 164 | } 165 | 166 | double temp_pts[2]; 167 | 168 | for(int i = 0;i < 4;i++) { 169 | for(int j = 0;j < 4;j++) { 170 | bool has_pts = inter2line(pts1, pts2, i, j, temp_pts); 171 | if(has_pts) { 172 | int_pts[num_of_inter * 2] = temp_pts[0]; 173 | int_pts[num_of_inter * 2 + 1] = temp_pts[1]; 174 | num_of_inter++; 175 | } 176 | } 177 | } 178 | 179 | 180 | return num_of_inter; 181 | } 182 | 183 | 184 | void convert_region(double * pts , double * region) { 185 | 186 | double angle = region[4]; 187 | double a_cos = cos(angle/180.0*PI); 188 | double a_sin = sin(angle/180.0*PI); 189 | 190 | double ctr_x = region[0]; 191 | double ctr_y = region[1]; 192 | 193 | double w = region[2]; 194 | double h = region[3]; 195 | 196 | double pts_x[4]; 197 | double pts_y[4]; 198 | 199 | pts_x[0] = - w / 2; 200 | pts_x[1] = w / 2; 201 | pts_x[2] = w / 2; 202 | pts_x[3] = - w / 2; 203 | 204 | pts_y[0] = - h / 2; 205 | pts_y[1] = - h / 2; 206 | pts_y[2] = h / 2; 207 | pts_y[3] = h / 2; 208 | 209 | for(int i = 0;i < 4;i++) { 210 | pts[7 - 2 * i - 1] = a_cos * pts_x[i] - a_sin * pts_y[i] + ctr_x; 211 | pts[7 - 2 * i] = 
a_sin * pts_x[i] + a_cos * pts_y[i] + ctr_y; 212 | 213 | } 214 | 215 | } 216 | 217 | 218 | double inter(double * region1, double * region2) { 219 | 220 | double pts1[8]; 221 | double pts2[8]; 222 | double int_pts[16]; 223 | int num_of_inter; 224 | 225 | convert_region(pts1, region1); 226 | convert_region(pts2, region2); 227 | 228 | num_of_inter = inter_pts(pts1, pts2, int_pts); 229 | 230 | reorder_pts(int_pts, num_of_inter); 231 | 232 | return area(int_pts, num_of_inter); 233 | 234 | 235 | } 236 | 237 | double RotateIoU(double * region1, double * region2) { 238 | 239 | double area1 = region1[2] * region1[3]; 240 | double area2 = region2[2] * region2[3]; 241 | double area_inter = inter(region1, region2); 242 | 243 | return area_inter / (area1 + area2 - area_inter); 244 | 245 | } 246 | 247 | 248 | void RotateIoU_1x1(double * region1, double * region2, int n, double * ret){ 249 | for ( int i = 0; i < n; i++ ){ 250 | ret[i] = RotateIoU(region1 + i * 5, region2 + i * 5); 251 | } 252 | } 253 | 254 | 255 | void RotateIoU_nxn(double * region1, double * region2, int n1, int n2, double * ret){ 256 | for ( int i = 0; i < n1; i++ ){ 257 | for ( int j = 0; j < n2; j++ ){ 258 | ret[i * n2 + j] = RotateIoU(region1 + i * 5, region2 + j * 5); 259 | } 260 | } 261 | } 262 | 263 | void RotateNMS(double * bboxes, int n, double thresh, int * keeps){ 264 | int i, flag; 265 | n--; 266 | while(n > 0){ 267 | flag = 0; 268 | for ( i = 0; i < n; i++ ){ 269 | if (keeps[i]){ 270 | if (RotateIoU(bboxes + n * 5, bboxes + i * 5) > thresh){ 271 | keeps[i] = 0; 272 | } 273 | else{ 274 | flag = i; 275 | } 276 | } 277 | } 278 | n = flag; 279 | } 280 | } -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_cpu/rbbox_overlap.pyx: -------------------------------------------------------------------------------- 1 | 2 | 3 | # distutils: language = c++ 4 | 5 | 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | 10 | assert sizeof(int) == sizeof(np.int32_t) 11 | 12 | 13 | cdef extern from 'rbbox_overlap.h': 14 | cdef float RotateIoU(np.float64_t * region1, np.float64_t * region2) 15 | cdef void RotateIoU_1x1(np.float64_t * region1, np.float64_t * region2, int n, np.float64_t * ret) 16 | cdef void RotateIoU_nxn(np.float64_t * region1, np.float64_t * region2, int n1, int n2, np.float64_t * ret) 17 | cdef void RotateNMS(np.float64_t * bboxes, int n, float thresh, np.int32_t * keeps) 18 | 19 | 20 | def rbbox_iou(np.ndarray[np.float64_t, ndim=1] a, np.ndarray[np.float64_t, ndim=1] b): 21 | return RotateIoU(&a[0], &b[0]) 22 | 23 | 24 | def rbbox_iou_1x1(np.ndarray[np.float64_t, ndim=2] a, np.ndarray[np.float64_t, ndim=2] b): 25 | cdef int n1 = a.shape[0] 26 | cdef int n2 = b.shape[0] 27 | assert n1 == n2 28 | cdef np.ndarray[np.float64_t, ndim=1] ret = np.zeros([n1], dtype=np.float64) 29 | RotateIoU_1x1(&a[0, 0], &b[0, 0], n1, &ret[0]) 30 | return ret 31 | 32 | 33 | def rbbox_iou_nxn(np.ndarray[np.float64_t, ndim=2] a, np.ndarray[np.float64_t, ndim=2] b): 34 | cdef int n1 = a.shape[0] 35 | cdef int n2 = b.shape[0] 36 | cdef np.ndarray[np.float64_t, ndim=2] ret = np.zeros([n1, n2], dtype=np.float64) 37 | RotateIoU_nxn(&a[0, 0], &b[0, 0], n1, n2, &ret[0, 0]) 38 | return ret 39 | 40 | 41 | def rbbox_nms(np.ndarray[np.float64_t, ndim=2] boxes, np.ndarray[np.float64_t, ndim=1] scores, float thresh): 42 | cdef int n = boxes.shape[0] 43 | cdef np.ndarray[np.int32_t, ndim=1] keeps = np.ones([n], dtype=np.int32) 44 | cdef np.ndarray[np.int32_t, ndim=1] indexes = 
np.argsort(scores).astype(np.int32) 45 | boxes = boxes[indexes] 46 | RotateNMS(&boxes[0, 0], n, thresh, &keeps[0]) 47 | keeps = indexes[keeps.astype(np.bool)] 48 | if len(keeps) > 1: 49 | keeps = np.ascontiguousarray(keeps[::-1]) 50 | return keeps 51 | 52 | 53 | # python setup.py build_ext --inplace 54 | 55 | # iou.cpp(2961): error C2664: 'void RotateNMS(float *,int,float,int *)': cannot convert argument 4 from '__pyx_t_5numpy_int32_t *' to 'int *' 56 | # 57 | # go to line(2961) in the generated file in iou.cpp 58 | # Modify corresponding __pyx_t_5numpy_int32_t to int 59 | 60 | 61 | -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_cpu/setup.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | from distutils.core import setup 6 | from Cython.Build import cythonize 7 | 8 | 9 | try: 10 | numpy_include = np.get_include() 11 | except AttributeError: 12 | numpy_include = np.get_numpy_include() 13 | 14 | 15 | setup( 16 | ext_modules=cythonize("rbbox_overlap.pyx"), 17 | include_dirs=[numpy_include], 18 | ) 19 | -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_gpu/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .rbbox_overlap import rbbox_overlaps as rbbox_iou 4 | from .rbbox_overlap import rotate_gpu_nms as rbbox_nms -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_gpu/rbbox_overlap.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id); 4 | 5 | 6 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 7 | int boxes_dim, float nms_overlap_thresh, int device_id); 8 | -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_gpu/rbbox_overlap.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | 4 | assert sizeof(int) == sizeof(np.int32_t) 5 | 6 | cdef extern from "rbbox_overlap.hpp": 7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 8 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int) 9 | 10 | 11 | def rbbox_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0): 12 | # boxes: [x, y, w, h, theta] 13 | cdef int N = boxes.shape[0] 14 | cdef int K = query_boxes.shape[0] 15 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32) 16 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id) 17 | return overlaps 18 | 19 | 20 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float_t thresh, np.int32_t device_id=0): 21 | cdef int boxes_num = dets.shape[0] 22 | cdef int boxes_dim = dets.shape[1] 23 | cdef int num_out 24 | cdef np.ndarray[np.int32_t, ndim=1] \ 25 | keep = np.zeros(boxes_num, dtype=np.int32) 26 | cdef np.ndarray[np.float32_t, ndim=1] \ 27 | scores = dets[:, 5] 28 | cdef np.ndarray[np.int_t, ndim=1] \ 29 | order = scores.argsort()[::-1] 30 | cdef np.ndarray[np.float32_t, ndim=2] \ 31 | sorted_dets = dets[order, :] 32 | thresh = thresh 33 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], 
boxes_num, boxes_dim, thresh, device_id) 34 | keep = keep[:num_out] 35 | return order[keep] 36 | -------------------------------------------------------------------------------- /utils/box/ext/rbbox_overlap_gpu/rbbox_overlap_kernel.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "rbbox_overlap.hpp" 3 | #include <vector> 4 | #include <iostream> 5 | #include <cstring> 6 | 7 | #define CUDA_CHECK(condition) \ 8 | /* Code block avoids redefinition of cudaError_t error */ \ 9 | do { \ 10 | cudaError_t error = condition; \ 11 | if (error != cudaSuccess) { \ 12 | std::cout << cudaGetErrorString(error) << std::endl; \ 13 | } \ 14 | } while (0) 15 | 16 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 17 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 18 | 19 | __device__ inline float trangle_area(float * a, float * b, float * c) { 20 | return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0]))/2.0; 21 | } 22 | 23 | __device__ inline float area(float * int_pts, int num_of_inter) { 24 | 25 | float area = 0.0; 26 | for(int i = 0;i < num_of_inter - 2;i++) { 27 | area += fabs(trangle_area(int_pts, int_pts + 2 * i + 2, int_pts + 2 * i + 4)); 28 | } 29 | return area; 30 | } 31 | 32 | __device__ inline void reorder_pts(float * int_pts, int num_of_inter) { 33 | 34 | 35 | 36 | if(num_of_inter > 0) { 37 | 38 | float center[2]; 39 | 40 | center[0] = 0.0; 41 | center[1] = 0.0; 42 | 43 | for(int i = 0;i < num_of_inter;i++) { 44 | center[0] += int_pts[2 * i]; 45 | center[1] += int_pts[2 * i + 1]; 46 | } 47 | center[0] /= num_of_inter; 48 | center[1] /= num_of_inter; 49 | 50 | float vs[16]; 51 | float v[2]; 52 | float d; 53 | for(int i = 0;i < num_of_inter;i++) { 54 | v[0] = int_pts[2 * i]-center[0]; 55 | v[1] = int_pts[2 * i + 1]-center[1]; 56 | d = sqrt(v[0] * v[0] + v[1] * v[1]); 57 | v[0] = v[0] / d; 58 | v[1] = v[1] / d; 59 | if(v[1] < 0) { 60 | v[0]= - 2 - v[0]; 61 | } 62 | vs[i] = v[0]; 63 | } 64 | 65 | float temp,tx,ty; 66 | int j; 67 | for(int i=1;i<num_of_inter;i++){ 68 | if(vs[i-1]>vs[i]){ 69 | temp = vs[i]; 70 | tx = int_pts[2*i]; 71 | ty = int_pts[2*i+1]; 72 | j=i; 73 | while(j>0&&vs[j-1]>temp){ 74 | vs[j] = vs[j-1]; 75 | int_pts[j*2] = int_pts[j*2-2]; 76 | int_pts[j*2+1] = int_pts[j*2-1]; 77 | j--; 78 | } 79 | vs[j] = temp; 80 | int_pts[j*2] = tx; 81 | int_pts[j*2+1] = ty; 82 | } 83 | } 84 | } 85 | 86 | } 87 | __device__ inline bool inter2line(float * pts1, float *pts2, int i, int j, float * temp_pts) { 88 | 89 | float a[2]; 90 | float b[2]; 91 | float c[2]; 92 | float d[2]; 93 | 94 | float area_abc, area_abd, area_cda, area_cdb; 95 | 96 | a[0] = pts1[2 * i]; 97 | a[1] = pts1[2 * i + 1]; 98 | 99 | b[0] = pts1[2 * ((i + 1) % 4)]; 100 | b[1] = pts1[2 * ((i + 1) % 4) + 1]; 101 | 102 | c[0] = pts2[2 * j]; 103 | c[1] = pts2[2 * j + 1]; 104 | 105 | d[0] = pts2[2 * ((j + 1) % 4)]; 106 | d[1] = pts2[2 * ((j + 1) % 4) + 1]; 107 | 108 | area_abc = trangle_area(a, b, c); 109 | area_abd = trangle_area(a, b, d); 110 | 111 | if(area_abc * area_abd >= 0) { 112 | return false; 113 | } 114 | 115 | area_cda = trangle_area(c, d, a); 116 | area_cdb = area_cda + area_abc - area_abd; 117 | 118 | if (area_cda * area_cdb >= 0) { 119 | return false; 120 | } 121 | float t = area_cda / (area_abd - area_abc); 122 | 123 | float dx = t * (b[0] - a[0]); 124 | float dy = t * (b[1] - a[1]); 125 | temp_pts[0] = a[0] + dx; 126 | temp_pts[1] = a[1] + dy; 127 | 128 | return true; 129 | } 130 | 131 | __device__ inline bool in_rect(float pt_x, float pt_y, float * pts) { 132 | 133 | float ab[2]; 134 | float ad[2]; 135 |
float ap[2]; 136 | 137 | float abab; 138 | float abap; 139 | float adad; 140 | float adap; 141 | 142 | ab[0] = pts[2] - pts[0]; 143 | ab[1] = pts[3] - pts[1]; 144 | 145 | ad[0] = pts[6] - pts[0]; 146 | ad[1] = pts[7] - pts[1]; 147 | 148 | ap[0] = pt_x - pts[0]; 149 | ap[1] = pt_y - pts[1]; 150 | 151 | abab = ab[0] * ab[0] + ab[1] * ab[1]; 152 | abap = ab[0] * ap[0] + ab[1] * ap[1]; 153 | adad = ad[0] * ad[0] + ad[1] * ad[1]; 154 | adap = ad[0] * ap[0] + ad[1] * ap[1]; 155 | 156 | return abab >= abap and abap >= 0 and adad >= adap and adap >= 0; 157 | } 158 | 159 | __device__ inline int inter_pts(float * pts1, float * pts2, float * int_pts) { 160 | 161 | int num_of_inter = 0; 162 | 163 | for(int i = 0;i < 4;i++) { 164 | if(in_rect(pts1[2 * i], pts1[2 * i + 1], pts2)) { 165 | int_pts[num_of_inter * 2] = pts1[2 * i]; 166 | int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]; 167 | num_of_inter++; 168 | } 169 | if(in_rect(pts2[2 * i], pts2[2 * i + 1], pts1)) { 170 | int_pts[num_of_inter * 2] = pts2[2 * i]; 171 | int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]; 172 | num_of_inter++; 173 | } 174 | } 175 | 176 | float temp_pts[2]; 177 | 178 | for(int i = 0;i < 4;i++) { 179 | for(int j = 0;j < 4;j++) { 180 | bool has_pts = inter2line(pts1, pts2, i, j, temp_pts); 181 | if(has_pts) { 182 | int_pts[num_of_inter * 2] = temp_pts[0]; 183 | int_pts[num_of_inter * 2 + 1] = temp_pts[1]; 184 | num_of_inter++; 185 | } 186 | } 187 | } 188 | 189 | 190 | return num_of_inter; 191 | } 192 | 193 | __device__ inline void convert_region(float * pts , float const * const region) { 194 | 195 | float angle = region[4]; 196 | float a_cos = cos(angle/180.0*3.1415926535); 197 | float a_sin = sin(angle/180.0*3.1415926535); 198 | 199 | float ctr_x = region[0]; 200 | float ctr_y = region[1]; 201 | 202 | float w = region[2]; 203 | float h = region[3]; 204 | 205 | float pts_x[4]; 206 | float pts_y[4]; 207 | 208 | pts_x[0] = - w / 2; 209 | pts_x[1] = w / 2; 210 | pts_x[2] = w / 2; 211 | pts_x[3] = - w / 2; 212 | 213 | pts_y[0] = - h / 2; 214 | pts_y[1] = - h / 2; 215 | pts_y[2] = h / 2; 216 | pts_y[3] = h / 2; 217 | 218 | for(int i = 0;i < 4;i++) { 219 | pts[7 - 2 * i - 1] = a_cos * pts_x[i] - a_sin * pts_y[i] + ctr_x; 220 | pts[7 - 2 * i] = a_sin * pts_x[i] + a_cos * pts_y[i] + ctr_y; 221 | 222 | } 223 | 224 | } 225 | 226 | 227 | __device__ inline float inter(float const * const region1, float const * const region2) { 228 | 229 | float pts1[8]; 230 | float pts2[8]; 231 | float int_pts[16]; 232 | int num_of_inter; 233 | 234 | convert_region(pts1, region1); 235 | convert_region(pts2, region2); 236 | 237 | num_of_inter = inter_pts(pts1, pts2, int_pts); 238 | 239 | reorder_pts(int_pts, num_of_inter); 240 | 241 | return area(int_pts, num_of_inter); 242 | 243 | 244 | } 245 | 246 | __device__ inline float devRotateIoU(float const * const region1, float const * const region2) { 247 | 248 | float area1 = region1[2] * region1[3]; 249 | float area2 = region2[2] * region2[3]; 250 | float area_inter = inter(region1, region2); 251 | 252 | return area_inter / (area1 + area2 - area_inter); 253 | 254 | 255 | } 256 | 257 | __global__ void overlaps_kernel(const int N, const int K, const float* dev_boxes, 258 | const float * dev_query_boxes, float* dev_overlaps) { 259 | 260 | const int col_start = blockIdx.y; 261 | const int row_start = blockIdx.x; 262 | 263 | const int row_size = 264 | min(N - row_start * threadsPerBlock, threadsPerBlock); 265 | const int col_size = 266 | min(K - col_start * threadsPerBlock, threadsPerBlock); 267 | 268 | 269 | 
__shared__ float block_boxes[threadsPerBlock * 5]; 270 | __shared__ float block_query_boxes[threadsPerBlock * 5]; 271 | if (threadIdx.x < col_size) { 272 | block_query_boxes[threadIdx.x * 5 + 0] = 273 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 274 | block_query_boxes[threadIdx.x * 5 + 1] = 275 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 276 | block_query_boxes[threadIdx.x * 5 + 2] = 277 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 278 | block_query_boxes[threadIdx.x * 5 + 3] = 279 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 280 | block_query_boxes[threadIdx.x * 5 + 4] = 281 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 282 | } 283 | 284 | if (threadIdx.x < row_size) { 285 | block_boxes[threadIdx.x * 5 + 0] = 286 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 0]; 287 | block_boxes[threadIdx.x * 5 + 1] = 288 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 1]; 289 | block_boxes[threadIdx.x * 5 + 2] = 290 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 2]; 291 | block_boxes[threadIdx.x * 5 + 3] = 292 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 3]; 293 | block_boxes[threadIdx.x * 5 + 4] = 294 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 4]; 295 | } 296 | 297 | __syncthreads(); 298 | 299 | if (threadIdx.x < row_size) { 300 | 301 | for(int i = 0;i < col_size; i++) { 302 | int offset = row_start*threadsPerBlock * K + col_start*threadsPerBlock + threadIdx.x*K+ i ; 303 | dev_overlaps[offset] = devRotateIoU(block_boxes + threadIdx.x * 5, block_query_boxes + i * 5); 304 | } 305 | 306 | } 307 | } 308 | 309 | 310 | __global__ void rotate_nms_kernel(const int n_boxes, const float nms_overlap_thresh, 311 | const float *dev_boxes, unsigned long long *dev_mask) { 312 | const int row_start = blockIdx.y; 313 | const int col_start = blockIdx.x; 314 | 315 | // if (row_start > col_start) return; 316 | 317 | const int row_size = 318 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 319 | const int col_size = 320 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 321 | 322 | __shared__ float block_boxes[threadsPerBlock * 6]; 323 | if (threadIdx.x < col_size) { 324 | block_boxes[threadIdx.x * 6 + 0] = 325 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0]; 326 | block_boxes[threadIdx.x * 6 + 1] = 327 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1]; 328 | block_boxes[threadIdx.x * 6 + 2] = 329 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2]; 330 | block_boxes[threadIdx.x * 6 + 3] = 331 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3]; 332 | block_boxes[threadIdx.x * 6 + 4] = 333 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4]; 334 | block_boxes[threadIdx.x * 6 + 5] = 335 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5]; 336 | } 337 | __syncthreads(); 338 | 339 | if (threadIdx.x < row_size) { 340 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 341 | const float *cur_box = dev_boxes + cur_box_idx * 6; 342 | int i = 0; 343 | unsigned long long t = 0; 344 | int start = 0; 345 | if (row_start == col_start) { 346 | start = threadIdx.x + 1; 347 | } 348 | for (i = start; i < col_size; i++) { 349 | if (devRotateIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) { 350 | t |= 1ULL << i; 351 | } 352 | } 353 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 354 | 
dev_mask[cur_box_idx * col_blocks + col_start] = t; 355 | } 356 | } 357 | 358 | 359 | void _set_device(int device_id) { 360 | int current_device; 361 | CUDA_CHECK(cudaGetDevice(&current_device)); 362 | if (current_device == device_id) { 363 | return; 364 | } 365 | // The call to cudaSetDevice must come before any calls to Get, which 366 | // may perform initialization using the GPU. 367 | CUDA_CHECK(cudaSetDevice(device_id)); 368 | } 369 | 370 | 371 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id) { 372 | 373 | _set_device(device_id); 374 | 375 | float* overlaps_dev = NULL; 376 | float* boxes_dev = NULL; 377 | float* query_boxes_dev = NULL; 378 | 379 | 380 | CUDA_CHECK(cudaMalloc(&boxes_dev, 381 | n * 5 * sizeof(float))); 382 | 383 | CUDA_CHECK(cudaMemcpy(boxes_dev, 384 | boxes, 385 | n * 5 * sizeof(float), 386 | cudaMemcpyHostToDevice)); 387 | 388 | CUDA_CHECK(cudaMalloc(&query_boxes_dev, 389 | k * 5 * sizeof(float))); 390 | 391 | CUDA_CHECK(cudaMemcpy(query_boxes_dev, 392 | query_boxes, 393 | k * 5 * sizeof(float), 394 | cudaMemcpyHostToDevice)); 395 | 396 | CUDA_CHECK(cudaMalloc(&overlaps_dev, 397 | n * k * sizeof(float))); 398 | 399 | dim3 blocks(DIVUP(n, threadsPerBlock), 400 | DIVUP(k, threadsPerBlock)); 401 | 402 | dim3 threads(threadsPerBlock); 403 | 404 | overlaps_kernel<<<blocks, threads>>>(n, k, 405 | boxes_dev, 406 | query_boxes_dev, 407 | overlaps_dev); 408 | 409 | CUDA_CHECK(cudaMemcpy(overlaps, 410 | overlaps_dev, 411 | n * k * sizeof(float), 412 | cudaMemcpyDeviceToHost)); 413 | 414 | CUDA_CHECK(cudaFree(overlaps_dev)); 415 | CUDA_CHECK(cudaFree(boxes_dev)); 416 | CUDA_CHECK(cudaFree(query_boxes_dev)); 417 | 418 | } 419 | 420 | 421 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 422 | int boxes_dim, float nms_overlap_thresh, int device_id) { 423 | _set_device(device_id); 424 | 425 | float* boxes_dev = NULL; 426 | unsigned long long* mask_dev = NULL; 427 | 428 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 429 | 430 | CUDA_CHECK(cudaMalloc(&boxes_dev, 431 | boxes_num * boxes_dim * sizeof(float))); 432 | CUDA_CHECK(cudaMemcpy(boxes_dev, 433 | boxes_host, 434 | boxes_num * boxes_dim * sizeof(float), 435 | cudaMemcpyHostToDevice)); 436 | 437 | CUDA_CHECK(cudaMalloc(&mask_dev, 438 | boxes_num * col_blocks * sizeof(unsigned long long))); 439 | 440 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 441 | DIVUP(boxes_num, threadsPerBlock)); 442 | dim3 threads(threadsPerBlock); 443 | rotate_nms_kernel<<<blocks, threads>>>(boxes_num, 444 | nms_overlap_thresh, 445 | boxes_dev, 446 | mask_dev); 447 | 448 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 449 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 450 | mask_dev, 451 | sizeof(unsigned long long) * boxes_num * col_blocks, 452 | cudaMemcpyDeviceToHost)); 453 | 454 | std::vector<unsigned long long> remv(col_blocks); 455 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 456 | 457 | int num_to_keep = 0; 458 | for (int i = 0; i < boxes_num; i++) { 459 | int nblock = i / threadsPerBlock; 460 | int inblock = i % threadsPerBlock; 461 | 462 | if (!(remv[nblock] & (1ULL << inblock))) { 463 | keep_out[num_to_keep++] = i; 464 | unsigned long long *p = &mask_host[0] + i * col_blocks; 465 | for (int j = nblock; j < col_blocks; j++) { 466 | remv[j] |= p[j]; 467 | } 468 | } 469 | } 470 | *num_out = num_to_keep; 471 | 472 | CUDA_CHECK(cudaFree(boxes_dev)); 473 | CUDA_CHECK(cudaFree(mask_dev)); 474 | } 475 | --------------------------------------------------------------------------------
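A usage sketch, not part of the repository, for the GPU extension above; it assumes the module has been built in place (python setup.py build_ext --inplace, see the setup.py that follows) and that a CUDA device is available:

import numpy as np
from utils.box.ext.rbbox_overlap_gpu import rbbox_iou, rbbox_nms

boxes = np.array([[50, 50, 20, 10, 0],
                  [50, 50, 20, 10, 30]], dtype=np.float32)  # [x, y, w, h, angle]
print(rbbox_iou(boxes, boxes, 0))                           # 2x2 rotated-IoU matrix

scores = np.array([0.9, 0.6], dtype=np.float32)
dets = np.c_[boxes, scores]                                 # column 5 holds the score
print(rbbox_nms(dets, 0.1, 0))                              # indices kept by rotated NMS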
/utils/box/ext/rbbox_overlap_gpu/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.items(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | CUDA = locate_cuda() 59 | 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this. Note, it's kindof like a wierd functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can processes .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _comple methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | if os.path.splitext(src)[1] == '.cu': 90 | # use the cuda for .cu files 91 | self.set_executable('compiler_so', CUDA['nvcc']) 92 | # use only a subset of the extra_postargs, which are 1-1 translated 93 | # from the extra_compile_args in the Extension class 94 | postargs = extra_postargs['nvcc'] 95 | else: 96 | postargs = extra_postargs['gcc'] 97 | 98 | super(obj, src, ext, cc_args, postargs, pp_opts) 99 | # reset the default compiler_so, which we might have changed for cuda 100 | self.compiler_so = default_compiler_so 101 | 102 | # inject our redefined _compile method into the class 103 | self._compile = _compile 104 | 105 | 106 | # run the customize_compiler 107 | class custom_build_ext(build_ext): 108 | def build_extensions(self): 109 | customize_compiler_for_nvcc(self.compiler) 110 | build_ext.build_extensions(self) 111 | 112 | 113 | ext_modules = [ 114 | Extension('rbbox_overlap', 115 | ['rbbox_overlap_kernel.cu', 'rbbox_overlap.pyx'], 116 | library_dirs=[CUDA['lib64']], 117 | libraries=['cudart'], 118 | language='c++', 119 | runtime_library_dirs=[CUDA['lib64']], 120 | # this syntax is specific to this build system 121 | # we're only going to use certain compiler args with nvcc and not with 122 | # gcc the implementation of this trick is in customize_compiler() below 123 | extra_compile_args={'gcc': ["-Wno-unused-function"], 124 | 'nvcc': ['-arch=sm_35', 125 | '--ptxas-options=-v', 126 | '-c', 127 | '--compiler-options', 128 | "'-fPIC'"]}, 129 | include_dirs=[numpy_include, CUDA['include']] 130 | ), 131 | ] 132 | 133 | setup( 134 | name='fast_rcnn', 135 | ext_modules=ext_modules, 136 | # inject our custom trigger 137 | cmdclass={'build_ext': custom_build_ext}, 138 | ) 139 | -------------------------------------------------------------------------------- /utils/box/metric.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : metric.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:08 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | import numpy as np 12 | 13 | from collections import defaultdict, Counter 14 | 15 | from .rbbox_np import rbbox_iou 16 | 17 | 18 | def get_ap(recall, precision): 19 | recall = [0] + list(recall) + [1] 20 | precision = [0] + list(precision) + [0] 21 | for i in range(len(precision) - 1, 0, -1): 22 | precision[i - 1] = max(precision[i - 1], precision[i]) 23 | ap = sum((recall[i] - recall[i - 1]) * precision[i] for i in range(1, len(recall)) if recall[i] != recall[i - 1]) 24 | return ap * 100 25 | 26 | 27 | def get_ap_07(recall, precision): 28 | ap = 0. 29 | for t in np.linspace(0, 1, 11, endpoint=True): 30 | mask = recall >= t 31 | if np.any(mask): 32 | ap += np.max(precision[mask]) / 11 33 | return ap * 100 34 | 35 | 36 | def get_det_aps(detect, target, num_classes, iou_thresh=0.5, use_07_metric=False): 37 | # [[index, bbox, score, label], ...] 
38 | aps = [] 39 | for c in range(num_classes): 40 | target_c = list(filter(lambda x: x[3] == c, target)) 41 | detect_c = filter(lambda x: x[3] == c, detect) 42 | detect_c = sorted(detect_c, key=lambda x: x[2], reverse=True) 43 | tp = np.zeros(len(detect_c)) 44 | fp = np.zeros(len(detect_c)) 45 | target_count = Counter([x[0] for x in target_c]) 46 | target_count = {index: np.zeros(count) for index, count in target_count.items()} 47 | target_lut = defaultdict(list) 48 | for index, bbox, conf, label in target_c: 49 | target_lut[index].append(bbox) 50 | detect_lut = defaultdict(list) 51 | for index, bbox, conf, label in detect_c: 52 | detect_lut[index].append(bbox) 53 | iou_lut = dict() 54 | for index, bboxes in detect_lut.items(): 55 | if index in target_lut: 56 | iou_lut[index] = rbbox_iou(np.stack(bboxes), np.stack(target_lut[index])) 57 | counter = defaultdict(int) 58 | for i, (index, bbox, conf, label) in enumerate(detect_c): 59 | count = counter[index] 60 | counter[index] += 1 61 | iou_max = -np.inf 62 | hit_j = 0 63 | if index in iou_lut: 64 | for j, iou in enumerate(iou_lut[index][count]): 65 | if iou > iou_max: 66 | iou_max = iou 67 | hit_j = j 68 | if iou_max > iou_thresh and target_count[index][hit_j] == 0: 69 | tp[i] = 1 70 | target_count[index][hit_j] = 1 71 | else: 72 | fp[i] = 1 73 | tp_sum = np.cumsum(tp) 74 | fp_sum = np.cumsum(fp) 75 | npos = len(target_c) 76 | recall = tp_sum / npos 77 | precision = tp_sum / (tp_sum + fp_sum) 78 | aps.append((get_ap_07 if use_07_metric else get_ap)(recall, precision)) 79 | return aps 80 | -------------------------------------------------------------------------------- /utils/box/rbbox.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | 6 | try: 7 | from .ext.rbbox_overlap_gpu import rbbox_iou as rbbox_iou_gpu 8 | from .ext.rbbox_overlap_gpu import rbbox_nms as rbbox_nms_gpu 9 | 10 | 11 | def rbbox_iou(boxes1, boxes2, device=None): # [x, y, w, h, a] 12 | if device is None: 13 | device = 0 if boxes1.device.type == 'cpu' else boxes1.device.index 14 | boxes1 = boxes1.reshape([-1, 5]).detach().cpu().numpy().astype(np.float32) 15 | boxes2 = boxes2.reshape([-1, 5]).detach().cpu().numpy().astype(np.float32) 16 | ious = rbbox_iou_gpu(boxes1, boxes2, device) 17 | return ious 18 | 19 | def rbbox_nms(boxes, scores, iou_thresh=0.5, device=None): 20 | if device is None: 21 | device = 0 if boxes.device.type == 'cpu' else boxes.device.index 22 | boxes = boxes.reshape([-1, 5]).detach().cpu().numpy().astype(np.float32) 23 | scores = scores.reshape([-1, 1]).detach().cpu().numpy().astype(np.float32) 24 | boxes = np.c_[boxes, scores] 25 | keeps = rbbox_nms_gpu(boxes, iou_thresh, device) 26 | return keeps 27 | 28 | except ModuleNotFoundError as e: 29 | 30 | from .ext.rbbox_overlap_cpu import rbbox_iou_nxn as rbbox_iou_cpu 31 | from .ext.rbbox_overlap_cpu import rbbox_nms as rbbox_nms_cpu 32 | 33 | 34 | def rbbox_iou(boxes1, boxes2): 35 | boxes1 = boxes1.reshape([-1, 5]).detach().cpu().numpy().astype(np.float64) 36 | boxes2 = boxes2.reshape([-1, 5]).detach().cpu().numpy().astype(np.float64) 37 | ious = rbbox_iou_cpu(boxes1, boxes2) 38 | return ious 39 | 40 | 41 | def rbbox_nms(boxes, scores, iou_thresh=0.5): 42 | boxes = boxes.reshape([-1, 5]).detach().cpu().numpy().astype(np.float64) 43 | scores = scores.reshape([-1]).detach().cpu().numpy().astype(np.float64) 44 | keeps = rbbox_nms_cpu(boxes, scores, iou_thresh) 45 | return keeps 46 | 47 | 48 | def rbbox_batched_nms(boxes, scores, labels, 
iou_thresh=0.5): 49 | if len(boxes) == 0: 50 | return np.empty([0], dtype=np.int) 51 | max_coordinate = boxes[:, 0:2].max() + boxes[:, 2:4].max() 52 | labels = labels.to(boxes) 53 | offsets = labels * (max_coordinate + 1) 54 | boxes = boxes.clone() 55 | boxes[:, :2] += offsets[:, None] 56 | return rbbox_nms(boxes, scores, iou_thresh) 57 | -------------------------------------------------------------------------------- /utils/box/rbbox_np.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | 6 | try: 7 | from .ext.rbbox_overlap_gpu import rbbox_iou as rbbox_iou_gpu 8 | from .ext.rbbox_overlap_gpu import rbbox_nms as rbbox_nms_gpu 9 | 10 | 11 | def rbbox_iou(boxes1, boxes2, device=0): # [x, y, w, h, a] 12 | boxes1 = boxes1.reshape([-1, 5]).astype(np.float32) 13 | boxes2 = boxes2.reshape([-1, 5]).astype(np.float32) 14 | ious = rbbox_iou_gpu(boxes1, boxes2, device) 15 | return ious 16 | 17 | def rbbox_nms(boxes, scores, iou_thresh=0.5, device=0): 18 | boxes = boxes.reshape([-1, 5]).astype(np.float32) 19 | scores = scores.reshape([-1, 1]).astype(np.float32) 20 | boxes = np.c_[boxes, scores] 21 | keeps = rbbox_nms_gpu(boxes, iou_thresh, device) 22 | return keeps 23 | 24 | except ModuleNotFoundError as e: 25 | 26 | from .ext.rbbox_overlap_cpu import rbbox_iou_nxn as rbbox_iou_cpu 27 | from .ext.rbbox_overlap_cpu import rbbox_nms as rbbox_nms_cpu 28 | 29 | 30 | def rbbox_iou(boxes1, boxes2): 31 | boxes1 = boxes1.reshape([-1, 5]).astype(np.float64) 32 | boxes2 = boxes2.reshape([-1, 5]).astype(np.float64) 33 | ious = rbbox_iou_cpu(boxes1, boxes2) 34 | return ious 35 | 36 | 37 | def rbbox_nms(boxes, scores, iou_thresh=0.5): 38 | boxes = boxes.reshape([-1, 5]).astype(np.float64) 39 | scores = scores.reshape([-1]).astype(np.float64) 40 | keeps = rbbox_nms_cpu(boxes, scores, iou_thresh) 41 | return keeps 42 | 43 | 44 | def rbbox_batched_nms(boxes, scores, labels, iou_thresh=0.5): 45 | if len(boxes) == 0: 46 | return np.empty([0], dtype=np.int) 47 | max_coordinate = boxes[:, 0:2].max() + boxes[:, 2:4].max() 48 | offsets = labels * (max_coordinate + 1) 49 | boxes = boxes.copy() 50 | boxes[:, :2] += offsets[:, None] 51 | return rbbox_nms(boxes, scores, iou_thresh) 52 | -------------------------------------------------------------------------------- /utils/crop_image.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : crop_image.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 11:09 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
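The batched NMS above runs a single class-agnostic NMS by shifting each class into its own coordinate range, so boxes from different classes can never suppress each other. A small plain-NumPy sketch of that offset trick, illustrative only and requiring no compiled extension:

import numpy as np

boxes = np.array([[50., 50., 20., 10., 0.],
                  [52., 50., 20., 10., 5.],
                  [50., 50., 20., 10., 0.]])
labels = np.array([0, 0, 1])

max_coordinate = boxes[:, 0:2].max() + boxes[:, 2:4].max()
offsets = labels * (max_coordinate + 1)
shifted = boxes.copy()
shifted[:, :2] += offsets[:, None]
print(shifted[:, :2])  # the class-1 box is moved far away from the class-0 boxes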
10 | 11 | import os 12 | import json 13 | import cv2 as cv 14 | import numpy as np 15 | import multiprocessing 16 | 17 | from copy import deepcopy 18 | 19 | from .box.bbox_np import xy42xywha 20 | from .image import imread, imwrite 21 | 22 | 23 | class Cropper: 24 | def __init__(self, size, overlap): 25 | self.sizes = sorted([size] if isinstance(size, int) else size) 26 | self.overlap = overlap 27 | 28 | @staticmethod 29 | def crop_bbox(objs, roi): 30 | sub_objs = [] 31 | x1, y1, x2, y2 = roi 32 | roi = ((x1 + x2) / 2, (y1 + y2) / 2), (x2 - x1, y2 - y1), 0 33 | for obj in objs: 34 | x, y, w, h, a = xy42xywha(np.array(obj['bbox'], dtype=np.float32)) 35 | inter_points = cv.rotatedRectangleIntersection(roi, ((x, y), (w, h), a))[1] 36 | if inter_points is not None: 37 | order_points = cv.convexHull(inter_points, returnPoints=True) 38 | inter_area = cv.contourArea(order_points) 39 | iou = inter_area / (w * h) 40 | if iou > 0.5: 41 | sub_bbox = cv.boxPoints(cv.minAreaRect(order_points)) - [x1, y1] 42 | obj = deepcopy(obj) 43 | obj['bbox'] = sub_bbox.tolist() 44 | sub_objs.append(obj) 45 | return sub_objs 46 | 47 | def crop_with_anno(self, path_img, path_anno, out_dir_images, out_dir_annos, save_empty=False): 48 | print('crop:', path_img, path_anno) 49 | img = imread(path_img) 50 | ih, iw = img.shape[:2] 51 | name = os.path.splitext(os.path.basename(path_img))[0] 52 | anno = [] if path_anno is None else json.load(open(path_anno)) 53 | for i, size in enumerate(self.sizes): 54 | if i > 0 and (max if save_empty else min)(iw, ih) < self.sizes[i - 1]: 55 | break 56 | stride = int(size * (1 - self.overlap)) 57 | for x in range(0, iw, stride): 58 | for y in range(0, ih, stride): 59 | w, h = size, size 60 | if x + size > iw: 61 | x = max(0, iw - size) 62 | w = iw - x 63 | if y + size > ih: 64 | y = max(0, ih - size) 65 | h = ih - y 66 | save_name = '%s-%d-%d-%d-%d' % (name, x, y, w, h) 67 | sub_anno = self.crop_bbox(anno, (x, y, x + w, y + h)) 68 | if sub_anno: 69 | json.dump(sub_anno, open(os.path.join(out_dir_annos, save_name + '.json'), 'wt'), indent=2) 70 | if sub_anno or save_empty: 71 | save_path = os.path.join(out_dir_images, save_name + '.jpg') 72 | sub_img = img[y: y + h, x: x + w] 73 | imwrite(sub_img, save_path) 74 | 75 | def crop_batch(self, pairs, out_dir_images, out_dir_annos, save_empty=False): 76 | os.makedirs(out_dir_images, exist_ok=True) 77 | os.makedirs(out_dir_annos, exist_ok=True) 78 | pool = multiprocessing.Pool(processes=min(multiprocessing.cpu_count(), 8)) 79 | for image, anno in pairs: 80 | pool.apply_async(self.crop_with_anno, (image, anno, out_dir_images, out_dir_annos, save_empty)) 81 | pool.close() 82 | pool.join() 83 | -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from PIL import ImageFile 7 | 8 | 9 | ImageFile.LOAD_TRUNCATED_IMAGES = True 10 | 11 | 12 | def imread(path, mode='RGB'): 13 | img = Image.open(path) 14 | if mode is not None: 15 | img = img.convert(mode) 16 | return np.array(img) 17 | 18 | 19 | def imwrite(image, path): 20 | Image.fromarray(image).save(path) 21 | -------------------------------------------------------------------------------- /utils/init.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from torch import nn 4 | 5 | 6 | def weight_init_normal(m): 7 | if isinstance(m, (nn.Linear, nn.Conv2d, 
nn.ConvTranspose2d)): 8 | nn.init.normal_(m.weight, 0, 0.02) 9 | elif isinstance(m, nn.BatchNorm2d): 10 | nn.init.normal_( m.weight, 1, 0.02) 11 | nn.init.constant_( m.bias, 0) 12 | 13 | 14 | def weight_init_uniform(m): 15 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)): 16 | nn.init.uniform_(m.weight, 0, 0.02) 17 | elif isinstance(m, nn.BatchNorm2d): 18 | nn.init.constant_( m.weight, 1) 19 | nn.init.constant_( m.bias, 0) 20 | 21 | 22 | def weight_init_kaiming_normal(m): 23 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)): 24 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 25 | elif isinstance(m, nn.BatchNorm2d): 26 | nn.init.constant_(m.weight, 1) 27 | nn.init.constant_(m.bias, 0) 28 | 29 | 30 | def weight_init_kaiming_uniform(m): 31 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)): 32 | nn.init.kaiming_uniform_(m.weight, mode='fan_out', nonlinearity='relu') 33 | elif isinstance(m, nn.BatchNorm2d): 34 | nn.init.constant_(m.weight, 1) 35 | nn.init.constant_(m.bias, 0) 36 | 37 | 38 | def weight_init_xavier_normal(m): 39 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)): 40 | nn.init.xavier_normal_(m.weight) 41 | elif isinstance(m, nn.BatchNorm2d): 42 | nn.init.constant_(m.weight, 1) 43 | nn.init.constant_(m.bias, 0) 44 | 45 | 46 | def weight_init_xavier_uniform(m): 47 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)): 48 | nn.init.xavier_uniform_(m.weight) 49 | elif isinstance(m, nn.BatchNorm2d): 50 | nn.init.constant_(m.weight, 1) 51 | nn.init.constant_(m.bias, 0) 52 | 53 | 54 | weight_init = { 55 | 'normal': weight_init_normal, 56 | 'uniform': weight_init_uniform, 57 | 'kaiming_normal': weight_init_kaiming_normal, 58 | 'kaiming_uniform': weight_init_kaiming_uniform, 59 | 'xavier_normal': weight_init_xavier_normal, 60 | 'xavier_uniform': weight_init_xavier_uniform, 61 | } 62 | -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | 5 | from collections import defaultdict, OrderedDict 6 | 7 | 8 | def containerize(x, n=1): 9 | return x if isinstance(x, (list, tuple)) else [x] * n 10 | 11 | 12 | def convert_path(path): 13 | return path.replace(r'\/'.replace(os.sep, ''), os.sep) 14 | 15 | 16 | class Node: 17 | __slots__ = 'key', 'val', 'cnt' 18 | 19 | def __init__(self, key, val, cnt=0): 20 | self.key, self.val, self.cnt = key, val, cnt 21 | 22 | 23 | class LFUCache: 24 | def __init__(self, capacity): 25 | self.capacity = capacity 26 | self.cache = {} # type {key: node} 27 | self.cnt2node = defaultdict(OrderedDict) 28 | self.mincnt = 0 29 | 30 | def get(self, key, default=None): 31 | if key not in self.cache: 32 | return default 33 | 34 | node = self.cache[key] 35 | del self.cnt2node[node.cnt][key] 36 | 37 | if not self.cnt2node[node.cnt]: 38 | del self.cnt2node[node.cnt] 39 | 40 | node.cnt += 1 41 | self.cnt2node[node.cnt][key] = node 42 | 43 | if not self.cnt2node[self.mincnt]: 44 | self.mincnt += 1 45 | return node.val 46 | 47 | def put(self, key, value): 48 | if key in self.cache: 49 | self.cache[key].val = value 50 | self.get(key) 51 | return 52 | if len(self.cache) >= self.capacity: 53 | pop_key, _pop_node = self.cnt2node[self.mincnt].popitem(last=False) 54 | del self.cache[pop_key] 55 | 56 | self.cache[key] = self.cnt2node[1][key] = Node(key, value, 1) 57 | self.mincnt = 1 58 | 
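A short usage sketch, not part of the repository, for the least-frequently-used cache defined above; it assumes the repository root is importable:

from utils.misc import LFUCache

cache = LFUCache(2)
cache.put('a', 1)
cache.put('b', 2)
cache.get('a')                         # 'a' is now used more often than 'b'
cache.put('c', 3)                      # at capacity: evicts 'b', the least-frequently-used key
print(cache.get('b'))                  # None
print(cache.get('a'), cache.get('c'))  # 1 3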
-------------------------------------------------------------------------------- /utils/parallel/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .sync_batchnorm import convert_model 4 | from .data_parallel import CustomDetDataParallel 5 | -------------------------------------------------------------------------------- /utils/parallel/data_parallel.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | 5 | from torch import nn 6 | 7 | 8 | class CustomDetDataParallel(nn.DataParallel): 9 | """ 10 | force splitting data to all gpus instead of sending all data to cuda:0 and then moving around. 11 | """ 12 | 13 | def __init__(self, module, device_ids): 14 | super().__init__(module, device_ids) 15 | 16 | def scatter(self, inputs, kwargs, device_ids): 17 | # More like scatter and data prep at the same time. The point is we prep the data in such a way 18 | # that no scatter is necessary, and there's no need to shuffle stuff around different GPUs. 19 | data_splits = [] 20 | for i, device in enumerate(device_ids): 21 | data_split = [] 22 | for data in inputs: 23 | data = data[i:: len(device_ids)] 24 | if isinstance(data, torch.Tensor): 25 | data = data.to(f'cuda:{device}', non_blocking=True) 26 | data_split.append(data) 27 | data_splits.append(data_split) 28 | return data_splits, [kwargs] * len(device_ids) 29 | 30 | def gather(self, outputs, output_device): 31 | if self.training: 32 | # ( 33 | # {}, {}, ... 34 | # ) 35 | outputs = super().gather(outputs, output_device) 36 | for key, val in list(outputs.items()): 37 | outputs[key] = val.mean() 38 | else: 39 | # ( 40 | # [[], [], ...], [[], [], ...] 41 | # ) 42 | outputs = sum(map(list, zip(*outputs)), []) 43 | return outputs 44 | -------------------------------------------------------------------------------- /utils/parallel/sync_batchnorm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : __init__.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d 12 | from .batchnorm import patch_sync_batchnorm, convert_model 13 | from .replicate import DataParallelWithCallback, patch_replication_callback 14 | -------------------------------------------------------------------------------- /utils/parallel/sync_batchnorm/batchnorm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : batchnorm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 
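A minimal sketch, illustrative only, of the round-robin split performed by CustomDetDataParallel.scatter above: sample i is routed to device i % len(device_ids), so the full batch never has to pass through cuda:0 first:

import torch

device_ids = [0, 1]
batch = torch.arange(6)  # stand-in for a batch of 6 samples
splits = [batch[i::len(device_ids)] for i in range(len(device_ids))]
print(splits)  # [tensor([0, 2, 4]), tensor([1, 3, 5])]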
10 | 11 | import collections 12 | import contextlib 13 | 14 | import torch 15 | import torch.nn.functional as F 16 | 17 | from torch.nn.modules.batchnorm import _BatchNorm 18 | 19 | try: 20 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 21 | except ImportError: 22 | ReduceAddCoalesced = Broadcast = None 23 | 24 | try: 25 | from jactorch.parallel.comm import SyncMaster 26 | from jactorch.parallel.data_parallel import JacDataParallel as DataParallelWithCallback 27 | except ImportError: 28 | from .comm import SyncMaster 29 | from .replicate import DataParallelWithCallback 30 | 31 | __all__ = [ 32 | 'SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d', 33 | 'patch_sync_batchnorm', 'convert_model' 34 | ] 35 | 36 | 37 | def _sum_ft(tensor): 38 | """sum over the first and last dimention""" 39 | return tensor.sum(dim=0).sum(dim=-1) 40 | 41 | 42 | def _unsqueeze_ft(tensor): 43 | """add new dimensions at the front and the tail""" 44 | return tensor.unsqueeze(0).unsqueeze(-1) 45 | 46 | 47 | _ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size']) 48 | _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) 49 | 50 | 51 | class _SynchronizedBatchNorm(_BatchNorm): 52 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): 53 | assert ReduceAddCoalesced is not None, 'Can not use Synchronized Batch Normalization without CUDA support.' 54 | 55 | super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) 56 | 57 | self._sync_master = SyncMaster(self._data_parallel_master) 58 | 59 | self._is_parallel = False 60 | self._parallel_id = None 61 | self._slave_pipe = None 62 | 63 | def forward(self, input): 64 | # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation. 65 | if not (self._is_parallel and self.training): 66 | return F.batch_norm( 67 | input, self.running_mean, self.running_var, self.weight, self.bias, 68 | self.training, self.momentum, self.eps) 69 | 70 | # Resize the input to (B, C, -1). 71 | input_shape = input.size() 72 | input = input.view(input.size(0), self.num_features, -1) 73 | 74 | # Compute the sum and square-sum. 75 | sum_size = input.size(0) * input.size(2) 76 | input_sum = _sum_ft(input) 77 | input_ssum = _sum_ft(input ** 2) 78 | 79 | # Reduce-and-broadcast the statistics. 80 | if self._parallel_id == 0: 81 | mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size)) 82 | else: 83 | mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size)) 84 | 85 | # Compute the output. 86 | if self.affine: 87 | # MJY:: Fuse the multiplication for speed. 88 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias) 89 | else: 90 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std) 91 | 92 | # Reshape it. 93 | return output.view(input_shape) 94 | 95 | def __data_parallel_replicate__(self, ctx, copy_id): 96 | self._is_parallel = True 97 | self._parallel_id = copy_id 98 | 99 | # parallel_id == 0 means master device. 
100 | if self._parallel_id == 0: 101 | ctx.sync_master = self._sync_master 102 | else: 103 | self._slave_pipe = ctx.sync_master.register_slave(copy_id) 104 | 105 | def _data_parallel_master(self, intermediates): 106 | """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" 107 | 108 | # Always using same "device order" makes the ReduceAdd operation faster. 109 | # Thanks to:: Tete Xiao (http://tetexiao.com/) 110 | intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) 111 | 112 | to_reduce = [i[1][:2] for i in intermediates] 113 | to_reduce = [j for i in to_reduce for j in i] # flatten 114 | target_gpus = [i[1].sum.get_device() for i in intermediates] 115 | 116 | sum_size = sum([i[1].sum_size for i in intermediates]) 117 | sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) 118 | mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) 119 | 120 | broadcasted = Broadcast.apply(target_gpus, mean, inv_std) 121 | 122 | outputs = [] 123 | for i, rec in enumerate(intermediates): 124 | outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) 125 | 126 | return outputs 127 | 128 | def _compute_mean_std(self, sum_, ssum, size): 129 | """Compute the mean and standard-deviation with sum and square-sum. This method 130 | also maintains the moving average on the master device.""" 131 | assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' 132 | mean = sum_ / size 133 | sumvar = ssum - sum_ * mean 134 | unbias_var = sumvar / (size - 1) 135 | bias_var = sumvar / size 136 | 137 | if hasattr(torch, 'no_grad'): 138 | with torch.no_grad(): 139 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data 140 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data 141 | else: 142 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data 143 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data 144 | 145 | return mean, bias_var.clamp(self.eps) ** -0.5 146 | 147 | 148 | class SynchronizedBatchNorm1d(_SynchronizedBatchNorm): 149 | r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a 150 | mini-batch. 151 | 152 | .. math:: 153 | 154 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 155 | 156 | This module differs from the built-in PyTorch BatchNorm1d as the mean and 157 | standard-deviation are reduced across all devices during training. 158 | 159 | For example, when one uses `nn.DataParallel` to wrap the network during 160 | training, PyTorch's implementation normalize the tensor on each device using 161 | the statistics only on that device, which accelerated the computation and 162 | is also easy to implement, but the statistics might be inaccurate. 163 | Instead, in this synchronized version, the statistics will be computed 164 | over all training samples distributed on multiple devices. 165 | 166 | Note that, for one-GPU or CPU-only case, this module behaves exactly same 167 | as the built-in PyTorch implementation. 168 | 169 | The mean and standard-deviation are calculated per-dimension over 170 | the mini-batches and gamma and beta are learnable parameter vectors 171 | of size C (where C is the input size). 172 | 173 | During training, this layer keeps a running estimate of its computed mean 174 | and variance. The running sum is kept with a default momentum of 0.1. 
175 | 176 | During evaluation, this running mean/variance is used for normalization. 177 | 178 | Because the BatchNorm is done over the `C` dimension, computing statistics 179 | on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm 180 | 181 | Args: 182 | num_features: num_features from an expected input of size 183 | `batch_size x num_features [x width]` 184 | eps: a value added to the denominator for numerical stability. 185 | Default: 1e-5 186 | momentum: the value used for the running_mean and running_var 187 | computation. Default: 0.1 188 | affine: a boolean value that when set to ``True``, gives the layer learnable 189 | affine parameters. Default: ``True`` 190 | 191 | Shape:: 192 | - Input: :math:`(N, C)` or :math:`(N, C, L)` 193 | - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) 194 | 195 | Examples: 196 | >>> # With Learnable Parameters 197 | >>> m = SynchronizedBatchNorm1d(100) 198 | >>> # Without Learnable Parameters 199 | >>> m = SynchronizedBatchNorm1d(100, affine=False) 200 | >>> input = torch.autograd.Variable(torch.randn(20, 100)) 201 | >>> output = m(input) 202 | """ 203 | 204 | def _check_input_dim(self, input): 205 | if input.dim() != 2 and input.dim() != 3: 206 | raise ValueError('expected 2D or 3D input (got {}D input)' 207 | .format(input.dim())) 208 | super(SynchronizedBatchNorm1d, self)._check_input_dim(input) 209 | 210 | 211 | class SynchronizedBatchNorm2d(_SynchronizedBatchNorm): 212 | r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch 213 | of 3d inputs 214 | 215 | .. math:: 216 | 217 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 218 | 219 | This module differs from the built-in PyTorch BatchNorm2d as the mean and 220 | standard-deviation are reduced across all devices during training. 221 | 222 | For example, when one uses `nn.DataParallel` to wrap the network during 223 | training, PyTorch's implementation normalize the tensor on each device using 224 | the statistics only on that device, which accelerated the computation and 225 | is also easy to implement, but the statistics might be inaccurate. 226 | Instead, in this synchronized version, the statistics will be computed 227 | over all training samples distributed on multiple devices. 228 | 229 | Note that, for one-GPU or CPU-only case, this module behaves exactly same 230 | as the built-in PyTorch implementation. 231 | 232 | The mean and standard-deviation are calculated per-dimension over 233 | the mini-batches and gamma and beta are learnable parameter vectors 234 | of size C (where C is the input size). 235 | 236 | During training, this layer keeps a running estimate of its computed mean 237 | and variance. The running sum is kept with a default momentum of 0.1. 238 | 239 | During evaluation, this running mean/variance is used for normalization. 240 | 241 | Because the BatchNorm is done over the `C` dimension, computing statistics 242 | on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm 243 | 244 | Args: 245 | num_features: num_features from an expected input of 246 | size batch_size x num_features x height x width 247 | eps: a value added to the denominator for numerical stability. 248 | Default: 1e-5 249 | momentum: the value used for the running_mean and running_var 250 | computation. Default: 0.1 251 | affine: a boolean value that when set to ``True``, gives the layer learnable 252 | affine parameters. 
Default: ``True`` 253 | 254 | Shape:: 255 | - Input: :math:`(N, C, H, W)` 256 | - Output: :math:`(N, C, H, W)` (same shape as input) 257 | 258 | Examples: 259 | >>> # With Learnable Parameters 260 | >>> m = SynchronizedBatchNorm2d(100) 261 | >>> # Without Learnable Parameters 262 | >>> m = SynchronizedBatchNorm2d(100, affine=False) 263 | >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45)) 264 | >>> output = m(input) 265 | """ 266 | 267 | def _check_input_dim(self, input): 268 | if input.dim() != 4: 269 | raise ValueError('expected 4D input (got {}D input)' 270 | .format(input.dim())) 271 | super(SynchronizedBatchNorm2d, self)._check_input_dim(input) 272 | 273 | 274 | class SynchronizedBatchNorm3d(_SynchronizedBatchNorm): 275 | r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch 276 | of 4d inputs 277 | 278 | .. math:: 279 | 280 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 281 | 282 | This module differs from the built-in PyTorch BatchNorm3d as the mean and 283 | standard-deviation are reduced across all devices during training. 284 | 285 | For example, when one uses `nn.DataParallel` to wrap the network during 286 | training, PyTorch's implementation normalize the tensor on each device using 287 | the statistics only on that device, which accelerated the computation and 288 | is also easy to implement, but the statistics might be inaccurate. 289 | Instead, in this synchronized version, the statistics will be computed 290 | over all training samples distributed on multiple devices. 291 | 292 | Note that, for one-GPU or CPU-only case, this module behaves exactly same 293 | as the built-in PyTorch implementation. 294 | 295 | The mean and standard-deviation are calculated per-dimension over 296 | the mini-batches and gamma and beta are learnable parameter vectors 297 | of size C (where C is the input size). 298 | 299 | During training, this layer keeps a running estimate of its computed mean 300 | and variance. The running sum is kept with a default momentum of 0.1. 301 | 302 | During evaluation, this running mean/variance is used for normalization. 303 | 304 | Because the BatchNorm is done over the `C` dimension, computing statistics 305 | on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm 306 | or Spatio-temporal BatchNorm 307 | 308 | Args: 309 | num_features: num_features from an expected input of 310 | size batch_size x num_features x depth x height x width 311 | eps: a value added to the denominator for numerical stability. 312 | Default: 1e-5 313 | momentum: the value used for the running_mean and running_var 314 | computation. Default: 0.1 315 | affine: a boolean value that when set to ``True``, gives the layer learnable 316 | affine parameters. 
Default: ``True`` 317 | 318 | Shape:: 319 | - Input: :math:`(N, C, D, H, W)` 320 | - Output: :math:`(N, C, D, H, W)` (same shape as input) 321 | 322 | Examples: 323 | >>> # With Learnable Parameters 324 | >>> m = SynchronizedBatchNorm3d(100) 325 | >>> # Without Learnable Parameters 326 | >>> m = SynchronizedBatchNorm3d(100, affine=False) 327 | >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10)) 328 | >>> output = m(input) 329 | """ 330 | 331 | def _check_input_dim(self, input): 332 | if input.dim() != 5: 333 | raise ValueError('expected 5D input (got {}D input)' 334 | .format(input.dim())) 335 | super(SynchronizedBatchNorm3d, self)._check_input_dim(input) 336 | 337 | 338 | @contextlib.contextmanager 339 | def patch_sync_batchnorm(): 340 | import torch.nn as nn 341 | 342 | backup = nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d 343 | 344 | nn.BatchNorm1d = SynchronizedBatchNorm1d 345 | nn.BatchNorm2d = SynchronizedBatchNorm2d 346 | nn.BatchNorm3d = SynchronizedBatchNorm3d 347 | 348 | yield 349 | 350 | nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d = backup 351 | 352 | 353 | def convert_model(module): 354 | """Traverse the input module and its child recursively 355 | and replace all instance of torch.nn.modules.batchnorm.BatchNorm*N*d 356 | to SynchronizedBatchNorm*N*d 357 | 358 | Args: 359 | module: the input module needs to be convert to SyncBN model 360 | 361 | Examples: 362 | >>> import torch.nn as nn 363 | >>> import torchvision 364 | >>> # m is a standard pytorch model 365 | >>> m = torchvision.models.resnet18(True) 366 | >>> m = nn.DataParallel(m) 367 | >>> # after convert, m is using SyncBN 368 | >>> m = convert_model(m) 369 | """ 370 | if isinstance(module, torch.nn.DataParallel): 371 | mod = module.module 372 | mod = convert_model(mod) 373 | mod = DataParallelWithCallback(mod, device_ids=module.device_ids) 374 | return mod 375 | 376 | mod = module 377 | for pth_module, sync_module in zip([torch.nn.modules.batchnorm.BatchNorm1d, 378 | torch.nn.modules.batchnorm.BatchNorm2d, 379 | torch.nn.modules.batchnorm.BatchNorm3d], 380 | [SynchronizedBatchNorm1d, 381 | SynchronizedBatchNorm2d, 382 | SynchronizedBatchNorm3d]): 383 | if isinstance(module, pth_module): 384 | mod = sync_module(module.num_features, module.eps, module.momentum, module.affine) 385 | mod.running_mean = module.running_mean 386 | mod.running_var = module.running_var 387 | if module.affine: 388 | mod.weight.data = module.weight.data.clone().detach() 389 | mod.bias.data = module.bias.data.clone().detach() 390 | 391 | for name, child in module.named_children(): 392 | mod.add_module(name, convert_model(child)) 393 | 394 | return mod 395 | -------------------------------------------------------------------------------- /utils/parallel/sync_batchnorm/comm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : comm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import queue 12 | import collections 13 | import threading 14 | 15 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] 16 | 17 | 18 | class FutureResult(object): 19 | """A thread-safe future implementation. 
Used only as one-to-one pipe.""" 20 | 21 | def __init__(self): 22 | self._result = None 23 | self._lock = threading.Lock() 24 | self._cond = threading.Condition(self._lock) 25 | 26 | def put(self, result): 27 | with self._lock: 28 | assert self._result is None, 'Previous result hasn\'t been fetched.' 29 | self._result = result 30 | self._cond.notify() 31 | 32 | def get(self): 33 | with self._lock: 34 | if self._result is None: 35 | self._cond.wait() 36 | 37 | res = self._result 38 | self._result = None 39 | return res 40 | 41 | 42 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) 43 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) 44 | 45 | 46 | class SlavePipe(_SlavePipeBase): 47 | """Pipe for master-slave communication.""" 48 | 49 | def run_slave(self, msg): 50 | self.queue.put((self.identifier, msg)) 51 | ret = self.result.get() 52 | self.queue.put(True) 53 | return ret 54 | 55 | 56 | class SyncMaster(object): 57 | """An abstract `SyncMaster` object. 58 | 59 | - During the replication, as the data parallel will trigger a callback on each module, all slave devices should 60 | call `register(id)` and obtain a `SlavePipe` to communicate with the master. 61 | - During the forward pass, the master device invokes `run_master`; all messages from slave devices will be collected, 62 | and passed to a registered callback. 63 | - After receiving the messages, the master device should gather the information and determine the message to be passed 64 | back to each slave device. 65 | """ 66 | 67 | def __init__(self, master_callback): 68 | """ 69 | 70 | Args: 71 | master_callback: a callback to be invoked after having collected messages from slave devices. 72 | """ 73 | self._master_callback = master_callback 74 | self._queue = queue.Queue() 75 | self._registry = collections.OrderedDict() 76 | self._activated = False 77 | 78 | def __getstate__(self): 79 | return {'master_callback': self._master_callback} 80 | 81 | def __setstate__(self, state): 82 | self.__init__(state['master_callback']) 83 | 84 | def register_slave(self, identifier): 85 | """ 86 | Register a slave device. 87 | 88 | Args: 89 | identifier: an identifier, usually the device id. 90 | 91 | Returns: a `SlavePipe` object which can be used to communicate with the master device. 92 | 93 | """ 94 | if self._activated: 95 | assert self._queue.empty(), 'Queue is not clean before next initialization.' 96 | self._activated = False 97 | self._registry.clear() 98 | future = FutureResult() 99 | self._registry[identifier] = _MasterRegistry(future) 100 | return SlavePipe(identifier, self._queue, future) 101 | 102 | def run_master(self, master_msg): 103 | """ 104 | Main entry for the master device in each forward pass. 105 | The messages are first collected from each device (including the master device), and then 106 | a callback is invoked to compute the message to be sent back to each device 107 | (including the master device). 108 | 109 | Args: 110 | master_msg: the message that the master wants to send to itself. This will be placed as the first 111 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. 112 | 113 | Returns: the message to be sent back to the master device. 
114 | 115 | """ 116 | self._activated = True 117 | 118 | intermediates = [(0, master_msg)] 119 | for i in range(self.nr_slaves): 120 | intermediates.append(self._queue.get()) 121 | 122 | results = self._master_callback(intermediates) 123 | assert results[0][0] == 0, 'The first result should belong to the master.' 124 | 125 | for i, res in results: 126 | if i == 0: 127 | continue 128 | self._registry[i].result.put(res) 129 | 130 | for i in range(self.nr_slaves): 131 | assert self._queue.get() is True 132 | 133 | return results[0][1] 134 | 135 | @property 136 | def nr_slaves(self): 137 | return len(self._registry) 138 | -------------------------------------------------------------------------------- /utils/parallel/sync_batchnorm/replicate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : replicate.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import functools 12 | 13 | from torch.nn.parallel.data_parallel import DataParallel 14 | 15 | __all__ = [ 16 | 'CallbackContext', 17 | 'execute_replication_callbacks', 18 | 'DataParallelWithCallback', 19 | 'patch_replication_callback' 20 | ] 21 | 22 | 23 | class CallbackContext(object): 24 | pass 25 | 26 | 27 | def execute_replication_callbacks(modules): 28 | """ 29 | Execute a replication callback `__data_parallel_replicate__` on each module created by the original replication. 30 | 31 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 32 | 33 | Note that, as all modules are isomorphic, we assign each sub-module a context 34 | (shared among multiple copies of this module on different devices). 35 | Through this context, different copies can share some information. 36 | 37 | We guarantee that the callback on the master copy (the first copy) will be called ahead of the callbacks 38 | of any slave copies. 39 | """ 40 | master_copy = modules[0] 41 | nr_modules = len(list(master_copy.modules())) 42 | ctxs = [CallbackContext() for _ in range(nr_modules)] 43 | 44 | for i, module in enumerate(modules): 45 | for j, m in enumerate(module.modules()): 46 | if hasattr(m, '__data_parallel_replicate__'): 47 | m.__data_parallel_replicate__(ctxs[j], i) 48 | 49 | 50 | class DataParallelWithCallback(DataParallel): 51 | """ 52 | Data Parallel with a replication callback. 53 | 54 | A replication callback `__data_parallel_replicate__` of each module will be invoked after it is created by 55 | the original `replicate` function. 56 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 57 | 58 | Examples: 59 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 60 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 61 | # sync_bn.__data_parallel_replicate__ will be invoked. 62 | """ 63 | 64 | def replicate(self, module, device_ids): 65 | modules = super(DataParallelWithCallback, self).replicate(module, device_ids) 66 | execute_replication_callbacks(modules) 67 | return modules 68 | 69 | 70 | def patch_replication_callback(data_parallel): 71 | """ 72 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 73 | Useful when you have a customized `DataParallel` implementation. 
74 | 75 | Examples: 76 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 77 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 78 | > patch_replication_callback(sync_bn) 79 | # this is equivalent to 80 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 81 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 82 | """ 83 | 84 | assert isinstance(data_parallel, DataParallel) 85 | 86 | old_replicate = data_parallel.replicate 87 | 88 | @functools.wraps(old_replicate) 89 | def new_replicate(module, device_ids): 90 | modules = old_replicate(module, device_ids) 91 | execute_replication_callbacks(modules) 92 | return modules 93 | 94 | data_parallel.replicate = new_replicate 95 | -------------------------------------------------------------------------------- /xtorch/README.md: -------------------------------------------------------------------------------- 1 | # xtorch 2 | 3 | This is a simple encapsulation of PyTorch, so that in_features/in_channels can be determined implicitly when the model is defined, rather than specified explicitly. 4 | 5 | A simple example is as follows: 6 | 7 | ```python 8 | import torch 9 | from torch import nn 10 | from xtorch import xnn 11 | 12 | model = xnn.Sequential(xnn.Linear(16), nn.ReLU(), xnn.Linear(2)) 13 | # <===> nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2)) 14 | 15 | model.build_pipe(shape=[2, 8]) 16 | # alternatively 17 | # model.build(torch.randn(2, 8)) 18 | 19 | x = torch.randn(32, 8) 20 | y = model(x) 21 | ``` 22 | -------------------------------------------------------------------------------- /xtorch/xnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .containers import * 4 | from .layers import * 5 | -------------------------------------------------------------------------------- /xtorch/xnn/containers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : containers.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 12:07 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 
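#
# The xnn layers defer construction of the underlying torch.nn module until the first forward
# call, when the input (and hence in_features/in_channels) is known; Module.build_pipe(shape)
# simply triggers that first call with torch.randn(shape).
#
# A minimal sketch (shapes and values are illustrative only):
#
#     conv = xnn.Conv2d(64, 3, padding=1)      # no in_channels given
#     y = conv(torch.randn(1, 3, 32, 32))      # nn.Conv2d(3, 64, 3, padding=1) is created here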
10 | 11 | import torch 12 | 13 | from torch import nn 14 | 15 | 16 | __all__ = ['Module', 'ModuleAtom', 'ModulePipe', 'Sequential'] 17 | 18 | 19 | class Module(nn.Module): 20 | def __init__(self): 21 | super(Module, self).__init__() 22 | 23 | def forward(self, *args, **kwargs): 24 | raise NotImplementedError 25 | 26 | def __call__(self, *args, **kwargs): 27 | return self.forward(*args, **kwargs) 28 | 29 | def build_pipe(self, shape): 30 | return self(torch.randn(shape)) 31 | 32 | build = __call__ 33 | 34 | 35 | class ModuleAtom(Module): 36 | def __init__(self, *args, **kwargs): 37 | super(ModuleAtom, self).__init__() 38 | self.args = args 39 | self.kwargs = kwargs 40 | self.module = None 41 | 42 | def _init_module(self, *args, **kwargs): 43 | raise NotImplementedError 44 | 45 | def forward(self, *args, **kwargs): 46 | if self.module is None: 47 | self._init_module(*args, **kwargs) 48 | return self.module(*args, **kwargs) 49 | 50 | 51 | class ModulePipe(Module): 52 | def __init__(self): 53 | super(ModulePipe, self).__init__() 54 | 55 | def forward(self, x): 56 | for module in self._modules.values(): 57 | x = module(x) 58 | return x 59 | 60 | 61 | class Sequential(nn.Sequential, Module): 62 | def __init__(self, *args): 63 | super(Sequential, self).__init__(*args) 64 | -------------------------------------------------------------------------------- /xtorch/xnn/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : layers.py 3 | # Author : Kai Ao 4 | # Email : capino627@163.com 5 | # Date : 2020/12/12 12:07 6 | # 7 | # This file is part of Rotation-Decoupled Detector. 8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector 9 | # Distributed under MIT License. 10 | 11 | from torch import nn 12 | 13 | from .containers import ModuleAtom 14 | 15 | 16 | __all__ = ['Linear', 'Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d', 17 | 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'GroupNorm', 'InstanceNorm1d', 'InstanceNorm2d', 18 | 'InstanceNorm3d', 'LayerNorm'] 19 | 20 | 21 | class Linear(ModuleAtom): 22 | def __init__(self, out_features, bias=True): 23 | super(Linear, self).__init__(out_features, bias=bias) 24 | 25 | def _init_module(self, x): 26 | if self.args[0] is None: 27 | self.args = (x.shape[1], *self.args[1:]) 28 | self.args = (x.shape[1], *self.args) 29 | self.module = nn.Linear(*self.args, **self.kwargs) 30 | 31 | 32 | class ConvNd(ModuleAtom): 33 | def __init__(self, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, 34 | padding_mode='zeros'): 35 | super(ConvNd, self).__init__(out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, 36 | groups=groups, bias=bias, padding_mode=padding_mode) 37 | 38 | def _init_params(self, x): 39 | if self.kwargs['groups'] < 0: 40 | assert x.shape[1] % self.kwargs['groups'] == 0 41 | self.kwargs['groups'] = x.shape[1] // -self.kwargs['groups'] 42 | if self.args[0] is None: 43 | self.args = (x.shape[1], *self.args[1:]) 44 | self.args = (x.shape[1], *self.args) 45 | 46 | 47 | class Conv1d(ConvNd): 48 | def _init_module(self, x): 49 | self._init_params(x) 50 | self.module = nn.Conv1d(*self.args, **self.kwargs) 51 | 52 | 53 | class Conv2d(ConvNd): 54 | def _init_module(self, x): 55 | self._init_params(x) 56 | self.module = nn.Conv2d(*self.args, **self.kwargs) 57 | 58 | 59 | class Conv3d(ConvNd): 60 | def _init_module(self, x): 61 | self._init_params(x) 62 | self.module = 
nn.Conv3d(*self.args, **self.kwargs) 63 | 64 | 65 | class ConvTransposeNd(ModuleAtom): 66 | def __init__(self, out_channels, kernel_size, stride=1, padding=0, output_padding=0, dilation=1, groups=1, 67 | bias=True, padding_mode='zeros'): 68 | super(ConvTransposeNd, self).__init__(out_channels, kernel_size, stride=stride, padding=padding, 69 | output_padding=output_padding, dilation=dilation, groups=groups, 70 | bias=bias, padding_mode=padding_mode) 71 | 72 | def _init_params(self, x): 73 | if self.kwargs['groups'] < 0: 74 | assert x.shape[1] % self.kwargs['groups'] == 0 75 | self.kwargs['groups'] = x.shape[1] // -self.kwargs['groups'] 76 | if self.args[0] is None: 77 | self.args = (x.shape[1], *self.args[1:]) 78 | self.args = (x.shape[1], *self.args) 79 | 80 | 81 | class ConvTranspose1d(ConvTransposeNd): 82 | def _init_module(self, x): 83 | self._init_params(x) 84 | self.module = nn.ConvTranspose1d(*self.args, **self.kwargs) 85 | 86 | 87 | class ConvTranspose2d(ConvTransposeNd): 88 | def _init_module(self, x): 89 | self._init_params(x) 90 | self.module = nn.ConvTranspose2d(*self.args, **self.kwargs) 91 | 92 | 93 | class ConvTranspose3d(ConvTransposeNd): 94 | def _init_module(self, x): 95 | self._init_params(x) 96 | self.module = nn.ConvTranspose3d(*self.args, **self.kwargs) 97 | 98 | 99 | class BatchNormNd(ModuleAtom): 100 | def __init__(self, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True): 101 | super(BatchNormNd, self).__init__(eps=eps, momentum=momentum, affine=affine, 102 | track_running_stats=track_running_stats) 103 | 104 | 105 | class BatchNorm1d(BatchNormNd): 106 | def _init_module(self, x): 107 | self.args = (x.shape[1], *self.args) 108 | self.module = nn.BatchNorm1d(*self.args, **self.kwargs) 109 | 110 | 111 | class BatchNorm2d(BatchNormNd): 112 | def _init_module(self, x): 113 | self.args = (x.shape[1], *self.args) 114 | self.module = nn.BatchNorm2d(*self.args, **self.kwargs) 115 | 116 | 117 | class BatchNorm3d(BatchNormNd): 118 | def _init_module(self, x): 119 | self.args = (x.shape[1], *self.args) 120 | self.module = nn.BatchNorm3d(*self.args, **self.kwargs) 121 | 122 | 123 | class GroupNorm(ModuleAtom): 124 | def __init__(self, num_groups, eps=1e-5, affine=True): 125 | super(GroupNorm, self).__init__(num_groups, eps=eps, affine=affine) 126 | 127 | def _init_module(self, x): 128 | num_groups = self.args[0] 129 | if num_groups < 0: 130 | assert x.shape[1] % num_groups == 0 131 | num_groups = x.shape[1] // -num_groups 132 | self.args = (num_groups, x.shape[1]) 133 | self.module = nn.GroupNorm(*self.args, **self.kwargs) 134 | 135 | 136 | class InstanceNormNd(ModuleAtom): 137 | def __init__(self, eps=1e-5, momentum=0.1, affine=False, track_running_stats=False): 138 | super(InstanceNormNd, self).__init__(eps=eps, momentum=momentum, affine=affine, 139 | track_running_stats=track_running_stats) 140 | 141 | 142 | class InstanceNorm1d(InstanceNormNd): 143 | def _init_module(self, x): 144 | self.args = (x.shape[1], *self.args) 145 | self.module = nn.InstanceNorm1d(*self.args, **self.kwargs) 146 | 147 | 148 | class InstanceNorm2d(InstanceNormNd): 149 | def _init_module(self, x): 150 | self.args = (x.shape[1], *self.args) 151 | self.module = nn.InstanceNorm2d(*self.args, **self.kwargs) 152 | 153 | 154 | class InstanceNorm3d(InstanceNormNd): 155 | def _init_module(self, x): 156 | self.args = (x.shape[1], *self.args) 157 | self.module = nn.InstanceNorm3d(*self.args, **self.kwargs) 158 | 159 | 160 | class LayerNorm(ModuleAtom): 161 | def __init__(self, num_last_dimensions, 
*args, **kwargs): 162 | super(LayerNorm, self).__init__(num_last_dimensions, *args, **kwargs) 163 | 164 | def _init_module(self, x): 165 | self.args = (x.shape[-self.args[0]:],) 166 | self.module = nn.LayerNorm(*self.args, **self.kwargs) 167 | --------------------------------------------------------------------------------