├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── config
│   └── __init__.py
├── data
│   ├── aug
│   │   ├── compose.py
│   │   ├── func.py
│   │   └── ops
│   │       ├── __init__.py
│   │       ├── ops_det.py
│   │       └── ops_img.py
│   └── dataset
│       ├── __init__.py
│       ├── dataset.py
│       ├── dota.py
│       ├── hrsc2016.py
│       └── usca_aod.py
├── demo
│   ├── graphical-abstract.png
│   └── result.png
├── model
│   ├── backbone
│   │   ├── __init__.py
│   │   ├── darknet.py
│   │   └── resnet
│   │       ├── __init__.py
│   │       ├── resnet.py
│   │       └── splat.py
│   └── rdd
│       ├── __init__.py
│       ├── rdd.py
│       └── utils
│           ├── detect.py
│           ├── loss.py
│           ├── modules.py
│           └── priorbox.py
├── requirement.txt
├── run
│   ├── dota
│   │   ├── evaluate.py
│   │   ├── prepare.py
│   │   ├── train-dist.py
│   │   └── train.py
│   ├── hrsc2016
│   │   ├── evaluate.py
│   │   ├── prepare.py
│   │   ├── train-dist.py
│   │   └── train.py
│   └── ucas-aod
│       ├── evaluate.py
│       ├── prepare.py
│       └── train.py
├── utils
│   ├── adjust_lr.py
│   ├── box
│   │   ├── bbox.py
│   │   ├── bbox_np.py
│   │   ├── ext
│   │   │   ├── rbbox_overlap_cpu
│   │   │   │   ├── __init__.py
│   │   │   │   ├── rbbox_overlap.h
│   │   │   │   ├── rbbox_overlap.pyx
│   │   │   │   └── setup.py
│   │   │   └── rbbox_overlap_gpu
│   │   │       ├── __init__.py
│   │   │       ├── rbbox_overlap.cpp
│   │   │       ├── rbbox_overlap.hpp
│   │   │       ├── rbbox_overlap.pyx
│   │   │       ├── rbbox_overlap_kernel.cu
│   │   │       └── setup.py
│   │   ├── metric.py
│   │   ├── rbbox.py
│   │   └── rbbox_np.py
│   ├── crop_image.py
│   ├── image.py
│   ├── init.py
│   ├── misc.py
│   └── parallel
│       ├── __init__.py
│       ├── data_parallel.py
│       └── sync_batchnorm
│           ├── __init__.py
│           ├── batchnorm.py
│           ├── comm.py
│           └── replicate.py
└── xtorch
    ├── README.md
    └── xnn
        ├── __init__.py
        ├── containers.py
        └── layers.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # celery beat schedule file
95 | celerybeat-schedule
96 |
97 | # SageMath parsed files
98 | *.sage.py
99 |
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 |
109 | # Spyder project settings
110 | .spyderproject
111 | .spyproject
112 |
113 | # Rope project settings
114 | .ropeproject
115 |
116 | # mkdocs documentation
117 | /site
118 |
119 | # mypy
120 | .mypy_cache/
121 | .dmypy.json
122 | dmypy.json
123 |
124 | # Pyre type checker
125 | .pyre/
126 |
127 | .idea/
128 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Capino512
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Single-Stage Rotation-Decoupled Detector for Oriented Object
2 |
3 | This is the repository of paper **Single-Stage Rotation-Decoupled Detector for Oriented Object**. [[Paper]](https://www.mdpi.com/2072-4292/12/19/3262/htm) [[PDF]](https://www.mdpi.com/2072-4292/12/19/3262/pdf)
4 |
5 | **Update:** Updated the code for training on the DOTA, HRSC2016 and UCAS-AOD datasets. Uploaded the weights trained on these datasets.
6 |
7 |
8 |
9 |
10 |
11 | ## Introduction
12 |
13 | We optimized the anchor-based oriented object detection method by decoupling the matching of the oriented bounding box and the oriented anchor into the matching of the horizontal bounding box and the horizontal anchor.
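
As a rough illustration (a minimal sketch, not the matching code actually used in `model/rdd/utils/loss.py`), decoupling means that during anchor matching each oriented ground-truth box is replaced by its horizontal bounding rectangle, so plain horizontal anchors and ordinary axis-aligned IoU suffice; the rotation itself is recovered by the regression branch:

```
import numpy as np

def obb_to_hbb(corners):
    # corners: (N, 4, 2) oriented-box corner points -> (N, 4) axis-aligned boxes
    x1y1 = corners.min(axis=1)
    x2y2 = corners.max(axis=1)
    return np.concatenate([x1y1, x2y2], axis=1)

def hbb_iou(boxes, anchors):
    # boxes: (N, 4), anchors: (M, 4), both as (x1, y1, x2, y2)
    lt = np.maximum(boxes[:, None, :2], anchors[None, :, :2])
    rb = np.minimum(boxes[:, None, 2:], anchors[None, :, 2:])
    wh = np.clip(rb - lt, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area_a = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    return inter / (area_b[:, None] + area_a[None, :] - inter)

# Matching is then done on hbb_iou(obb_to_hbb(gt_corners), anchors);
# the angle of each oriented box is handled by the regression targets.
```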
14 |
15 | ## Performance
16 |
17 | ### DOTA1.0 (Task1)
18 |
19 | Reported in our paper:
20 |
21 | | backbone | MS | mAP | PL | BD | BR | GTF | SV | LV | SH | TC | BC | ST | SBF | RA | HA | SP | HC |
22 | | --------- | ---- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- |
23 | | ResNet101 | × | 75.52 | 89.7 | 84.33 | 46.35 | 68.62 | 73.89 | 73.19 | 86.92 | 90.41 | 86.46 | 84.3 | 64.22 | 64.95 | 73.55 | 72.59 | 73.31 |
24 | | ResNet101 | √ | 77.75 | 89.15 | 83.92 | 52.51 | 73.06 | 77.81 | 79 | 87.08 | 90.62 | 86.72 | 87.15 | 63.96 | 70.29 | 76.98 | 75.79 | 72.15 |
25 |
26 | Retested with the **original** weights and the **newly** released code:
27 |
28 | | backbone | MS | mAP | PL | BD | BR | GTF | SV | LV | SH | TC | BC | ST | SBF | RA | HA | SP | HC |
29 | | --------- | ---- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- |
30 | | ResNet101 | × | 75.02 | 89.61 | 82.01 | 43.35 | 64.79 | 74.10 | 77.54 | 87.11 | 90.84 | 87.15 | 84.80 | 61.52 | 62.22 | 74.49 | 72.57 | 73.13 |
31 | | ResNet101 | √ | 77.87 | 89.21 | 84.80 | 53.40 | 73.17 | 78.11 | 79.44 | 87.28 | 90.78 | 86.46 | 87.43 | 63.46 | 69.91 | 77.52 | 76.00 | 71.06 |
32 |
33 | Checkpoint:
34 |
35 | - [Baidu Drive (l07k)](https://pan.baidu.com/s/1KppCwFpGT-x_BakpIw4KfA) (resnet101, original)
36 | - [Baidu Drive (v9lc)](https://pan.baidu.com/s/1YrsWWbmk9M6fokDWbfSy3A) (resnet101, newly)
37 |
38 | - [Google Drive](https://drive.google.com/file/d/1_QfLJMusMIsdDUpQzqwUCUyBT6N15HvJ/view?usp=sharing) (resnet101, original)
39 | - [Google Drive](https://drive.google.com/drive/folders/1aJ0lfPo-Zqh4g5kf8sXTNIzTLyjVsdLO?usp=sharing) (resnet101, newly)
40 |
41 | ### HRSC2016
42 |
43 | Reported in our paper:
44 |
45 | | backbone | AP(12) |
46 | | --------- | ------ |
47 | | ResNet101 | 94.29 |
48 | | ResNet152 | 94.61 |
49 |
50 | **Update:** Added the test results obtained with the VOC 07 11-point method. Retested with the **original** weights and the **newly** released code:
51 |
52 | | backbone | AP(12) | AP(07) |
53 | | --------- | ------ | ------ |
54 | | ResNet101 | 94.26 | 88.19 |
55 | | ResNet152 | 94.71 | 89.00 |
56 |
57 | AP(07) and AP(12) denote the VOC 2007 (11-point) and VOC 2012 evaluation metrics, respectively.
58 |
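For reference, a minimal sketch of the two AP conventions (illustrative only; not necessarily identical to the repository's evaluation code in `utils/box/metric.py`):

```
import numpy as np

def ap_voc07(recall, precision):
    # VOC 2007: 11-point interpolation, average of max precision at recall >= t
    ap = 0.0
    for t in np.linspace(0, 1, 11):
        p = precision[recall >= t].max() if np.any(recall >= t) else 0.0
        ap += p / 11
    return ap

def ap_voc12(recall, precision):
    # VOC 2012: area under the monotonically decreasing precision envelope
    r = np.concatenate(([0.0], recall, [1.0]))
    p = np.concatenate(([0.0], precision, [0.0]))
    p = np.maximum.accumulate(p[::-1])[::-1]
    idx = np.where(r[1:] != r[:-1])[0]
    return np.sum((r[idx + 1] - r[idx]) * p[idx + 1])
```
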
59 | Checkpoint:
60 |
61 | - [Baidu Drive (ka71)](https://pan.baidu.com/s/1TE98ntQFivqLpSi7PlofHw) (resnet101, original)
62 | - [Baidu Drive (kin2)](https://pan.baidu.com/s/1Sp5PQoHLUT0jD5yUMDo73g) (resnet152, original)
63 | - [Baidu Drive (9vjf)](https://pan.baidu.com/s/11gv3KZKMB4ZBkOaSygD3GA) (resnet101, newly)
64 |
65 | - [Google Drive](https://drive.google.com/file/d/1EqH8Ipb9Haa8Rcn0dlZCP3klKaWG990y/view?usp=sharing) (resnet101, original)
66 | - [Google Drive](https://drive.google.com/file/d/1it-7n7oGZeFHuUa8GZqbljLxLqUvgaSb/view?usp=sharing) (resnet152, original)
67 | - [Google Drive](https://drive.google.com/drive/folders/1i2rHVYQswf4c-iUiIjfj3WKUcDhJS7qS?usp=sharing) (resnet101, newly)
68 |
69 | ### UCAS-AOD
70 |
71 | Reported in our paper:
72 |
73 | | backbone | plane | car | mAP |
74 | | --------- | ----- | ----- | ----- |
75 | | ResNet101 | 98.86 | 94.96 | 96.86 |
76 | | ResNet152 | 98.85 | 95.18 | 97.01 |
77 |
78 | Retested with the original weights and the newly released code:
79 |
80 | | backbone | plane | car | mAP |
81 | | --------- | ----- | ----- | ----- |
82 | | ResNet101 | 98.86 | 94.96 | 96.91 |
83 | | ResNet152 | 98.93 | 95.14 | 97.03 |
84 |
85 | Checkpoint:
86 |
87 | - [Baidu Drive (2adc)](https://pan.baidu.com/s/1-w2QRXa_hhHPBkUSELbxPg) (resnet101, original)
88 | - [Baidu Drive (oxbo)](https://pan.baidu.com/s/1cmkV40p0POBwsr0f1HZEBw) (resnet152, original)
89 | - [Baidu Drive (1l2q)](https://pan.baidu.com/s/1sS5lc65F99lz7SmPMAw1uw) (resnet101, newly)
90 |
91 | - [Google Drive](https://drive.google.com/file/d/1HjdK5tXhRQ-wAkBmdJ5KaJRhoqmCELoB/view?usp=sharing) (resnet101, original)
92 | - [Google Drive](https://drive.google.com/file/d/1zr5tFDHxHIIU7vlsCvBgVmV8IkH591O5/view?usp=sharing) (resnet152, original)
93 | - [Google Drive](https://drive.google.com/drive/folders/1OBNjd-1I9AO0alKtYnoR_4rlDse3lVj4?usp=sharing) (resnet101, newly)
94 |
95 | ## Visualization
96 |
97 | 
98 |
99 | ## Run
100 |
101 | ### Requirements
102 |
103 | ```
104 | tqdm
105 | numpy
106 | pillow
107 | cython
108 | beautifulsoup4
109 | opencv-python
110 | pytorch>=1.2
111 | torchvision>=0.4
112 | tensorboard>=2.2
113 | ```
114 |
115 | ### Compile
116 |
117 | ```
118 | # 'rbbox_batched_nms' will be used as post-processing in the inference stage
119 | # use gpu, for Linux only
120 | cd $PATH_ROOT/utils/box/ext/rbbox_overlap_gpu
121 | python setup.py build_ext --inplace
122 |
123 | # alternative, use cpu, for Windows and Linux
124 | cd $PATH_ROOT/utils/box/ext/rbbox_overlap_cpu
125 | python setup.py build_ext --inplace
126 | ```
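
A quick way to check that the extension was built for your platform is to import the module that wraps it (run from the repository root; if the rotated-NMS extension is missing, the import or the first call to the function will fail):

```
REPO_ROOT$ python -c "from utils.box.rbbox import rbbox_batched_nms; print('ok')"
```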
127 |
128 | ### Pre-training Weight
129 |
130 | Download pretrained weight files.
131 |
132 | - [Baidu Drive (4m2c)](https://pan.baidu.com/s/1u9i3giU5Q-7XAF_rkyL8Bw)
133 |
134 | - [Google Drive](https://drive.google.com/drive/folders/1ll4DxqkIa77tBq3H59cdv_ziAfYO8ek7?usp=sharing)
135 |
136 | Modify the `DIR_WEIGHT` defined in `config/__init__.py` to be the directory where the weight files are placed.
137 |
138 | ```
139 | DIR_WEIGHT = '/.../pre-training-weights'
140 | ```
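
Judging from the `weights` dictionaries in `model/backbone/darknet.py` and `model/backbone/resnet/resnet.py`, that directory is expected to contain per-backbone subfolders, roughly like this (file names follow those dictionaries):

```
pre-training-weights
├── darknet
│   ├── darknet21.pth
│   └── darknet53.pth
└── resnet
    ├── resnet50-19c8e357.pth
    ├── resnet101-5d3b4d8f.pth
    ├── resnet152-b121ed2d.pth
    └── ...
```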
141 |
142 | ### Train on DOTA
143 |
144 | #### Data Preprocessing
145 |
146 | Download the [DOTA](https://captain-whu.github.io/DOTA/index.html) dataset and organize the files as follows:
147 |
148 | ```
149 | $PATH_ROOT/images
150 | ----------/labelTxt-v1.0-obb
151 | $PATH_ROOT/images/train/P0000.png
152 | -----------------/train/...
153 | -----------------/val/...
154 | -----------------/test/...
155 |
156 | $PATH_ROOT/labelTxt/train/P0000.txt
157 | -------------------/train/...
158 | -------------------/val/...
159 | ```
160 |
161 | Modify `dir_dataset` and `dir_save` defined in `run/dota/prepare.py`, `run/dota/train.py` and `run/dota/evaluate.py` to your local paths.
162 |
163 | ```
164 | dir_dataset = '/.../PATH_ROOT' # The directory where the dataset is located
165 | dir_save = '...' # Output directory
166 | ```
167 |
168 | Then run the provided code:
169 |
170 | ```
171 | REPO_ROOT$ python run/dota/prepare.py
172 | ```
173 |
174 | #### Start Training
175 |
176 | ```
177 | REPO_ROOT$ python run/dota/train.py
178 | ```
179 |
180 | #### Evaluate
181 |
182 | ```
183 | REPO_ROOT$ python run/dota/evaluate.py
184 | ```
185 |
186 | ### Train on HRSC2016
187 |
188 | The steps are similar to those for the DOTA dataset; the code is provided in `run/hrsc2016`.
189 |
190 | ### Train on UCAS-AOD
191 |
192 | The steps are similar to those for the DOTA dataset; the code is provided in `run/ucas-aod`.
193 |
194 | ## To Do
195 |
196 | Update the code used for detection.
197 |
198 | ## Citation
199 |
200 | ```
201 | @article{rdd,
202 | title={Single-Stage Rotation-Decoupled Detector for Oriented Object},
203 | author={Zhong, Bo and Ao, Kai},
204 | journal={Remote Sensing},
205 | year={2020}
206 | }
207 | ```
--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | DIR_WEIGHT = ''  # directory containing the pretrained backbone weights (see README, "Pre-training Weight")
4 |
5 | CATEGORY_DOTA_V10 = ['baseball-diamond', 'basketball-court', 'bridge', 'ground-track-field', 'harbor', 'helicopter',
6 | 'large-vehicle', 'plane', 'roundabout', 'ship', 'small-vehicle', 'soccer-ball-field',
7 | 'storage-tank', 'swimming-pool', 'tennis-court']
8 |
9 | CATEGORY_UCAS_AOD = ['car', 'plane']
10 |
11 | CATEGORY_HRSC2016 = ['ship']
12 |
--------------------------------------------------------------------------------
/data/aug/compose.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import numpy as np
4 |
5 |
6 | class Compose:
7 | def __init__(self, ops):
8 | self.ops = ops
9 |
10 | def __call__(self, *args):
11 | for op in self.ops:
12 | args = op(*args)
13 | return args
14 |
15 |
16 | class RandomSelect:
17 | def __init__(self, ops):
18 | self.ops = ops
19 |
20 | def __call__(self, *args):
21 | op = np.random.choice(self.ops)
22 | return op(*args)
23 |
--------------------------------------------------------------------------------
/data/aug/func.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import cv2 as cv
4 | import numpy as np
5 |
6 | from utils.misc import containerize
7 |
8 |
9 | __all__ = ['hflip', 'vflip', 'rgb2gray', 'resize', 'rotate90', 'pad']
10 |
11 |
12 | INTER_MODE = {'NEAREST': cv.INTER_NEAREST, 'BILINEAR': cv.INTER_LINEAR, 'BICUBIC': cv.INTER_CUBIC}
13 |
14 |
15 | def hflip(img):
16 | return np.ascontiguousarray(np.fliplr(img))
17 |
18 |
19 | def vflip(img):
20 | return np.ascontiguousarray(np.flipud(img))
21 |
22 |
23 | def rgb2gray(img):
24 | return cv.cvtColor(cv.cvtColor(img, cv.COLOR_RGB2GRAY), cv.COLOR_GRAY2RGB)
25 |
26 |
27 | def resize(img, size, interpolate='BILINEAR'):
28 | w, h = containerize(size, 2)
29 | ih, iw = img.shape[:2]
30 | if ih != h or iw != w:
31 | img = cv.resize(img, (w, h), interpolation=INTER_MODE[interpolate])
32 | return img
33 |
34 |
35 | def rotate90(img, k): # CLOCKWISE k=0, 1, 2, 3
36 | if k % 4 != 0:
37 | img = np.ascontiguousarray(np.rot90(img, -k))
38 | return img
39 |
40 |
41 | def pad(img, padding, mode='constant', **kwargs):
42 | if isinstance(padding, int):
43 | padding = [[padding, padding], [padding, padding]]
44 | else:
45 | padding = [containerize(p, 2) for p in padding]
46 | if img.ndim == 3 and len(padding) == 2:
47 | padding.append([0, 0])
48 | return np.pad(img, padding, mode, **kwargs)
49 |
--------------------------------------------------------------------------------
/data/aug/ops/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .ops_img import *
4 | from .ops_det import *
5 |
--------------------------------------------------------------------------------
/data/aug/ops/ops_det.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : ops_det.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 10:44
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import random
12 | import cv2 as cv
13 | import numpy as np
14 |
15 | from utils.misc import containerize
16 | from utils.box.bbox_np import xy42xywha, xywha2xy4
17 |
18 | from ..func import *
19 |
20 |
21 | __all__ = ['RandomHFlip', 'RandomVFlip', 'Resize', 'ResizeJitter', 'ResizeLong', 'ResizeBase', 'Pad', 'RandomPad',
22 | 'PadSize', 'PadSquare', 'PadBase', 'Rotate', 'RandomRotate', 'RandomRotate90', 'RandomCrop', 'BBoxFilter']
23 |
24 |
25 | class RandomHFlip:
26 | def __call__(self, img, anno=None):
27 | if np.random.randint(2):
28 | if anno:
29 | ih, iw = img.shape[:2]
30 | anno['bboxes'][:, :, 0] = iw - 1 - anno['bboxes'][:, :, 0]
31 | img = hflip(img)
32 | return img, anno
33 |
34 |
35 | class RandomVFlip:
36 | def __call__(self, img, anno=None):
37 | if np.random.randint(2):
38 | if anno:
39 | ih, iw = img.shape[:2]
40 | anno['bboxes'][:, :, 1] = ih - 1 - anno['bboxes'][:, :, 1]
41 | img = vflip(img)
42 | return img, anno
43 |
44 |
45 | class Resize:
46 | def __init__(self, size, interpolate='BILINEAR'):
47 | self.size = containerize(size, 2)
48 | self.interpolate = interpolate
49 |
50 | def __call__(self, img, anno=None):
51 | if anno:
52 | ih, iw = img.shape[:2]
53 | rw, rh = self.size
54 | bboxes = anno['bboxes'] * [rw / iw, rh / ih]
55 | # Re-fit the scaled quadrilateral to a rectangle; any residual distortion from anisotropic scaling should be minor
56 | anno['bboxes'] = np.array([cv.boxPoints(cv.minAreaRect(bbox)) for bbox in bboxes.astype(np.float32)])
57 | img = resize(img, self.size, self.interpolate)
58 | return img, anno
59 |
60 |
61 | class ResizeJitter:
62 | def __init__(self, scale=(0.8, 1.2)):
63 | self.scale = scale
64 |
65 | def __call__(self, img, anno=None):
66 | ih, iw = img.shape[:2]
67 | rh, rw = [ih, iw] * np.random.uniform(*self.scale, 2)
68 | img, anno = Resize((int(rw), int(rh)))(img, anno)
69 | return img, anno
70 |
71 |
72 | class ResizeLong:
73 | def __init__(self, length, interpolate='BILINEAR'):
74 | self.length = length
75 | self.interpolate = interpolate
76 |
77 | def __call__(self, img, anno=None):
78 | ih, iw = img.shape[:2]
79 | if ih > iw:
80 | size = (int(iw / ih * self.length), self.length)
81 | else:
82 | size = (self.length, int(ih / iw * self.length))
83 | return Resize(size, self.interpolate)(img, anno)
84 |
85 |
86 | class ResizeBase:
87 | def __init__(self, base, scale=1., interpolate='BILINEAR'):
88 | self.base = base
89 | self.scale = scale
90 | self.interpolate = interpolate
91 |
92 | def __call__(self, img, anno=None):
93 | ih, iw = img.shape[:2]
94 | rh, rw = int(ih * self.scale), int(iw * self.scale)
95 | rh = (rh - rh % self.base + self.base) if rh % self.base else rh
96 | rw = (rw - rw % self.base + self.base) if rw % self.base else rw
97 | return Resize((rw, rh), self.interpolate)(img, anno)
98 |
99 |
100 | class _Pad:
101 | def get_padding(self, img):
102 | raise NotImplementedError
103 |
104 | def __call__(self, img, anno=None):
105 | padding = self.get_padding(img)
106 | if anno:
107 | anno['bboxes'] += [padding[1][0], padding[0][0]]
108 | img = pad(img, padding)
109 | return img, anno
110 |
111 |
112 | class Pad(_Pad):
113 | def __init__(self, padding):
114 | if isinstance(padding, (int, float)):
115 | padding = [[padding, padding], [padding, padding]]
116 | else:
117 | padding = [containerize(p, 2) for p in padding]
118 | self.padding = padding
119 |
120 | def get_padding(self, img):
121 | (ph1, ph2), (pw1, pw2) = self.padding
122 | ih, iw = img.shape[:2]
123 | ph1 = ph1 if isinstance(ph1, int) else int(ph1 * ih)
124 | ph2 = ph2 if isinstance(ph2, int) else int(ph2 * ih)
125 | pw1 = pw1 if isinstance(pw1, int) else int(pw1 * iw)
126 | pw2 = pw2 if isinstance(pw2, int) else int(pw2 * iw)
127 | padding = [[ph1, ph2], [pw1, pw2]]
128 | return padding
129 |
130 |
131 | class RandomPad:
132 | def __init__(self, padding):
133 | self.padding = padding
134 |
135 | def __call__(self, img, anno=None):
136 | ih, iw = img.shape[:2]
137 | if isinstance(self.padding, float):
138 | ph = pw = int(max(ih, iw) * np.random.uniform(0, self.padding))
139 | else:
140 | ph = pw = random.randint(0, self.padding)
141 | ph1 = random.randint(0, ph)
142 | pw1 = random.randint(0, pw)
143 | return Pad([[ph1, ph - ph1], [pw1, pw - pw1]])(img, anno)
144 |
145 |
146 | class PadSize(_Pad):
147 | def __init__(self, size, check_size=False):
148 | self.size = containerize(size, 2)
149 | self.check_size = check_size
150 |
151 | def get_padding(self, img):
152 | pw, ph = self.size
153 | ih, iw = img.shape[:2]
154 | if self.check_size:
155 | assert ih <= ph and iw <= pw
156 | padding = (max(0, ph - ih) // 2), max(0, (pw - iw) // 2)
157 | padding = [[padding[0], max(0, ph - ih - padding[0])], [padding[1], max(0, pw - iw - padding[1])]]
158 | return padding
159 |
160 |
161 | class PadSquare:
162 | def __call__(self, img, anno=None):
163 | ih, iw = img.shape[:2]
164 | pw = ph = max(ih, iw)
165 | return PadSize([pw, ph])(img, anno)
166 |
167 |
168 | class PadBase:
169 | def __init__(self, base):
170 | self.base = base
171 |
172 | def __call__(self, img, anno=None):
173 | ih, iw = img.shape[:2]
174 | ph = (ih - ih % self.base + self.base) if ih % self.base else ih
175 | pw = (iw - iw % self.base + self.base) if iw % self.base else iw
176 | return PadSize((pw, ph))(img, anno)
177 |
178 |
179 | class Rotate:
180 | def __init__(self, angle, scale=1, expand=False, shift=False):
181 | self.angle = angle
182 | self.scale = scale
183 | self.expand = expand
184 | self.shift = shift
185 |
186 | def __call__(self, img, anno=None):
187 | nh, nw = ih, iw = img.shape[:2]
188 | center = ((iw - 1) / 2, (ih - 1) / 2)
189 | m = cv.getRotationMatrix2D(center, angle=-self.angle, scale=self.scale)
190 | if self.expand or self.shift:
191 | corner = np.array([[0, 0, 1], [iw - 1, 0, 1], [iw - 1, ih - 1, 1], [0, ih - 1, 1]], dtype=np.float32)
192 | corner = np.matmul(m, corner.T).T
193 | left, top = np.min(corner, axis=0)
194 | right, bottom = np.max(corner, axis=0)
195 | dx = (right - left - iw) / 2
196 | dy = (bottom - top - ih) / 2
197 | if self.expand:
198 | nw = int(np.ceil(right - left))
199 | nh = int(np.ceil(bottom - top))
200 | shiftX = dx
201 | shiftY = dy
202 | else:
203 | shiftX = np.random.uniform(-dx, dx) if dx > 0 else 0
204 | shiftY = np.random.uniform(-dy, dy) if dy > 0 else 0
205 | m[0, 2] += shiftX
206 | m[1, 2] += shiftY
207 | if anno:
208 | bound = (nw / 2, nh / 2), (nw, nh), 0
209 | bboxes, labels = [], []
210 | for bbox, label in zip(anno['bboxes'], anno['labels']):
211 | corner = np.matmul(m, np.c_[bbox, np.ones((4, 1))].T).T
212 | if not self.expand:
213 | x, y, w, h, a = xy42xywha(corner)
214 | inter_points = cv.rotatedRectangleIntersection(bound, ((x, y), (w, h), a))[1]
215 | if inter_points is not None:
216 | order_points = cv.convexHull(inter_points, returnPoints=True)
217 | inter_area = cv.contourArea(order_points)
218 | iou = inter_area / (w * h)
219 | if iou >= 0.5:
220 | corner = cv.boxPoints(cv.minAreaRect(order_points))
221 | else:
222 | continue
223 | bboxes.append(corner)
224 | labels.append(label)
225 | if bboxes:
226 | anno['bboxes'] = np.stack(bboxes)
227 | anno['labels'] = np.stack(labels)
228 | else:
229 | anno = None
230 | img = cv.warpAffine(img, m, (nw, nh))
231 | return img, anno
232 |
233 |
234 | class RandomRotate:
235 | def __init__(self, angle=180, scale=1, expand=False, shift=False):
236 | self.angle = (-angle, angle) if isinstance(angle, (int, float)) else angle
237 | self.scale = containerize(scale, 2)
238 | self.expand = expand
239 | self.shift = shift
240 |
241 | def __call__(self, img, anno=None):
242 | angle = np.random.uniform(*self.angle)
243 | scale = np.random.uniform(*self.scale)
244 | return Rotate(angle, scale, self.expand, self.shift)(img, anno)
245 |
246 |
247 | class RandomRotate90:
248 | def __init__(self, k=(0, 1, 2, 3)): # CLOCKWISE
249 | self.k = k
250 |
251 | def __call__(self, img, anno=None):
252 | k = np.random.choice(self.k)
253 | ih, iw = img.shape[:2]
254 | if anno:
255 | if k == 1:
256 | anno['bboxes'][:, :, 1] = ih - 1 - anno['bboxes'][:, :, 1]
257 | anno['bboxes'] = anno['bboxes'][:, :, [1, 0]]
258 | if k == 2:
259 | anno['bboxes'] = ([iw - 1, ih - 1] - anno['bboxes'])
260 | if k == 3:
261 | anno['bboxes'][:, :, 0] = iw - 1 - anno['bboxes'][:, :, 0]
262 | anno['bboxes'] = anno['bboxes'][:, :, [1, 0]]
263 | img = rotate90(img, k)
264 | return img, anno
265 |
266 |
267 | class RandomCrop:
268 | def __init__(self, size, max_aspect=1.0, iou_thresh=0.5, max_try=100, nonempty=True):
269 | self.size = size
270 | self.max_aspect = max_aspect
271 | self.iou_thresh = iou_thresh
272 | self.max_try = max_try
273 | self.nonempty = nonempty
274 |
275 | def __call__(self, img, anno=None):
276 | ih, iw = img.shape[:2]
277 | polygons = []
278 | if anno:
279 | for bbox in anno['bboxes']:
280 | x, y, w, h, a = xy42xywha(bbox)
281 | polygons.append(((x, y), (w, h), a))
282 | for count in range(self.max_try):
283 | if isinstance(self.size, int):
284 | nh = nw = min(ih, iw, self.size)
285 | else:
286 | if self.max_aspect == 1:
287 | nh = nw = random.randint(min(ih, iw, self.size[0]), min(ih, iw, self.size[1]))
288 | else:
289 | nh = random.randint(min(ih, self.size[0]), min(ih, self.size[1]))
290 | nw = random.randint(min(iw, self.size[0]), min(iw, self.size[1]))
291 | if max(nh / nw, nw / nh) > self.max_aspect:
292 | continue
293 | oh = random.randint(0, ih - nh)
294 | ow = random.randint(0, iw - nw)
295 | a = np.random.uniform(0, 360)
296 | src = xywha2xy4([ow + nw / 2, oh + nh / 2, nw, nh, a])
297 | dst = np.array([[0, 0], [nw, 0], [nw, nh]], dtype=np.float32)
298 | m = cv.getAffineTransform(src.astype(np.float32)[:3], dst)
299 | if anno:
300 | bound = (ow + nw / 2, oh + nh / 2), (nw, nh), a
301 | iou, intersections = [], []
302 | for polygon in polygons:
303 | inter_points = cv.rotatedRectangleIntersection(bound, polygon)[1]
304 | if inter_points is None:
305 | iou.append(0)
306 | intersections.append(None)
307 | else:
308 | order_points = cv.convexHull(inter_points, returnPoints=True)
309 | inter_area = cv.contourArea(order_points)
310 | iou.append(inter_area / (polygon[1][0] * polygon[1][1]))
311 | intersections.append(cv.boxPoints(cv.minAreaRect(order_points)))
312 | iou = np.array(iou)
313 | if isinstance(self.iou_thresh, float):
314 | mask = iou >= self.iou_thresh
315 | else:
316 | mask = (iou > self.iou_thresh[0]) & (iou < self.iou_thresh[1])
317 | if np.any(mask):
318 | continue
319 | mask = iou >= self.iou_thresh[1]
320 | if np.any(mask):
321 | bboxes = np.array([inter for inter, m in zip(intersections, mask) if m])
322 | bboxes = np.concatenate([bboxes, np.ones_like(bboxes[:, :, [0]])], axis=-1)
323 | bboxes = np.matmul(m, bboxes.transpose([0, 2, 1])).transpose([0, 2, 1])
324 | anno['bboxes'] = bboxes
325 | anno['labels'] = anno['labels'][mask]
326 | else:
327 | if self.nonempty:
328 | continue
329 | else:
330 | anno = None
331 | img = cv.warpAffine(img, m, (nw, nh))
332 | break
333 | return img, anno
334 |
335 |
336 | class BBoxFilter:
337 | def __init__(self, min_area):
338 | self.min_area = min_area
339 |
340 | def __call__(self, img, anno=None):
341 | if anno:
342 | wh = np.stack([xy42xywha(bbox)[2:4] for bbox in anno['bboxes']])
343 | area = wh[:, 0] * wh[:, 1]
344 | mask = area >= self.min_area
345 | if np.any(mask):
346 | anno['bboxes'] = anno['bboxes'][mask]
347 | anno['labels'] = anno['labels'][mask]
348 | else:
349 | anno.clear()
350 | return img, anno
351 |
--------------------------------------------------------------------------------
/data/aug/ops/ops_img.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import cv2 as cv
4 | import numpy as np
5 |
6 | from ..func import *
7 | from ..compose import Compose
8 |
9 |
10 | __all__ = ['ToFloat', 'Normalize', 'ConvertColor', 'RandomGray', 'RandomBrightness', 'RandomContrast',
11 | 'RandomLightingNoise', 'RandomHue', 'RandomSaturation', 'PhotometricDistort']
12 |
13 |
14 | class ToFloat:
15 | def __call__(self, img, anno=None):
16 | img = img.astype(np.float32)
17 | return img, anno
18 |
19 |
20 | class Normalize:
21 | def __init__(self, mean, std):
22 | self.mean = mean
23 | self.std = std
24 |
25 | def __call__(self, img, anno=None):
26 | img = (img - self.mean) / self.std
27 | return img, anno
28 |
29 |
30 | class ConvertColor:
31 | def __init__(self, current='RGB', transform='HSV'):
32 | self.transform = transform
33 | self.current = current
34 |
35 | def __call__(self, img, anno=None):
36 | if self.current == 'RGB' and self.transform == 'HSV':
37 | img = cv.cvtColor(img, cv.COLOR_RGB2HSV)
38 | elif self.current == 'HSV' and self.transform == 'RGB':
39 | img = cv.cvtColor(img, cv.COLOR_HSV2RGB)
40 | else:
41 | raise NotImplementedError
42 | return img, anno
43 |
44 |
45 | class RandomGray: # RGB
46 | def __call__(self, img, anno=None):
47 | if np.random.randint(2):
48 | img = rgb2gray(img)
49 | return img, anno
50 |
51 |
52 | class RandomBrightness: # RGB
53 | def __init__(self, delta=32):
54 | assert 0 <= delta <= 255
55 | self.delta = delta
56 |
57 | def __call__(self, img, anno=None):
58 | if np.random.randint(2):
59 | delta = np.random.uniform(-self.delta, self.delta)
60 | img = np.clip(img + delta, 0, 255)
61 | return img, anno
62 |
63 |
64 | class RandomContrast: # RGB
65 | def __init__(self, lower=0.5, upper=1.5):
66 | assert 0 < lower < upper
67 | self.lower = lower
68 | self.upper = upper
69 |
70 | def __call__(self, img, anno=None):
71 | if np.random.randint(2):
72 | alpha = np.random.uniform(self.lower, self.upper)
73 | img = np.clip(alpha * img, 0, 255)
74 | return img, anno
75 |
76 |
77 | class RandomLightingNoise: # RGB
78 | def __call__(self, img, anno=None):
79 | if np.random.randint(2):
80 | indexes = [0, 1, 2]
81 | np.random.shuffle(indexes)
82 | img = img[..., indexes]
83 | return img, anno
84 |
85 |
86 | class RandomHue: # HSV
87 | def __init__(self, delta=18.0):
88 | assert 0 <= delta <= 360
89 | self.delta = delta
90 |
91 | def __call__(self, img, anno=None):
92 | if np.random.randint(2):
93 | delta = np.random.uniform(-self.delta, self.delta)
94 | img[:, :, 0] = (img[:, :, 0] + delta) % 360
95 | return img, anno
96 |
97 |
98 | class RandomSaturation: # HSV
99 | def __init__(self, lower=0.5, upper=1.5):
100 | assert 0 < lower < upper
101 | self.lower = lower
102 | self.upper = upper
103 |
104 | def __call__(self, img, anno=None):
105 | if np.random.randint(2):
106 | alpha = np.random.uniform(self.lower, self.upper)
107 | img[:, :, 1] = np.clip(alpha * img[:, :, 1], 0, 1)
108 | return img, anno
109 |
110 |
111 | class PhotometricDistort:
112 | def __init__(self, prob_light_noise=0.2, prob_gray=0.2):
113 | self.prob_light_noise = prob_light_noise
114 | self.prob_gray = prob_gray
115 | self.pd = [
116 | RandomContrast(),
117 | ConvertColor(current='RGB', transform='HSV'),
118 | RandomSaturation(),
119 | RandomHue(),
120 | ConvertColor(current='HSV', transform='RGB'),
121 | RandomContrast()
122 | ]
123 | self.rand_brightness = RandomBrightness()
124 | self.rand_light_noise = RandomLightingNoise()
125 | self.rand_gray = RandomGray()
126 |
127 | def __call__(self, img, anno=None):
128 | img, anno = self.rand_brightness(img, anno)
129 | distort = Compose(self.pd[:-1] if np.random.randint(2) else self.pd[1:])
130 | img, anno = distort(img, anno)
131 | if np.random.randint(2):
132 | if np.random.rand() < self.prob_light_noise:
133 | img, anno = self.rand_light_noise(img, anno)
134 | else:
135 | if np.random.rand() < self.prob_gray:
136 | img, anno = self.rand_gray(img, anno)
137 | return img, anno
138 |
--------------------------------------------------------------------------------
/data/dataset/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .dataset import DetDataset
4 | from .dota import DOTA
5 | from .hrsc2016 import HRSC2016
6 | from .usca_aod import UCAS_AOD
7 |
--------------------------------------------------------------------------------
/data/dataset/dataset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : dataset.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 10:44
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import os
12 | import json
13 | import torch
14 | import numpy as np
15 |
16 | from copy import deepcopy
17 | from torch.utils.data import Dataset
18 | from utils.misc import convert_path
19 | from utils.image import imread
20 | from utils.box.bbox_np import xy42xywha
21 |
22 |
23 | class DetDataset(Dataset):
24 | def __init__(self, root, image_set, names, aug=None, color_space='RGB'):
25 | self.names = names
26 | self.aug = aug
27 | self.color_space = color_space
28 | self.label2name = dict((label, name) for label, name in enumerate(self.names))
29 | self.name2label = dict((name, label) for label, name in enumerate(self.names))
30 | self.dataset = self.load_dataset(root, image_set)
31 |
32 | @staticmethod
33 | def load_dataset(root, image_set):
34 | image_sets = [image_set] if isinstance(image_set, str) else image_set
35 | dataset = []
36 | for image_set in image_sets:
37 | for img, anno in json.load(open(os.path.join(root, 'image-sets', f'{image_set}.json'))):
38 | img = os.path.join(root, convert_path(img))
39 | anno = (os.path.join(root, convert_path(anno)) if anno else None)
40 | dataset.append([img, anno])
41 | return dataset
42 |
43 | @staticmethod
44 | def load_objs(path, name2label=None):
45 | objs = None
46 | if path:
47 | objs = json.load(open(path))
48 | bboxes = [obj['bbox'] for obj in objs]
49 | labels = [name2label[obj['name']] if name2label else obj['name'] for obj in objs]
50 | objs = {'bboxes': np.array(bboxes, dtype=np.float32), 'labels': np.array(labels)}
51 | return objs
52 |
53 | @staticmethod
54 | def convert_objs(objs):
55 | target = dict()
56 | if objs:
57 | # Limit the angle to between -45° and 45° by setting flag=2
58 | target['bboxes'] = torch.from_numpy(np.stack([xy42xywha(bbox, flag=2) for bbox in objs['bboxes']])).float()
59 | target['labels'] = torch.from_numpy(objs['labels']).long()
60 | return target
61 |
62 | def __getitem__(self, index):
63 | img_path, anno_path = self.dataset[index]
64 | img = imread(img_path, self.color_space)
65 | objs = self.load_objs(anno_path, self.name2label)
66 | info = {'img_path': img_path, 'anno_path': anno_path, 'shape': img.shape, 'objs': objs}
67 | if self.aug is not None:
68 | img, objs = self.aug(img, deepcopy(objs))
69 | return img, objs, info
70 |
71 | @staticmethod
72 | def collate(batch):
73 | images, targets, infos = [], [], []
74 | # Ensure data balance when parallelizing
75 | batch = sorted(batch, key=lambda x: len(x[1]['labels']) if x[1] else 0)
76 | for i, (img, objs, info) in enumerate(batch):
77 | images.append(torch.from_numpy(img).reshape(*img.shape[:2], -1).float())
78 | targets.append(DetDataset.convert_objs(objs))
79 | infos.append(info)
80 | return torch.stack(images).permute(0, 3, 1, 2), targets, infos
81 |
82 | def __len__(self):
83 | return len(self.dataset)
84 |
--------------------------------------------------------------------------------
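Taken together (and assuming a dataset directory already processed by the corresponding `run/*/prepare.py` script; the actual training pipeline lives in `run/dota/train.py` and may differ), a minimal usage sketch of the dataset, augmentation and collate API looks like:

```
from torch.utils.data import DataLoader

from data.aug import ops
from data.aug.compose import Compose
from data.dataset import DOTA

# Hypothetical pipeline and paths, for illustration only.
# PadSquare + Resize give every image the same size so the batch can be stacked.
aug = Compose([
    ops.ToFloat(),
    ops.PadSquare(),
    ops.Resize(768),
])

dataset = DOTA('/path/to/prepared/dota', 'train', aug=aug)
loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=dataset.collate)
images, targets, infos = next(iter(loader))  # images: (B, 3, 768, 768) float tensor
```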
/data/dataset/dota.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from config import CATEGORY_DOTA_V10 as NAMES
4 |
5 | from .dataset import DetDataset
6 |
7 |
8 | class DOTA(DetDataset):
9 | def __init__(self, root, image_sets, aug=None):
10 | super(DOTA, self).__init__(root, image_sets, NAMES, aug)
11 |
--------------------------------------------------------------------------------
/data/dataset/hrsc2016.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from config import CATEGORY_HRSC2016 as NAMES
4 |
5 | from .dataset import DetDataset
6 |
7 |
8 | class HRSC2016(DetDataset):
9 | def __init__(self, root, image_sets, aug=None):
10 | super(HRSC2016, self).__init__(root, image_sets, NAMES, aug)
11 |
--------------------------------------------------------------------------------
/data/dataset/usca_aod.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from config import CATEGORY_UCAS_AOD as NAMES
4 |
5 | from .dataset import DetDataset
6 |
7 |
8 | class UCAS_AOD(DetDataset):
9 | def __init__(self, root, image_sets, aug=None):
10 | super(UCAS_AOD, self).__init__(root, image_sets, NAMES, aug)
11 |
--------------------------------------------------------------------------------
/demo/graphical-abstract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Capino512/pytorch-rotation-decoupled-detector/c00d7ea3e33d10f00dfd3198b33eb3e94de61239/demo/graphical-abstract.png
--------------------------------------------------------------------------------
/demo/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Capino512/pytorch-rotation-decoupled-detector/c00d7ea3e33d10f00dfd3198b33eb3e94de61239/demo/result.png
--------------------------------------------------------------------------------
/model/backbone/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .resnet import *
4 | from .darknet import darknet21, darknet53
5 |
--------------------------------------------------------------------------------
/model/backbone/darknet.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 | import torch
5 |
6 | from torch import nn
7 | from xtorch import xnn
8 | from config import DIR_WEIGHT
9 | from utils.init import weight_init
10 |
11 | # all pre-trained on image-net
12 |
13 | weights = {
14 | # from YOLO-v3
15 | 'darknet21': os.path.join(DIR_WEIGHT, 'darknet', 'darknet21.pth'),
16 | 'darknet53': os.path.join(DIR_WEIGHT, 'darknet', 'darknet53.pth'),
17 | }
18 |
19 |
20 | def CBR(plane, kernel_size, stride=1, padding=0):
21 | return nn.Sequential(xnn.Conv2d(plane, kernel_size, stride, padding, bias=False),
22 | xnn.BatchNorm2d(),
23 | nn.ReLU(inplace=True))
24 |
25 |
26 | class BasicBlock(xnn.Module):
27 | def __init__(self, plane):
28 | super(BasicBlock, self).__init__()
29 | self.body = nn.Sequential(CBR(plane // 2, kernel_size=1, stride=1, padding=0),
30 | CBR(plane, kernel_size=3, stride=1, padding=1))
31 |
32 | def forward(self, x):
33 | return x + self.body(x)
34 |
35 |
36 | class Backbone(xnn.Module):
37 | def __init__(self, layers, name=None, fetch_feature=False):
38 | super(Backbone, self).__init__()
39 | self.name = name
40 | self.fetch_feature = fetch_feature
41 | self.head = CBR(32, kernel_size=3, stride=1, padding=1)
42 | self.layers = nn.ModuleList([self._make_layer(64 * 2 ** i, blocks) for i, blocks in enumerate(layers)])
43 |
44 | @staticmethod
45 | def _make_layer(plane, blocks):
46 | layers = [CBR(plane, kernel_size=3, stride=2, padding=1)]
47 | for i in range(0, blocks):
48 | layers.append(BasicBlock(plane))
49 | return nn.Sequential(*layers)
50 |
51 | def init(self):
52 | if self.name in weights:
53 | print('load pre-training weights for', self.name)
54 | weight = torch.load(weights[self.name])
55 | ret = self.load_state_dict(weight, strict=False)
56 | print(ret)
57 | else:
58 | self.apply(weight_init['normal'])
59 |
60 | def forward(self, x):
61 | feature = self.head(x)
62 | features = []
63 | for layer in self.layers:
64 | feature = layer(feature)
65 | if self.fetch_feature:
66 | features.append(feature)
67 | return features if self.fetch_feature else feature
68 |
69 |
70 | def darknet21(fetch_feature=False):
71 | return Backbone([1, 1, 2, 2, 1], 'darknet21', fetch_feature)
72 |
73 |
74 | def darknet53(fetch_feature=False):
75 | return Backbone([1, 2, 8, 8, 4], 'darknet53', fetch_feature)
76 |
--------------------------------------------------------------------------------
/model/backbone/resnet/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .resnet import resnet18, resnet34, resnet50, resnet101, resnet152
4 | from .resnet import resnest50, resnest101, resnest200, resnest269
5 | from .resnet import resnext50_32x4d, resnext101_32x8d
6 | from .resnet import resnet18_d, resnet34_d, resnet50_d, resnet101_d, resnet152_d
7 |
--------------------------------------------------------------------------------
/model/backbone/resnet/resnet.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 | import torch
5 |
6 | from torch import nn
7 | from xtorch import xnn
8 | from config import DIR_WEIGHT
9 | from utils.init import weight_init
10 |
11 | from .splat import SplAtConv2d
12 |
13 | # all pre-trained on image-net
14 |
15 | weights = {
16 | # from pytorch
17 | 'resnet18': os.path.join(DIR_WEIGHT, 'resnet', 'resnet18-5c106cde.pth'),
18 | 'resnet34': os.path.join(DIR_WEIGHT, 'resnet', 'resnet34-333f7ec4.pth'),
19 | 'resnet50': os.path.join(DIR_WEIGHT, 'resnet', 'resnet50-19c8e357.pth'),
20 | 'resnet101': os.path.join(DIR_WEIGHT, 'resnet', 'resnet101-5d3b4d8f.pth'),
21 | 'resnet152': os.path.join(DIR_WEIGHT, 'resnet', 'resnet152-b121ed2d.pth'),
22 | 'resnext50': os.path.join(DIR_WEIGHT, 'resnet', 'resnext50_32x4d-7cdf4587.pth'),
23 | 'resnext101': os.path.join(DIR_WEIGHT, 'resnet', 'resnext101_32x8d-8ba56ff5.pth'),
24 |
25 | # from https://github.com/zhanghang1989/ResNeSt
26 | 'resnest50': os.path.join(DIR_WEIGHT, 'resnet', 'resnest50-528c19ca.pth'),
27 | 'resnest101': os.path.join(DIR_WEIGHT, 'resnet', 'resnest101-22405ba7.pth'),
28 | 'resnest200': os.path.join(DIR_WEIGHT, 'resnet', 'resnest200-75117900.pth'),
29 | 'resnest269': os.path.join(DIR_WEIGHT, 'resnet', 'resnest269-0cc87c48.pth'),
30 |
31 | 'resnet50-d': os.path.join(DIR_WEIGHT, 'resnet', 'resnet50_v1d.pth'),
32 | 'resnet101-d': os.path.join(DIR_WEIGHT, 'resnet', 'resnet101_v1d.pth'),
33 | 'resnet152-d': os.path.join(DIR_WEIGHT, 'resnet', 'resnet152_v1d.pth'),
34 | }
35 |
36 |
37 | class BasicBlock(xnn.Module):
38 | expansion = 1
39 |
40 | def __init__(self, planes, stride=1, downsample=None, *args, **kwargs):
41 | super(BasicBlock, self).__init__()
42 | self.body = nn.Sequential(xnn.Conv2d(planes, 3, stride, 1, bias=False),
43 | xnn.BatchNorm2d(),
44 | nn.ReLU(inplace=True),
45 | xnn.Conv2d(planes, 3, 1, 1, bias=False),
46 | xnn.BatchNorm2d())
47 | self.downsample = downsample
48 |
49 | def forward(self, x):
50 | return nn.ReLU(inplace=True)(self.body(x) + (x if self.downsample is None else self.downsample(x)))
51 |
52 |
53 | class Bottleneck(nn.Module):
54 | expansion = 4
55 |
56 | def __init__(self, planes, stride=1, downsample=None, radix=1, cardinality=1, bottleneck_width=64, avd=False,
57 | avd_first=False, dilation=1, is_first=False):
58 | super(Bottleneck, self).__init__()
59 | group_width = int(planes * (bottleneck_width / 64)) * cardinality
60 | avd = avd and (stride > 1 or is_first)
61 |
62 | body = [xnn.Conv2d(group_width, kernel_size=1, bias=False), xnn.BatchNorm2d(), nn.ReLU(inplace=True)]
63 | if avd:
64 | avd_layer = nn.AvgPool2d(3, stride, padding=1)
65 | stride = 1
66 | if avd_first:
67 | body.append(avd_layer)
68 | if radix > 1:
69 | body.append(SplAtConv2d(group_width, 3, stride, dilation, dilation, cardinality, bias=False, radix=radix))
70 | else:
71 | body.append(xnn.Conv2d(group_width, 3, stride, dilation, dilation, cardinality, bias=False))
72 | body.append(xnn.BatchNorm2d())
73 | body.append(nn.ReLU(inplace=True))
74 | if avd and not avd_first:
75 | body.append(avd_layer)
76 | body.append(xnn.Conv2d(planes * self.expansion, 1, bias=False))
77 | body.append(xnn.BatchNorm2d())
78 | self.body = nn.Sequential(*body)
79 | self.downsample = downsample
80 |
81 | def forward(self, x):
82 | return nn.ReLU(inplace=True)(self.body(x) + (x if self.downsample is None else self.downsample(x)))
83 |
84 |
85 | class Backbone(xnn.Module):
86 | def __init__(self, block, layers, name=None, fetch_feature=False, radix=1, groups=1, bottleneck_width=64,
87 | dilated=False, dilation=1, deep_stem=False, stem_width=64, avg_down=False, avd=False, avd_first=False):
88 | self.cardinality = groups
89 | self.bottleneck_width = bottleneck_width
90 | # ResNet-D params
91 | self.inplanes = stem_width * 2 if deep_stem else 64
92 | self.avg_down = avg_down
93 | # ResNeSt params
94 | self.radix = radix
95 | self.avd = avd
96 | self.avd_first = avd_first
97 |
98 | super(Backbone, self).__init__()
99 |
100 | self.name = name
101 | self.fetch_feature = fetch_feature
102 |
103 | if deep_stem:
104 | head = [xnn.Conv2d(stem_width, kernel_size=3, stride=2, padding=1, bias=False),
105 | xnn.BatchNorm2d(),
106 | nn.ReLU(inplace=True),
107 | xnn.Conv2d(stem_width, kernel_size=3, stride=1, padding=1, bias=False),
108 | xnn.BatchNorm2d(),
109 | nn.ReLU(inplace=True),
110 | xnn.Conv2d(stem_width * 2, kernel_size=3, stride=1, padding=1, bias=False)]
111 | else:
112 | head = [xnn.Conv2d(64, kernel_size=7, stride=2, padding=3, bias=False)]
113 | self.head = nn.Sequential(*head, xnn.BatchNorm2d(), nn.ReLU(inplace=True),
114 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
115 | self.layer1 = self._make_layer(block, 64, layers[0], is_first=False)
116 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
117 | if dilated or dilation == 4:
118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
119 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
120 | elif dilation == 2:
121 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=1)
122 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2)
123 | else:
124 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
125 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
126 |
127 | def init(self):
128 | if self.name in weights:
129 | print('load pre-training weights for', self.name)
130 | weight = torch.load(weights[self.name])
131 | ret = self.load_state_dict(weight, strict=False)
132 | print(ret)
133 | else:
134 | self.apply(weight_init['kaiming_normal'])
135 |
136 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, is_first=True):
137 | downsample = None
138 | if stride != 1 or self.inplanes != planes * block.expansion:
139 | down_layers = []
140 | if self.avg_down:
141 | if dilation == 1:
142 | down_layers.append(
143 | nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False))
144 | else:
145 | down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, ceil_mode=True, count_include_pad=False))
146 | down_layers.append(xnn.Conv2d(planes * block.expansion, kernel_size=1, stride=1, bias=False))
147 | else:
148 | down_layers.append(xnn.Conv2d(planes * block.expansion, kernel_size=1, stride=stride, bias=False))
149 | down_layers.append(xnn.BatchNorm2d())
150 | downsample = nn.Sequential(*down_layers)
151 |
152 | layers = []
153 | if dilation == 1 or dilation == 2:
154 | layers.append(
155 | block(planes, stride, downsample, self.radix, self.cardinality, self.bottleneck_width, self.avd,
156 | self.avd_first, 1, is_first))
157 | elif dilation == 4:
158 | layers.append(
159 | block(planes, stride, downsample, self.radix, self.cardinality, self.bottleneck_width, self.avd,
160 | self.avd_first, 2, is_first))
161 | else:
162 | raise RuntimeError("=> unknown dilation size: {}".format(dilation))
163 |
164 | self.inplanes = planes * block.expansion
165 | for i in range(1, blocks):
166 | layers.append(
167 | block(planes, 1, None, self.radix, self.cardinality, self.bottleneck_width, self.avd, self.avd_first,
168 | dilation))
169 |
170 | return nn.Sequential(*layers)
171 |
172 | def forward(self, x):
173 | x = self.head(x)
174 |
175 | x1 = self.layer1(x)
176 | x2 = self.layer2(x1)
177 | x3 = self.layer3(x2)
178 | x4 = self.layer4(x3)
179 |
180 | return [x1, x2, x3, x4] if self.fetch_feature else x4
181 |
182 |
183 | def resnet18(fetch_feature=False):
184 | return Backbone(BasicBlock, (2, 2, 2, 2), 'resnet18', fetch_feature)
185 |
186 |
187 | def resnet34(fetch_feature=False):
188 | return Backbone(BasicBlock, (3, 4, 6, 3), 'resnet34', fetch_feature)
189 |
190 |
191 | def resnet50(fetch_feature=False):
192 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnet50', fetch_feature)
193 |
194 |
195 | def resnet101(fetch_feature=False):
196 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnet101', fetch_feature)
197 |
198 |
199 | def resnet152(fetch_feature=False):
200 | return Backbone(Bottleneck, (3, 8, 36, 3), 'resnet152', fetch_feature)
201 |
202 |
203 | def resnest50(fetch_feature=False):
204 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnest50', fetch_feature, radix=2, deep_stem=True, stem_width=32,
205 | avg_down=True, avd=True, avd_first=False)
206 |
207 |
208 | def resnest101(fetch_feature=False):
209 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnest101', fetch_feature, radix=2, deep_stem=True, stem_width=64,
210 | avg_down=True, avd=True, avd_first=False)
211 |
212 |
213 | def resnest200(fetch_feature=False):
214 | return Backbone(Bottleneck, (3, 24, 36, 3), 'resnest200', fetch_feature, radix=2, deep_stem=True, stem_width=64,
215 | avg_down=True, avd=True, avd_first=False)
216 |
217 |
218 | def resnest269(fetch_feature=False):
219 | return Backbone(Bottleneck, (3, 30, 48, 8), 'resnest269', fetch_feature, radix=2, deep_stem=True, stem_width=64,
220 | avg_down=True, avd=True, avd_first=False)
221 |
222 |
223 | def resnext50_32x4d(fetch_feature=False):
224 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnext50-32x4d', fetch_feature, groups=32, bottleneck_width=4)
225 |
226 |
227 | def resnext101_32x8d(fetch_feature=False):
228 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnext101-32x8d', fetch_feature, groups=32, bottleneck_width=8)
229 |
230 |
231 | def resnet18_d(fetch_feature=False):
232 | return Backbone(BasicBlock, (2, 2, 2, 2), 'resnet18-d', fetch_feature, deep_stem=True, stem_width=32)
233 |
234 |
235 | def resnet34_d(fetch_feature=False):
236 | return Backbone(BasicBlock, (3, 4, 6, 3), 'resnet34-d', fetch_feature, deep_stem=True, stem_width=32)
237 |
238 |
239 | def resnet50_d(fetch_feature=False):
240 | return Backbone(Bottleneck, (3, 4, 6, 3), 'resnet50-d', fetch_feature, deep_stem=True, stem_width=32)
241 |
242 |
243 | def resnet101_d(fetch_feature=False):
244 | return Backbone(Bottleneck, (3, 4, 23, 3), 'resnet101-d', fetch_feature, deep_stem=True, stem_width=32)
245 |
246 |
247 | def resnet152_d(fetch_feature=False):
248 | return Backbone(Bottleneck, (3, 8, 36, 3), 'resnet152-d', fetch_feature, deep_stem=True, stem_width=32)
249 |
--------------------------------------------------------------------------------
/model/backbone/resnet/splat.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import torch
4 |
5 | from torch import nn
6 | from torch.nn import functional as F
7 | from xtorch import xnn
8 |
9 |
10 | class SplAtConv2d(xnn.Module):
11 | def __init__(self, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', radix=2, reduction_factor=4):
12 | super(SplAtConv2d, self).__init__()
13 | inter_channels = max(out_channels * radix // reduction_factor, 32)
14 | self.radix = radix
15 | self.conv = xnn.Conv2d(out_channels * radix, kernel_size, stride, padding, dilation, groups * radix, bias, padding_mode)
16 | self.bn0 = xnn.BatchNorm2d()
17 | self.relu = nn.ReLU(inplace=True)
18 | self.fc1 = xnn.Conv2d(inter_channels, 1, groups=groups)
19 | self.bn1 = xnn.BatchNorm2d()
20 | self.fc2 = xnn.Conv2d(out_channels * radix, 1, groups=groups)
21 | self.rsoftmax = rSoftMax(radix, groups)
22 |
23 | def forward(self, x):
24 | x = self.conv(x)
25 | x = self.bn0(x)
26 | x = self.relu(x)
27 | split = torch.chunk(x, self.radix, 1)
28 | gap = sum(split)
29 | gap = F.adaptive_avg_pool2d(gap, (1, 1))
30 | gap = self.fc1(gap)
31 | gap = self.bn1(gap)
32 | gap = self.relu(gap)
33 | atten = self.fc2(gap)
34 | atten = self.rsoftmax(atten)
35 | atten = torch.chunk(atten, self.radix, 1)
36 | out = sum(att * s for att, s in zip(atten, split))
37 | return out
38 |
39 |
40 | class rSoftMax(xnn.Module):
41 | def __init__(self, radix, cardinality):
42 | super().__init__()
43 | self.radix = radix
44 | self.cardinality = cardinality
45 |
46 | def forward(self, x):
47 | shape = x.shape
48 | if self.radix > 1:
49 | x = x.view(x.size(0), self.cardinality, self.radix, -1).transpose(1, 2)
50 | x = F.softmax(x, dim=1)
51 | x = x.reshape(shape)
52 | else:
53 | x = torch.sigmoid(x)
54 | return x
55 |
--------------------------------------------------------------------------------
/model/rdd/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .rdd import RDD
4 |
--------------------------------------------------------------------------------
/model/rdd/rdd.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : rdd.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 10:58
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from torch import nn
14 | from xtorch import xnn
15 | from utils.init import weight_init
16 |
17 | from .utils.modules import FeaturePyramidNet, DetPredict
18 | from .utils.priorbox import LFUPriorBox
19 | from .utils.loss import calc_loss
20 | from .utils.detect import detect
21 |
22 |
23 | class RDD(xnn.Module):
24 | def __init__(self, backbone, cfg):
25 | super(RDD, self).__init__()
26 |
27 | cfg.setdefault('iou_thresh', [0.4, 0.5])
28 | cfg.setdefault('variance', [0.1, 0.2, 0.1])
29 | cfg.setdefault('balance', 0.5)
30 |
31 | cfg.setdefault('conf_thresh', 0.01)
32 | cfg.setdefault('nms_thresh', 0.5)
33 | cfg.setdefault('top_n', None)
34 |
35 | cfg.setdefault('extra', 0)
36 | cfg.setdefault('fpn_plane', 256)
37 | cfg.setdefault('extra_plane', 512)
38 |
39 | self.backbone = backbone
40 | self.prior_box = LFUPriorBox(cfg['prior_box'])
41 | self.num_levels = self.prior_box.num_levels
42 | self.num_classes = cfg['num_classes']
43 | self.iou_thresh = cfg['iou_thresh']
44 | self.variance = cfg['variance']
45 | self.balance = cfg['balance']
46 |
47 | self.conf_thresh = cfg['conf_thresh']
48 | self.nms_thresh = cfg['nms_thresh']
49 | self.top_n = cfg['top_n']
50 |
51 | self.extra = cfg['extra']
52 | self.fpn_plane = cfg['fpn_plane']
53 | self.extra_plane = cfg['extra_plane']
54 |
55 | self.fpn = FeaturePyramidNet(self.num_levels, self.fpn_plane)
56 | self.predict = DetPredict(self.num_levels, self.fpn_plane, self.prior_box.num_prior_boxes, self.num_classes, 5)
57 |
58 | if self.extra > 0:
59 | self.extra_layers = nn.ModuleList()
60 | for i in range(self.extra):
61 | self.extra_layers.append(nn.Sequential(xnn.Conv2d(self.extra_plane, 3, 2, 1, bias=False),
62 | xnn.BatchNorm2d(),
63 | nn.ReLU(inplace=True)))
64 |
65 | def init(self):
66 | self.apply(weight_init['normal'])
67 | self.backbone.init()
68 |
69 | def restore(self, path):
70 | weight = torch.load(path)
71 | self.load_state_dict(weight, strict=True)
72 |
73 | def forward(self, images, targets=None):
74 | features = list(self.backbone(images))
75 | features = features[-(self.num_levels - self.extra):]
76 | if self.extra > 0:
77 | for layer in self.extra_layers:
78 | features.append(layer(features[-1]))
79 | features = self.fpn(features)
80 |
81 | pred_cls, pred_loc = self.predict(features)
82 | anchors = self.prior_box.get_anchors(images.shape[2:]).to(images)
83 | if self.training:
84 | if targets is not None:
85 | return calc_loss(pred_cls, pred_loc, targets, anchors, self.iou_thresh, self.variance, self.balance)
86 | else:
87 | pred_cls, pred_loc = pred_cls.detach(), pred_loc.detach()
88 | top_n = (images.size(2) // 32) * (images.size(3) // 32) if self.top_n is None else self.top_n
89 | return detect(pred_cls, pred_loc, anchors, self.variance, self.conf_thresh, self.nms_thresh, top_n)
90 |
--------------------------------------------------------------------------------
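
A minimal usage sketch for RDD, mirroring the configuration in run/dota/train.py (the class count and image size are examples; DOTA-v1.0 has 15 categories):

    from model.rdd import RDD
    from model.backbone import resnet

    prior_box = {
        'strides': [8, 16, 32, 64, 128],
        'sizes': [3] * 5,
        'aspects': [[1, 2, 4, 8]] * 5,
        'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
    }
    cfg = {'prior_box': prior_box, 'num_classes': 15, 'extra': 2}

    model = RDD(resnet.resnet101(fetch_feature=True), cfg)
    model.build_pipe(shape=[2, 3, 768, 768])  # materialize the lazily-shaped xnn layers
    model.init()                              # random init; use model.restore(path) to load a checkpoint
    # train mode: model(images, targets) returns an OrderedDict of losses
    # eval mode:  model.eval(); model(images) returns per-image [bboxes, scores, labels] (or None)
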
/model/rdd/utils/detect.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : detect.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 10:58
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from collections import Counter
14 | from utils.box.bbox import decode
15 | from utils.box.rbbox import rbbox_batched_nms as nms
16 |
17 |
18 | def detect(pred_cls, pred_loc, anchors, variance, conf_thresh, nms_thresh, top_n):
19 | scores = torch.sigmoid(pred_cls)
20 | bboxes = decode(pred_loc, anchors[None], variance)
21 | indexes_img, indexes_anchor, indexes_cls = torch.where(scores > conf_thresh)
22 |
23 | bboxes = bboxes[indexes_img, indexes_anchor]
24 | scores = scores[indexes_img, indexes_anchor, indexes_cls]
25 | labels = indexes_cls
26 |
27 |     start = 0  # torch.where returns indices in row-major order, so indexes_img is non-decreasing and each image's detections are contiguous
28 | dets = [None] * pred_cls.size(0)
29 | for image_id, n in sorted(Counter(indexes_img.tolist()).items()):
30 | bboxes_ = bboxes[start: start + n]
31 | scores_ = scores[start: start + n]
32 | labels_ = labels[start: start + n]
33 | keeps = nms(bboxes_, scores_, labels_, nms_thresh)[:top_n]
34 | dets[image_id] = [bboxes_[keeps], scores_[keeps], labels_[keeps]]
35 | start += n
36 |
37 | return dets
38 |
--------------------------------------------------------------------------------
/model/rdd/utils/loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : loss.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 10:59
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from torch import nn
14 | from collections import OrderedDict
15 | from torch.nn.functional import one_hot
16 | from utils.box.bbox import bbox_switch, bbox_iou, encode
17 |
18 |
19 | def match(bboxes, anchors, iou_thresh, batch=16):
20 | # Reduce GPU memory usage
21 | ious = torch.cat([bbox_iou(bboxes[i: i + batch], anchors) for i in range(0, bboxes.size(0), batch)])
22 | max_ious, bbox_indexes = torch.max(ious, dim=0)
23 | mask_neg = max_ious < iou_thresh[0]
24 | mask_pos = max_ious > iou_thresh[1]
25 | return mask_pos, mask_neg, bbox_indexes
26 |
27 |
28 | def calc_loss_v1(pred_cls, pred_loc, targets, anchors, iou_thresh, variance, balance):
29 | device = pred_cls.device
30 | num_classes = pred_cls.size(-1)
31 | weight_pos, weight_neg = 2 * balance, 2 * (1 - balance)
32 | anchors_xyxy = bbox_switch(anchors, 'xywh', 'xyxy')
33 |
34 | criterion_cls = nn.BCEWithLogitsLoss(reduction='none')
35 | criterion_loc = nn.SmoothL1Loss(reduction='sum')
36 | loss_cls, loss_loc = torch.zeros([2], dtype=torch.float, device=device, requires_grad=True)
37 | num_pos = 0
38 | for i, target in enumerate(targets):
39 | if target:
40 | bboxes = target['bboxes'].to(device)
41 | labels = target['labels'].to(device)
42 | bboxes_xyxy = bbox_switch(bboxes[:, :4], 'xywh', 'xyxy')
43 | mask_pos, mask_neg, bbox_indexes = match(bboxes_xyxy, anchors_xyxy, iou_thresh)
44 |
45 | labels = labels[bbox_indexes]
46 | indexes_pos = bbox_indexes[mask_pos]
47 | bboxes_matched = bboxes[indexes_pos]
48 | anchors_matched = anchors[mask_pos]
49 | bboxes_pred = pred_loc[i][mask_pos]
50 | gt_bboxes, det_bboxes = encode(bboxes_matched, bboxes_pred, anchors_matched, variance)
51 |
52 | labels = one_hot(labels, num_classes=num_classes).float()
53 | labels[mask_neg] = 0
54 | loss_cls_ = criterion_cls(pred_cls[i], labels)
55 | loss_cls = loss_cls + loss_cls_[mask_pos].sum() * weight_pos + loss_cls_[mask_neg].sum() * weight_neg
56 | loss_loc = loss_loc + criterion_loc(gt_bboxes, det_bboxes)
57 | num_pos += mask_pos.sum().item()
58 | else:
59 | loss_cls = loss_cls + criterion_cls(pred_cls[i], torch.zeros_like(pred_cls[i])).sum()
60 | num_pos = max(num_pos, 1)
61 | return OrderedDict([('loss_cls', loss_cls / num_pos), ('loss_loc', loss_loc / num_pos)])
62 |
63 |
64 | def calc_loss_v2(pred_cls, pred_loc, targets, anchors, iou_thresh, variance, balance):
65 |     # Compute the loss for the whole batch at once; this gives only a small speed-up
66 | device = pred_cls.device
67 | num_classes = pred_cls.size(-1)
68 | weight_pos, weight_neg = 2 * balance, 2 * (1 - balance)
69 | criterion_cls = nn.BCEWithLogitsLoss(reduction='none')
70 | criterion_loc = nn.SmoothL1Loss(reduction='sum')
71 |
72 | num_bboxes = [target['bboxes'].size(0) if target else 0 for target in targets]
73 | bboxes = [target['bboxes'] for target in targets if target]
74 | labels = [target['labels'] for target in targets if target]
75 | if len(bboxes) > 0:
76 | bboxes = torch.cat(bboxes).to(device)
77 | labels = torch.cat(labels).to(device)
78 | else:
79 | loss_cls = criterion_cls(pred_cls, torch.zeros_like(pred_cls)).sum()
80 | return OrderedDict([('loss_cls', loss_cls), ('loss_loc', torch.tensor(0., requires_grad=True))])
81 |
82 | # Reduce GPU memory usage
83 | batch = 16
84 | iou = torch.cat([bbox_iou(bboxes[i: i + batch, :4], anchors, 'xywh') for i in range(0, bboxes.size(0), batch)])
85 | start = 0
86 | max_iou_merged, bbox_indexes_merged = [], []
87 | for i, num in enumerate(num_bboxes):
88 | if num == 0:
89 | max_iou = torch.zeros_like(pred_cls[i, :, 0])
90 | bbox_indexes = torch.zeros_like(pred_cls[i, :, 0], dtype=torch.long)
91 | else:
92 |             max_iou, bbox_indexes = torch.max(iou[start: start + num], dim=0)
93 | max_iou_merged.append(max_iou)
94 | bbox_indexes_merged.append(bbox_indexes + start)
95 | start += num
96 | max_iou_merged = torch.stack(max_iou_merged)
97 | bbox_indexes_merged = torch.stack(bbox_indexes_merged)
98 | masks_pos = max_iou_merged > iou_thresh[1]
99 | masks_neg = max_iou_merged < iou_thresh[0]
100 | labels_matched = labels[bbox_indexes_merged]
101 | labels_matched = one_hot(labels_matched, num_classes=num_classes)
102 | labels_matched[masks_neg] = 0
103 | bboxes_matched = bboxes[bbox_indexes_merged[masks_pos]]
104 | anchors_matched = anchors[None].repeat(len(targets), 1, 1)[masks_pos]
105 | loss_cls = criterion_cls(pred_cls, labels_matched.float())
106 | loss_cls = loss_cls[masks_pos].sum() * weight_pos + loss_cls[masks_neg].sum() * weight_neg
107 | gt_bboxes, det_bboxes = encode(bboxes_matched, pred_loc[masks_pos], anchors_matched, variance)
108 | loss_loc = criterion_loc(det_bboxes, gt_bboxes)
109 | num_pos = max(masks_pos.sum().item(), 1)
110 | return OrderedDict([('loss_cls', loss_cls / num_pos), ('loss_loc', loss_loc / num_pos)])
111 |
112 |
113 | calc_loss = calc_loss_v1
114 |
--------------------------------------------------------------------------------
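
A small sketch of the anchor matching rule shared by both loss variants, using the default iou_thresh = [0.4, 0.5] set in model/rdd/rdd.py and made-up IoU values:

    import torch

    iou_thresh = [0.4, 0.5]
    max_ious = torch.tensor([0.20, 0.45, 0.70])  # best IoU of each anchor with any ground-truth box
    mask_neg = max_ious < iou_thresh[0]          # tensor([ True, False, False]) -> background
    mask_pos = max_ious > iou_thresh[1]          # tensor([False, False,  True]) -> positive
    # anchors with 0.4 <= IoU <= 0.5 fall in neither mask and are ignored by the classification loss
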
/model/rdd/utils/modules.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : modules.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:03
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from torch import nn
14 | from xtorch import xnn
15 |
16 |
17 | class FeaturePyramidNet(xnn.Module):
18 | def __init__(self, depth, plane):
19 | super(FeaturePyramidNet, self).__init__()
20 | self.link = nn.ModuleList()
21 | self.fuse = nn.ModuleList()
22 | for i in range(depth):
23 | self.link.append(nn.Sequential(xnn.Conv2d(plane, 1, 1, 0, bias=False),
24 | xnn.BatchNorm2d()))
25 |             if i != depth:  # always true for i in range(depth); the last fuse layer is created but never used in forward
26 | self.fuse.append(nn.Sequential(nn.ReLU(inplace=True),
27 | xnn.Conv2d(plane, 3, 1, 1, bias=False),
28 | xnn.BatchNorm2d()))
29 |
30 | def forward(self, features):
31 | features = [self.link[i](feature) for i, feature in enumerate(features)]
32 | for i in range(len(features))[::-1]:
33 | if i != len(features) - 1:
34 | features[i] = self.fuse[i](features[i] + nn.Upsample(scale_factor=2)(features[i + 1]))
35 | features = [nn.ReLU(inplace=True)(feature) for feature in features]
36 | return features
37 |
38 |
39 | class PredictHead(xnn.Module):
40 | def __init__(self, plane, num_anchors, num_classes):
41 | super(PredictHead, self).__init__()
42 | self.num_classes = num_classes
43 | self.body = nn.Sequential(xnn.Conv2d(plane, 3, 1, 1, bias=False),
44 | xnn.BatchNorm2d(),
45 | nn.ReLU(inplace=True),
46 | xnn.Conv2d(num_anchors * num_classes, 3, 1, 1))
47 |
48 | def forward(self, x):
49 | x = self.body(x)
50 | return x.permute(0, 2, 3, 1).reshape(x.size(0), -1, self.num_classes)
51 |
52 |
53 | class DetPredict(xnn.Module):
54 | def __init__(self, depth, plane, num_anchors, num_classes, num_loc_params):
55 | super(DetPredict, self).__init__()
56 | self.heads_cls = nn.ModuleList()
57 | self.heads_loc = nn.ModuleList()
58 | for i in range(depth):
59 | self.heads_cls.append(PredictHead(plane, num_anchors[i], num_classes))
60 | self.heads_loc.append(PredictHead(plane, num_anchors[i], num_loc_params))
61 |
62 | def forward(self, features):
63 | predict_cls, predict_loc = [], []
64 | for i, feature in enumerate(features):
65 | predict_cls.append(self.heads_cls[i](feature))
66 | predict_loc.append(self.heads_loc[i](feature))
67 | predict_cls = torch.cat(predict_cls, dim=1)
68 | predict_loc = torch.cat(predict_loc, dim=1)
69 | return predict_cls, predict_loc
70 |
--------------------------------------------------------------------------------
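
A short shape check for the permute/reshape at the end of PredictHead.forward, using the numbers implied by the DOTA config in run/dota/train.py (21 prior boxes per location, 15 classes, a 96x96 feature map at stride 8 for a 768x768 input):

    import torch

    num_anchors, num_classes = 21, 15
    x = torch.randn(2, num_anchors * num_classes, 96, 96)  # output of the last conv in PredictHead.body
    out = x.permute(0, 2, 3, 1).reshape(x.size(0), -1, num_classes)
    print(out.shape)  # torch.Size([2, 193536, 15]); 96 * 96 * 21 = 193536 predictions per image
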
/model/rdd/utils/priorbox.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : priorbox.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:03
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from collections import OrderedDict
14 |
15 | from utils.misc import LFUCache
16 |
17 |
18 | class PriorBox:
19 | def __init__(self, cfg):
20 | self.cfg = cfg
21 | self.prior_boxes = OrderedDict()
22 |
23 | for stride, size, aspects, scales in zip(cfg['strides'], cfg['sizes'], cfg['aspects'], cfg['scales']):
24 | self.prior_boxes[stride] = self._get_prior_box(stride, size, aspects, scales, cfg.get('old_version', False))
25 |
26 | @staticmethod
27 | def _get_prior_box(stride, size, aspects, scales, old_version=False):
28 | boxes = []
29 | if old_version:
30 | # To be compatible with previous weights
31 | pair = [[aspect, scale] for scale in scales for aspect in aspects]
32 | else:
33 | pair = [[aspect, scale] for aspect in aspects for scale in scales]
34 | for aspect, scale in pair:
35 | length = stride * size * scale
36 | if aspect == 1:
37 | boxes.append([length, length])
38 | else:
39 | boxes.append([length * aspect ** 0.5, length / aspect ** 0.5])
40 | boxes.append([length / aspect ** 0.5, length * aspect ** 0.5])
41 | return boxes
42 |
43 | @staticmethod
44 | def _get_anchors(img_size, prior_boxes):
45 | h, w = img_size
46 | anchors = []
47 | for stride, prior_box in prior_boxes:
48 | assert w % stride == 0 and h % stride == 0
49 | fmw, fmh = w // stride, h // stride
50 | prior_box = torch.tensor(prior_box, dtype=torch.float)
51 | offset_y, offset_x = torch.meshgrid([torch.arange(fmh), torch.arange(fmw)])
52 | offset_x = offset_x.to(prior_box) + 0.5
53 | offset_y = offset_y.to(prior_box) + 0.5
54 | offset = torch.stack([offset_x, offset_y], dim=-1) * stride
55 | offset = offset[:, :, None, :].repeat(1, 1, prior_box.size(0), 1)
56 | prior_box = prior_box[None, None, :, :].repeat(fmh, fmw, 1, 1)
57 | anchors.append(torch.cat([offset, prior_box], dim=-1).reshape(-1, 4))
58 | anchors = torch.cat(anchors)
59 | return anchors
60 |
61 | def get_anchors(self, img_size):
62 | return self._get_anchors(img_size, self.prior_boxes.items())
63 |
64 |
65 | class LFUPriorBox:
66 | def __init__(self, prior_box_cfg, capacity=3):
67 | self.prior_box = PriorBox(prior_box_cfg)
68 | self.num_levels = len(self.prior_box.prior_boxes)
69 | self.num_prior_boxes = [len(prior_boxes) for prior_boxes in self.prior_box.prior_boxes.values()]
70 | self.lfu_cache = LFUCache(capacity)
71 |
72 | def get_anchors(self, img_size):
73 | name = 'anchors-%d-%d' % tuple(img_size)
74 | anchors = self.lfu_cache.get(name, None)
75 | if anchors is None:
76 | anchors = self.prior_box.get_anchors(img_size)
77 | self.lfu_cache.put(name, anchors)
78 | return anchors
79 |
--------------------------------------------------------------------------------
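
A minimal sketch that counts the anchors PriorBox generates per pyramid level for a 768x768 input under the DOTA config in run/dota/train.py (an aspect of 1 contributes one box per scale, every other aspect contributes two):

    strides = [8, 16, 32, 64, 128]
    aspects = [1, 2, 4, 8]
    scales = [2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]
    boxes_per_cell = sum(1 if a == 1 else 2 for a in aspects) * len(scales)  # 21
    for stride in strides:
        fm = 768 // stride
        print(stride, fm * fm * boxes_per_cell)
    # 8: 193536, 16: 48384, 32: 12096, 64: 3024, 128: 756 -> 257796 anchors in total

LFUPriorBox then caches the resulting anchor tensor per image size, so the grid is only rebuilt when a new input resolution is seen.
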
/requirement.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | numpy
3 | pillow
4 | cython
5 | beautifulsoup4
6 | opencv-python
7 | torch>=1.2
8 | torchvision>=0.4
9 | tensorboard>=2.2
--------------------------------------------------------------------------------
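
Assuming the usual pip workflow, the dependencies install with:

    pip install -r requirement.txt

The Cython/CUDA rotated-IoU extensions under utils/box/ext/ each ship their own setup.py; presumably they are built in place with something like "python setup.py build_ext --inplace" run from the respective directory.
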
/run/dota/evaluate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : evaluate.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:10
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import tqdm
17 | import torch
18 | import numpy as np
19 |
20 | from collections import defaultdict
21 | from torch.utils.data import DataLoader
22 |
23 | from data.aug import ops
24 | from data.aug.compose import Compose
25 | from data.dataset import DOTA
26 |
27 | from model.rdd import RDD
28 | from model.backbone import resnet
29 |
30 | from utils.box.bbox_np import xywha2xy4, xy42xywha
31 | from utils.box.rbbox_np import rbbox_batched_nms
32 | from utils.parallel import CustomDetDataParallel
33 |
34 |
35 | @torch.no_grad()
36 | def main():
37 | global checkpoint
38 | if checkpoint is None:
39 | dir_weight = os.path.join(dir_save, 'weight')
40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
41 | current_step = max(indexes)
42 | checkpoint = os.path.join(dir_weight, '%d.pth' % current_step)
43 |
44 | batch_size = 32
45 | num_workers = 4
46 |
47 | image_size = 768
48 | aug = Compose([ops.PadSquare(), ops.Resize(image_size)])
49 | dataset = DOTA(dir_dataset, image_set, aug)
50 | loader = DataLoader(dataset, batch_size, num_workers=num_workers, pin_memory=True, collate_fn=dataset.collate)
51 | num_classes = len(dataset.names)
52 |
53 | prior_box = {
54 | 'strides': [8, 16, 32, 64, 128],
55 | 'sizes': [3] * 5,
56 | 'aspects': [[1, 2, 4, 8]] * 5,
57 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
58 | 'old_version': old_version
59 | }
60 | conf_thresh = 0.01
61 | nms_thresh = 0.45
62 | cfg = {
63 | 'prior_box': prior_box,
64 | 'num_classes': num_classes,
65 | 'extra': 2,
66 | 'conf_thresh': conf_thresh,
67 | 'nms_thresh': nms_thresh,
68 | }
69 |
70 | model = RDD(backbone(fetch_feature=True), cfg)
71 | model.build_pipe(shape=[2, 3, image_size, image_size])
72 | model.restore(checkpoint)
73 | if len(device_ids) > 1:
74 | model = CustomDetDataParallel(model, device_ids)
75 | model.cuda()
76 | model.eval()
77 |
78 | ret_raw = defaultdict(list)
79 | for images, targets, infos in tqdm.tqdm(loader):
80 | images = images.cuda() / 255
81 | dets = model(images)
82 | for (det, info) in zip(dets, infos):
83 | if det:
84 | bboxes, scores, labels = det
85 | bboxes = bboxes.cpu().numpy()
86 | scores = scores.cpu().numpy()
87 | labels = labels.cpu().numpy()
88 | fname, x, y, w, h = os.path.splitext(os.path.basename(info['img_path']))[0].split('-')[:5]
89 | x, y, w, h = int(x), int(y), int(w), int(h)
90 | long_edge = max(w, h)
91 | pad_x, pad_y = (long_edge - w) // 2, (long_edge - h) // 2
92 | bboxes = np.stack([xywha2xy4(bbox) for bbox in bboxes])
93 | bboxes *= long_edge / image_size
94 | bboxes -= [pad_x, pad_y]
95 | bboxes += [x, y]
96 | bboxes = np.stack([xy42xywha(bbox) for bbox in bboxes])
97 | ret_raw[fname].append([bboxes, scores, labels])
98 |
99 | print('merging results...')
100 | ret = []
101 |
102 | for fname, dets in ret_raw.items():
103 | bboxes, scores, labels = zip(*dets)
104 | bboxes = np.concatenate(list(bboxes))
105 | scores = np.concatenate(list(scores))
106 | labels = np.concatenate(list(labels))
107 | keeps = rbbox_batched_nms(bboxes, scores, labels, nms_thresh)
108 | ret.append([fname, [bboxes[keeps], scores[keeps], labels[keeps]]])
109 |
110 | print('converting to submission format...')
111 | ret_save = defaultdict(list)
112 | for fname, (bboxes, scores, labels) in ret:
113 | for bbox, score, label in zip(bboxes, scores, labels):
114 | bbox = xywha2xy4(bbox).ravel()
115 | line = '%s %.12f %.1f %.1f %.1f %.1f %.1f %.1f %.1f %.1f' % (fname, score, *bbox)
116 | ret_save[dataset.label2name[label]].append(line)
117 |
118 | print('saving...')
119 | os.makedirs(os.path.join(dir_save, 'submission'), exist_ok=True)
120 | for name, dets in ret_save.items():
121 | with open(os.path.join(dir_save, 'submission', 'Task%d_%s.txt' % (1, name)), 'wt') as f:
122 | f.write('\n'.join(dets))
123 |
124 | print('finished')
125 |
126 |
127 | if __name__ == '__main__':
128 |
129 | device_ids = [0]
130 | torch.cuda.set_device(device_ids[0])
131 |
132 | dir_dataset = ''
133 | dir_save = ''
134 |
135 | backbone = resnet.resnet101
136 | checkpoint = None
137 | old_version = False # set True when using the original weights
138 | image_set = 'test' # test-768
139 |
140 | main()
141 |
--------------------------------------------------------------------------------
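
A worked example (with made-up numbers) of the coordinate un-mapping performed above: a crop named P0001-512-256-1024-768.png gives x, y, w, h = 512, 256, 1024, 768, so long_edge = 1024 and pad_x, pad_y = 0, 128. A detection at (384, 384) in the 768x768 network input is scaled by 1024 / 768 to (512, 512), shifted by the padding to (512, 384), and offset by the crop origin to (1024, 640) in the original image.
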
/run/dota/prepare.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : prepare.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:10
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import json
17 | import cv2 as cv
18 | import numpy as np
19 |
20 | from utils.crop_image import Cropper
21 |
22 |
23 | def txt2json(dir_txt, dir_json):
24 | os.makedirs(dir_json, exist_ok=True)
25 | for file in os.listdir(dir_txt):
26 | objs = []
27 | for i, line in enumerate(open(os.path.join(dir_txt, file)).readlines()):
28 | line = line.strip()
29 | line_split = line.split(' ')
30 | if len(line_split) == 10:
31 | obj = dict()
32 | coord = np.array(line_split[:8], dtype=np.float32).reshape([4, 2])
33 |                 bbox = cv.boxPoints(cv.minAreaRect(coord)).astype(int).tolist()
34 | obj['name'] = line_split[8].lower()
35 | obj['bbox'] = bbox
36 | objs.append(obj)
37 | else:
38 | print(' %s' % line)
39 | if objs:
40 | json.dump(objs, open(os.path.join(dir_json, file.replace('txt', 'json')), 'wt'), indent=2)
41 |
42 |
43 | def main(image_set, single_scale=False):
44 | # (1)
45 | if image_set != 'test':
46 | dir_txt = os.path.join(dir_dataset, 'labelTxt', image_set)
47 | out_dir_json = os.path.join(dir_dataset, 'annotations', image_set)
48 | txt2json(dir_txt, out_dir_json)
49 |
50 | # (2)
51 | pairs = []
52 | for filename in os.listdir(os.path.join(dir_dataset, 'images', image_set)):
53 | anno = os.path.join(dir_dataset, 'annotations', image_set, filename.replace('png', 'json'))
54 | img = os.path.join(dir_dataset, 'images', image_set, filename)
55 | if not os.path.exists(anno):
56 | anno = None
57 | pairs.append([img, anno])
58 |
59 | overlap = 0.25
60 | sizes = [768] if single_scale else [512, 768, 1024, 1536]
61 | save_empty = image_set == 'test'
62 | image_set = f'{image_set}-{sizes[0]}' if single_scale else image_set
63 |
64 | out_dir_images = os.path.join(dir_dataset, 'images', f'{image_set}-crop')
65 | out_dir_annos = os.path.join(dir_dataset, 'annotations', f'{image_set}-crop')
66 |
67 | cropper = Cropper(sizes, overlap)
68 | cropper.crop_batch(pairs, out_dir_images, out_dir_annos, save_empty)
69 |
70 | # (3)
71 | pairs = []
72 | for filename in os.listdir(out_dir_images):
73 | img = os.path.join('images', f'{image_set}-crop', filename)
74 | anno = None if image_set == 'test' else os.path.join('annotations', f'{image_set}-crop', filename.replace('jpg', 'json'))
75 | pairs.append([img, anno])
76 | out_dir = os.path.join(dir_dataset, 'image-sets')
77 | os.makedirs(out_dir, exist_ok=True)
78 | json.dump(pairs, open(os.path.join(out_dir, f'{image_set}.json'), 'wt'), indent=2)
79 |
80 |
81 | if __name__ == '__main__':
82 |
83 | # directory hierarchy
84 |
85 | # root/images/train/P0000.png
86 | # -----------/train/...
87 | # -----------/val/...
88 | # -----------/test/...
89 |
90 | # root/labelTxt/train/P0000.txt
91 | # -------------/train/...
92 | # -------------/val/...
93 |
94 | # (1) convert annotation files
95 | # (2) crop images
96 | # (3) generate image-set files
97 |
98 | dir_dataset = ''
99 |
100 | main('train')
101 | main('val')
102 | main('test')
103 | main('test', True)
104 |
--------------------------------------------------------------------------------
/run/dota/train-dist.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : train.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2021/03/20 16:00
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 |
12 | def main(batch_size, rank, world_size):
13 |
14 | import os
15 | import tqdm
16 | import torch
17 | import tempfile
18 |
19 | from torch import optim
20 | from torch import distributed as dist
21 | from torch.nn import SyncBatchNorm
22 | from torch.utils.data import DataLoader
23 | from torch.utils.tensorboard import SummaryWriter
24 |
25 | from data.aug.compose import Compose
26 | from data.aug import ops
27 | from data.dataset import DOTA
28 |
29 | from model.rdd import RDD
30 | from model.backbone import resnet
31 |
32 | from utils.adjust_lr import adjust_lr_multi_step
33 |
34 | torch.manual_seed(0)
35 | torch.backends.cudnn.benchmark = True
36 | torch.cuda.set_device(rank)
37 | dist.init_process_group("nccl", init_method='env://', rank=rank, world_size=world_size)
38 |
39 | backbone = resnet.resnet101
40 |
41 | dir_dataset = ''
42 | dir_save = ''
43 |
44 | dir_weight = os.path.join(dir_save, 'weight')
45 | dir_log = os.path.join(dir_save, 'log')
46 | os.makedirs(dir_weight, exist_ok=True)
47 | if rank == 0:
48 | writer = SummaryWriter(dir_log)
49 |
50 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
51 | current_step = max(indexes) if indexes else 0
52 |
53 | image_size = 768
54 | lr = 1e-3
55 | batch_size //= world_size
56 | num_workers = 4
57 |
58 | max_step = 250000
59 | lr_cfg = [[100000, lr], [200000, lr / 10], [max_step, lr / 50]]
60 | warm_up = [1000, lr / 50, lr]
61 | save_interval = 1000
62 |
63 | aug = Compose([
64 | ops.ToFloat(),
65 | ops.PhotometricDistort(),
66 | ops.RandomHFlip(),
67 | ops.RandomVFlip(),
68 | ops.RandomRotate90(),
69 | ops.ResizeJitter([0.8, 1.2]),
70 | ops.PadSquare(),
71 | ops.Resize(image_size),
72 | ops.BBoxFilter(24 * 24 * 0.4)
73 | ])
74 | dataset = DOTA(dir_dataset, ['train', 'val'], aug)
75 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset, world_size, rank)
76 | batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True)
77 | loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=dataset.collate)
78 | num_classes = len(dataset.names)
79 |
80 | prior_box = {
81 | 'strides': [8, 16, 32, 64, 128],
82 | 'sizes': [3] * 5,
83 | 'aspects': [[1, 2, 4, 8]] * 5,
84 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
85 | }
86 |
87 | cfg = {
88 | 'prior_box': prior_box,
89 | 'num_classes': num_classes,
90 | 'extra': 2,
91 | }
92 | device = torch.device(f'cuda:{rank}')
93 | model = RDD(backbone(fetch_feature=True), cfg)
94 | model.build_pipe(shape=[2, 3, image_size, image_size])
95 | model = SyncBatchNorm.convert_sync_batchnorm(model)
96 | model.to(device)
97 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
98 | if current_step:
99 | model.module.load_state_dict(torch.load(os.path.join(dir_weight, '%d.pth' % current_step), map_location=device))
100 | else:
101 | checkpoint = os.path.join(tempfile.gettempdir(), "initial-weights.pth")
102 | if rank == 0:
103 | model.module.init()
104 | torch.save(model.module.state_dict(), checkpoint)
105 | dist.barrier()
106 | if rank > 0:
107 | model.module.load_state_dict(torch.load(checkpoint, map_location=device))
108 | dist.barrier()
109 | if rank == 0:
110 | os.remove(checkpoint)
111 |
112 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
113 | training = True
114 | while training and current_step < max_step:
115 | tqdm_loader = tqdm.tqdm(loader) if rank == 0 else loader
116 | for images, targets, infos in tqdm_loader:
117 | current_step += 1
118 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up)
119 |
120 | images = images.cuda() / 255
121 | losses = model(images, targets)
122 | loss = sum(losses.values())
123 | loss.backward()
124 | optimizer.step()
125 | optimizer.zero_grad()
126 |
127 | if rank == 0:
128 | for key, val in list(losses.items()):
129 | losses[key] = val.item()
130 | writer.add_scalar(key, val, global_step=current_step)
131 | writer.flush()
132 | tqdm_loader.set_postfix(losses)
133 | tqdm_loader.set_description(f'<{current_step}/{max_step}>')
134 |
135 | if current_step % save_interval == 0:
136 | save_path = os.path.join(dir_weight, '%d.pth' % current_step)
137 | state_dict = model.module.state_dict()
138 | torch.save(state_dict, save_path)
139 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval))
140 | if os.path.exists(cache_file):
141 | os.remove(cache_file)
142 |
143 | if current_step >= max_step:
144 | training = False
145 | if rank == 0:
146 | writer.close()
147 | break
148 |
149 |
150 | if __name__ == "__main__":
151 |
152 | import os
153 | import sys
154 | import argparse
155 | import multiprocessing
156 |
157 | sys.path.append('.')
158 | multiprocessing.set_start_method('spawn')
159 |
160 | parser = argparse.ArgumentParser()
161 | parser.add_argument('--batch_size', default=12, type=int)
162 | parser.add_argument('--device_ids', default='0,1', type=str)
163 | parser.add_argument('--local_rank', default=0, type=int)
164 |
165 | args = parser.parse_args()
166 | os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids
167 | device_ids = list(range(len(args.device_ids.split(','))))
168 |
169 | processes = []
170 | for device_id in device_ids:
171 | p = multiprocessing.Process(target=main, args=(args.batch_size, device_id, len(device_ids)))
172 | p.start()
173 | processes.append(p)
174 | for p in processes:
175 | p.join()
176 |
177 | # python -m torch.distributed.launch run/dota/train-dist.py --batch_size=12 --device_ids=0,1
178 |
--------------------------------------------------------------------------------
/run/dota/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : train.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:10
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import tqdm
17 | import torch
18 |
19 | from torch import optim
20 | from torch.utils.data import DataLoader
21 | from torch.utils.tensorboard import SummaryWriter
22 |
23 | from data.aug.compose import Compose
24 | from data.aug import ops
25 | from data.dataset import DOTA
26 |
27 | from model.rdd import RDD
28 | from model.backbone import resnet
29 |
30 | from utils.adjust_lr import adjust_lr_multi_step
31 | from utils.parallel import convert_model, CustomDetDataParallel
32 |
33 |
34 | def main():
35 | dir_weight = os.path.join(dir_save, 'weight')
36 | dir_log = os.path.join(dir_save, 'log')
37 | os.makedirs(dir_weight, exist_ok=True)
38 | writer = SummaryWriter(dir_log)
39 |
40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
41 | current_step = max(indexes) if indexes else 0
42 |
43 | image_size = 768
44 | lr = 1e-3
45 | batch_size = 12
46 | num_workers = 4
47 |
48 | max_step = 250000
49 | lr_cfg = [[100000, lr], [200000, lr / 10], [max_step, lr / 50]]
50 | warm_up = [1000, lr / 50, lr]
51 | save_interval = 1000
52 |
53 | aug = Compose([
54 | ops.ToFloat(),
55 | ops.PhotometricDistort(),
56 | ops.RandomHFlip(),
57 | ops.RandomVFlip(),
58 | ops.RandomRotate90(),
59 | ops.ResizeJitter([0.8, 1.2]),
60 | ops.PadSquare(),
61 | ops.Resize(image_size),
62 | ops.BBoxFilter(24 * 24 * 0.4)
63 | ])
64 | dataset = DOTA(dir_dataset, ['train', 'val'], aug)
65 | loader = DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True,
66 | collate_fn=dataset.collate)
67 | num_classes = len(dataset.names)
68 |
69 | prior_box = {
70 | 'strides': [8, 16, 32, 64, 128],
71 | 'sizes': [3] * 5,
72 | 'aspects': [[1, 2, 4, 8]] * 5,
73 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
74 | }
75 |
76 | cfg = {
77 | 'prior_box': prior_box,
78 | 'num_classes': num_classes,
79 | 'extra': 2,
80 | }
81 |
82 | model = RDD(backbone(fetch_feature=True), cfg)
83 | model.build_pipe(shape=[2, 3, image_size, image_size])
84 | if current_step:
85 | model.restore(os.path.join(dir_weight, '%d.pth' % current_step))
86 | else:
87 | model.init()
88 | if len(device_ids) > 1:
89 | model = convert_model(model)
90 | model = CustomDetDataParallel(model, device_ids)
91 | model.cuda()
92 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
93 | training = True
94 | while training and current_step < max_step:
95 | tqdm_loader = tqdm.tqdm(loader)
96 | for images, targets, infos in tqdm_loader:
97 | current_step += 1
98 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up)
99 |
100 | images = images.cuda() / 255
101 | losses = model(images, targets)
102 | loss = sum(losses.values())
103 | loss.backward()
104 | optimizer.step()
105 | optimizer.zero_grad()
106 |
107 | for key, val in list(losses.items()):
108 | losses[key] = val.item()
109 | writer.add_scalar(key, val, global_step=current_step)
110 | writer.flush()
111 | tqdm_loader.set_postfix(losses)
112 | tqdm_loader.set_description(f'<{current_step}/{max_step}>')
113 |
114 | if current_step % save_interval == 0:
115 | save_path = os.path.join(dir_weight, '%d.pth' % current_step)
116 | state_dict = model.state_dict() if len(device_ids) == 1 else model.module.state_dict()
117 | torch.save(state_dict, save_path)
118 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval))
119 | if os.path.exists(cache_file):
120 | os.remove(cache_file)
121 |
122 | if current_step >= max_step:
123 | training = False
124 | writer.close()
125 | break
126 |
127 |
128 | if __name__ == '__main__':
129 |
130 | torch.manual_seed(0)
131 | torch.backends.cudnn.benchmark = True
132 |
133 | device_ids = [0, 1]
134 | torch.cuda.set_device(device_ids[0])
135 | backbone = resnet.resnet101
136 |
137 | dir_dataset = ''
138 | dir_save = ''
139 |
140 | main()
141 |
--------------------------------------------------------------------------------
/run/hrsc2016/evaluate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : evaluate.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:10
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import tqdm
17 | import torch
18 | import cv2 as cv
19 | import numpy as np
20 |
21 | from torch.utils.data import DataLoader
22 |
23 | from data.aug import ops
24 | from data.dataset import HRSC2016
25 |
26 | from model.rdd import RDD
27 | from model.backbone import resnet
28 |
29 | from utils.box.bbox_np import xy42xywha, xywha2xy4
30 | from utils.box.metric import get_det_aps
31 | from utils.parallel import CustomDetDataParallel
32 |
33 |
34 | @torch.no_grad()
35 | def main():
36 | global checkpoint
37 | if checkpoint is None:
38 | dir_weight = os.path.join(dir_save, 'weight')
39 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
40 | current_step = max(indexes)
41 | checkpoint = os.path.join(dir_weight, '%d.pth' % current_step)
42 |
43 | image_size = 768
44 | batch_size = 32
45 | num_workers = 4
46 |
47 | aug = ops.Resize(image_size)
48 | dataset = HRSC2016(dir_dataset, 'test', aug)
49 | loader = DataLoader(dataset, batch_size, num_workers=num_workers, pin_memory=True, collate_fn=dataset.collate)
50 | num_classes = len(dataset.names)
51 |
52 | prior_box = {
53 | 'strides': [8, 16, 32, 64, 128],
54 | 'sizes': [3] * 5,
55 | 'aspects': [[1.5, 3, 5, 8]] * 5,
56 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
57 | 'old_version': old_version
58 | }
59 | conf_thresh = 0.01
60 | nms_thresh = 0.45
61 | cfg = {
62 | 'prior_box': prior_box,
63 | 'num_classes': num_classes,
64 | 'extra': 2,
65 | 'conf_thresh': conf_thresh,
66 | 'nms_thresh': nms_thresh,
67 | }
68 |
69 | model = RDD(backbone(fetch_feature=True), cfg)
70 | model.build_pipe(shape=[2, 3, image_size, image_size])
71 | model.restore(checkpoint)
72 | if len(device_ids) > 1:
73 | model = CustomDetDataParallel(model, device_ids)
74 | model.cuda()
75 | model.eval()
76 |
77 | count = 0
78 | gt_list, det_list = [], []
79 | for images, targets, infos in tqdm.tqdm(loader):
80 | images = images.cuda() / 255
81 | dets = model(images)
82 | for target, det, info in zip(targets, dets, infos):
83 | if target:
84 | bboxes = np.stack([xy42xywha(bbox) for bbox in info['objs']['bboxes']])
85 | labels = info['objs']['labels']
86 | gt_list.extend([count, bbox, 1, label] for bbox, label in zip(bboxes, labels))
87 | if det:
88 | ih, iw = info['shape'][:2]
89 | bboxes, scores, labels = list(map(lambda x: x.cpu().numpy(), det))
90 | bboxes = np.stack([xywha2xy4(bbox) for bbox in bboxes])
91 | bboxes_ = bboxes * [iw / image_size, ih / image_size]
92 | # bboxes = np.stack([xy42xywha(bbox) for bbox in bboxes_])
93 | bboxes = []
94 | for bbox in bboxes_.astype(np.float32):
95 | (x, y), (w, h), a = cv.minAreaRect(bbox)
96 | bboxes.append([x, y, w, h, a])
97 | bboxes = np.array(bboxes)
98 | det_list.extend([count, bbox, score, label] for bbox, score, label in zip(bboxes, scores, labels))
99 | count += 1
100 | APs = get_det_aps(det_list, gt_list, num_classes, use_07_metric=use_07_metric)
101 | mAP = sum(APs) / len(APs)
102 | print('AP')
103 | for label in range(num_classes):
104 | print(f'{dataset.label2name[label]}: {APs[label]}')
105 | print(f'mAP: {mAP}')
106 |
107 |
108 | if __name__ == '__main__':
109 |
110 | device_ids = [0]
111 | torch.cuda.set_device(device_ids[0])
112 |
113 | dir_dataset = ''
114 | dir_save = ''
115 |
116 | backbone = resnet.resnet101
117 | checkpoint = None
118 | use_07_metric = False
119 | old_version = False # set True when using the original weights
120 |
121 | main()
122 |
--------------------------------------------------------------------------------
/run/hrsc2016/prepare.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : prepare.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:11
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import json
17 | import numpy as np
18 |
19 | from bs4 import BeautifulSoup as bs
20 | from utils.box.bbox_np import xywha2xy4
21 |
22 |
23 | def xml2json(dir_xml, dir_json):
24 | os.makedirs(dir_json, exist_ok=True)
25 | for xml in os.listdir(dir_xml):
26 | objs = []
27 | name = os.path.splitext(xml)[0]
28 | for obj in bs(open(os.path.join(dir_xml, xml)), "html.parser").findAll('hrsc_object'):
29 | xywha = []
30 | xywha.append(float(obj.select_one('mbox_cx').text))
31 | xywha.append(float(obj.select_one('mbox_cy').text))
32 | xywha.append(float(obj.select_one('mbox_w').text))
33 | xywha.append(float(obj.select_one('mbox_h').text))
34 | xywha.append(np.rad2deg(float(obj.select_one('mbox_ang').text)))
35 | obj = dict()
36 | obj['name'] = 'ship'
37 | obj['bbox'] = xywha2xy4(xywha).tolist()
38 | objs.append(obj)
39 | if objs:
40 | json.dump(objs, open(os.path.join(dir_json, name + '.json'), 'wt'), indent=2)
41 |
42 |
43 | def main():
44 | # (1)
45 | dir_xml = os.path.join(dir_dataset, 'labelXml')
46 | out_dir_json = os.path.join(dir_dataset, 'annotations')
47 | xml2json(dir_xml, out_dir_json)
48 |
49 | # (2)
50 | out_dir = os.path.join(dir_dataset, 'image-sets')
51 | os.makedirs(out_dir, exist_ok=True)
52 | for image_set in ['trainval', 'test']:
53 | pairs = []
54 | with open(os.path.join(dir_dataset, 'ImageSets', f'{image_set}.txt')) as f:
55 | for line in f.readlines():
56 | line = line.strip()
57 | img = os.path.join('images', f'{line}.bmp')
58 | anno = os.path.join('annotations', f'{line}.json')
59 | if not os.path.exists(os.path.join(dir_dataset, anno)):
60 | anno = None
61 | pairs.append([img, anno])
62 | json.dump(pairs, open(os.path.join(out_dir, f'{image_set}.json'), 'wt'), indent=2)
63 |
64 |
65 | if __name__ == '__main__':
66 |
67 | # directory hierarchy
68 |
69 | # root/images/100000001.bmp
70 | # ----/images/...
71 |
72 | # root/labelXml/100000001.xml
73 | # ----/labelXml/...
74 |
75 | # (1) convert annotation files
76 | # (2) generate image-set files
77 |
78 | dir_dataset = ''
79 |
80 | main()
81 |
--------------------------------------------------------------------------------
/run/hrsc2016/train-dist.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : train.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2021/03/20 16:00
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 |
12 | def main(batch_size, rank, world_size):
13 |
14 | import os
15 | import tqdm
16 | import torch
17 | import tempfile
18 |
19 | from torch import optim
20 | from torch import distributed as dist
21 | from torch.nn import SyncBatchNorm
22 | from torch.utils.data import DataLoader
23 | from torch.utils.tensorboard import SummaryWriter
24 |
25 | from data.aug.compose import Compose
26 | from data.aug import ops
27 | from data.dataset import HRSC2016
28 |
29 | from model.rdd import RDD
30 | from model.backbone import resnet
31 |
32 | from utils.adjust_lr import adjust_lr_multi_step
33 |
34 | torch.manual_seed(0)
35 | torch.backends.cudnn.benchmark = True
36 | torch.cuda.set_device(rank)
37 | dist.init_process_group("nccl", init_method='env://', rank=rank, world_size=world_size)
38 |
39 | backbone = resnet.resnet101
40 |
41 | dir_dataset = ''
42 | dir_save = ''
43 |
44 | dir_weight = os.path.join(dir_save, 'weight')
45 | dir_log = os.path.join(dir_save, 'log')
46 | os.makedirs(dir_weight, exist_ok=True)
47 | if rank == 0:
48 | writer = SummaryWriter(dir_log)
49 |
50 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
51 | current_step = max(indexes) if indexes else 0
52 |
53 | image_size = 768
54 | lr = 1e-3
55 | batch_size //= world_size
56 | num_workers = 4
57 |
58 | max_step = 12000
59 | lr_cfg = [[7500, lr], [max_step, lr / 10]]
60 | warm_up = [500, lr / 50, lr]
61 | save_interval = 1000
62 |
63 | aug = Compose([
64 | ops.ToFloat(),
65 | ops.PhotometricDistort(),
66 | ops.RandomHFlip(),
67 | ops.RandomVFlip(),
68 | ops.RandomRotate90(),
69 | ops.ResizeJitter([0.8, 1.2]),
70 | ops.PadSquare(),
71 | ops.Resize(image_size),
72 | ])
73 | dataset = HRSC2016(dir_dataset, ['trainval'], aug)
74 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset, world_size, rank)
75 | batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True)
76 | loader = DataLoader(dataset, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=dataset.collate)
77 | num_classes = len(dataset.names)
78 |
79 | prior_box = {
80 | 'strides': [8, 16, 32, 64, 128],
81 | 'sizes': [3] * 5,
82 | 'aspects': [[1.5, 3, 5, 8]] * 5,
83 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
84 | }
85 |
86 | cfg = {
87 | 'prior_box': prior_box,
88 | 'num_classes': num_classes,
89 | 'extra': 2,
90 | }
91 | device = torch.device(f'cuda:{rank}')
92 | model = RDD(backbone(fetch_feature=True), cfg)
93 | model.build_pipe(shape=[2, 3, image_size, image_size])
94 | model = SyncBatchNorm.convert_sync_batchnorm(model)
95 | model.to(device)
96 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
97 | if current_step:
98 | model.module.load_state_dict(torch.load(os.path.join(dir_weight, '%d.pth' % current_step), map_location=device))
99 | else:
100 | checkpoint = os.path.join(tempfile.gettempdir(), "initial-weights.pth")
101 | if rank == 0:
102 | model.module.init()
103 | torch.save(model.module.state_dict(), checkpoint)
104 | dist.barrier()
105 | if rank > 0:
106 | model.module.load_state_dict(torch.load(checkpoint, map_location=device))
107 | dist.barrier()
108 | if rank == 0:
109 | os.remove(checkpoint)
110 |
111 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
112 | training = True
113 | while training and current_step < max_step:
114 | tqdm_loader = tqdm.tqdm(loader) if rank == 0 else loader
115 | for images, targets, infos in tqdm_loader:
116 | current_step += 1
117 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up)
118 |
119 | images = images.cuda() / 255
120 | losses = model(images, targets)
121 | loss = sum(losses.values())
122 | loss.backward()
123 | optimizer.step()
124 | optimizer.zero_grad()
125 |
126 | if rank == 0:
127 | for key, val in list(losses.items()):
128 | losses[key] = val.item()
129 | writer.add_scalar(key, val, global_step=current_step)
130 | writer.flush()
131 | tqdm_loader.set_postfix(losses)
132 | tqdm_loader.set_description(f'<{current_step}/{max_step}>')
133 |
134 | if current_step % save_interval == 0:
135 | save_path = os.path.join(dir_weight, '%d.pth' % current_step)
136 | state_dict = model.module.state_dict()
137 | torch.save(state_dict, save_path)
138 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval))
139 | if os.path.exists(cache_file):
140 | os.remove(cache_file)
141 |
142 | if current_step >= max_step:
143 | training = False
144 | if rank == 0:
145 | writer.close()
146 | break
147 |
148 |
149 | if __name__ == "__main__":
150 |
151 | import os
152 | import sys
153 | import argparse
154 | import multiprocessing
155 |
156 | sys.path.append('.')
157 | multiprocessing.set_start_method('spawn')
158 |
159 | parser = argparse.ArgumentParser()
160 | parser.add_argument('--batch_size', default=12, type=int)
161 | parser.add_argument('--device_ids', default='0,1', type=str)
162 | parser.add_argument('--local_rank', default=0, type=int)
163 |
164 | args = parser.parse_args()
165 | os.environ["CUDA_VISIBLE_DEVICES"] = args.device_ids
166 | device_ids = list(range(len(args.device_ids.split(','))))
167 |
168 | processes = []
169 | for device_id in device_ids:
170 | p = multiprocessing.Process(target=main, args=(args.batch_size, device_id, len(device_ids)))
171 | p.start()
172 | processes.append(p)
173 | for p in processes:
174 | p.join()
175 |
176 | # python -m torch.distributed.launch run/hrsc2016/train-dist.py --batch_size=12 --device_ids=0,1
177 |
--------------------------------------------------------------------------------
/run/hrsc2016/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : train.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:11
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import tqdm
17 | import torch
18 |
19 | from torch import optim
20 | from torch.utils.data import DataLoader
21 | from torch.utils.tensorboard import SummaryWriter
22 |
23 | from data.aug.compose import Compose
24 | from data.aug import ops
25 | from data.dataset import HRSC2016
26 |
27 | from model.rdd import RDD
28 | from model.backbone import resnet
29 |
30 | from utils.adjust_lr import adjust_lr_multi_step
31 | from utils.parallel import convert_model, CustomDetDataParallel
32 |
33 |
34 | def main():
35 | dir_weight = os.path.join(dir_save, 'weight')
36 | dir_log = os.path.join(dir_save, 'log')
37 | os.makedirs(dir_weight, exist_ok=True)
38 | writer = SummaryWriter(dir_log)
39 |
40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
41 | current_step = max(indexes) if indexes else 0
42 |
43 | image_size = 768
44 | lr = 1e-3
45 | batch_size = 12
46 | num_workers = 4
47 |
48 | max_step = 12000
49 | lr_cfg = [[7500, lr], [max_step, lr / 10]]
50 | warm_up = [500, lr / 50, lr]
51 | save_interval = 1000
52 |
53 | aug = Compose([
54 | ops.ToFloat(),
55 | ops.PhotometricDistort(),
56 | ops.RandomHFlip(),
57 | ops.RandomVFlip(),
58 | ops.RandomRotate90(),
59 | ops.ResizeJitter([0.8, 1.2]),
60 | ops.PadSquare(),
61 | ops.Resize(image_size),
62 | ])
63 | dataset = HRSC2016(dir_dataset, ['trainval'], aug)
64 | loader = DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True,
65 | collate_fn=dataset.collate)
66 | num_classes = len(dataset.names)
67 |
68 | prior_box = {
69 | 'strides': [8, 16, 32, 64, 128],
70 | 'sizes': [3] * 5,
71 | 'aspects': [[1.5, 3, 5, 8]] * 5,
72 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 5,
73 | }
74 |
75 | cfg = {
76 | 'prior_box': prior_box,
77 | 'num_classes': num_classes,
78 | 'extra': 2,
79 | }
80 |
81 | model = RDD(backbone(fetch_feature=True), cfg)
82 | model.build_pipe(shape=[2, 3, image_size, image_size])
83 | if current_step:
84 | model.restore(os.path.join(dir_weight, '%d.pth' % current_step))
85 | else:
86 | model.init()
87 | if len(device_ids) > 1:
88 | model = convert_model(model)
89 | model = CustomDetDataParallel(model, device_ids)
90 | model = model.cuda()
91 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
92 | training = True
93 |
94 | while training and current_step < max_step:
95 | tqdm_loader = tqdm.tqdm(loader)
96 | for images, targets, infos in tqdm_loader:
97 | current_step += 1
98 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up)
99 |
100 | images = images.cuda() / 255
101 | losses = model(images, targets)
102 | loss = sum(losses.values())
103 | loss.backward()
104 | optimizer.step()
105 | optimizer.zero_grad()
106 |
107 | for key, val in list(losses.items()):
108 | losses[key] = val.item()
109 | writer.add_scalar(key, val, global_step=current_step)
110 | writer.flush()
111 | tqdm_loader.set_postfix(losses)
112 | tqdm_loader.set_description(f'<{current_step}/{max_step}>')
113 |
114 | if current_step % save_interval == 0:
115 | save_path = os.path.join(dir_weight, '%d.pth' % current_step)
116 | state_dict = model.state_dict() if len(device_ids) == 1 else model.module.state_dict()
117 | torch.save(state_dict, save_path)
118 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval))
119 | if os.path.exists(cache_file):
120 | os.remove(cache_file)
121 |
122 | if current_step >= max_step:
123 | training = False
124 | writer.close()
125 | break
126 |
127 |
128 | if __name__ == '__main__':
129 |
130 | torch.manual_seed(0)
131 | torch.backends.cudnn.benchmark = True
132 |
133 | device_ids = [0, 1]
134 | torch.cuda.set_device(device_ids[0])
135 | backbone = resnet.resnet101
136 |
137 | dir_dataset = ''
138 | dir_save = ''
139 |
140 | main()
141 |
--------------------------------------------------------------------------------
/run/ucas-aod/evaluate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : evaluate.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:11
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import tqdm
17 | import torch
18 | import cv2 as cv
19 | import numpy as np
20 |
21 | from torch.utils.data import DataLoader
22 |
23 | from data.aug import ops
24 | from data.dataset import UCAS_AOD
25 |
26 | from model.rdd import RDD
27 | from model.backbone import resnet
28 |
29 | from utils.box.bbox_np import xy42xywha, xywha2xy4
30 | from utils.box.metric import get_det_aps
31 |
32 |
33 | @torch.no_grad()
34 | def main():
35 | global checkpoint
36 | if checkpoint is None:
37 | dir_weight = os.path.join(dir_save, 'weight')
38 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
39 | current_step = max(indexes)
40 | checkpoint = os.path.join(dir_weight, '%d.pth' % current_step)
41 |
42 | image_size = 768
43 | batch_size = 1
44 | num_workers = 4
45 |
46 | aug = ops.ResizeBase(64)
47 | dataset = UCAS_AOD(dir_dataset, 'test', aug)
48 | loader = DataLoader(dataset, batch_size, num_workers=num_workers, pin_memory=True, collate_fn=dataset.collate)
49 | num_classes = len(dataset.names)
50 |
51 | prior_box = {
52 | 'strides': [8, 16, 32, 64],
53 | 'sizes': [3] * 4,
54 | 'aspects': [[1, 2]] * 4,
55 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 4,
56 | 'old_version': old_version
57 | }
58 | conf_thresh = 0.01
59 | nms_thresh = 0.45
60 | cfg = {
61 | 'prior_box': prior_box,
62 | 'num_classes': num_classes,
63 | 'extra': 1,
64 | 'conf_thresh': conf_thresh,
65 | 'nms_thresh': nms_thresh,
66 | }
67 |
68 | model = RDD(backbone(fetch_feature=True), cfg)
69 | model.build_pipe(shape=[2, 3, image_size, image_size])
70 | model.restore(checkpoint)
71 | model.cuda()
72 | model.eval()
73 |
74 | count = 0
75 | gt_list, det_list = [], []
76 | for images, targets, infos in tqdm.tqdm(loader):
77 | images = images.cuda() / 255
78 | rh, rw = images.shape[2:]
79 | dets = model(images)
80 | for target, det, info in zip(targets, dets, infos):
81 | if target:
82 | bboxes = np.stack([xy42xywha(bbox) for bbox in info['objs']['bboxes']])
83 | labels = info['objs']['labels']
84 | gt_list.extend([count, bbox, 1, label] for bbox, label in zip(bboxes, labels))
85 | if det:
86 | ih, iw = info['shape'][:2]
87 | bboxes, scores, labels = list(map(lambda x: x.cpu().numpy(), det))
88 | bboxes = np.stack([xywha2xy4(bbox) for bbox in bboxes])
89 | bboxes_ = bboxes * [iw / rw, ih / rh]
90 | # bboxes = np.stack([xy42xywha(bbox) for bbox in bboxes_])
91 | bboxes = []
92 | for bbox in bboxes_.astype(np.float32):
93 | (x, y), (w, h), a = cv.minAreaRect(bbox)
94 | bboxes.append([x, y, w, h, a])
95 | bboxes = np.array(bboxes)
96 | det_list.extend([count, bbox, score, label] for bbox, score, label in zip(bboxes, scores, labels))
97 | count += 1
98 | APs = get_det_aps(det_list, gt_list, num_classes)
99 | mAP = sum(APs) / len(APs)
100 | print('AP')
101 | for label in range(num_classes):
102 | print(f'{dataset.label2name[label]}: {APs[label]}')
103 | print(f'mAP: {mAP}')
104 |
105 |
106 | if __name__ == '__main__':
107 |
108 | device_id = 0
109 | torch.cuda.set_device(device_id)
110 |
111 | dir_dataset = ''
112 | dir_save = ''
113 |
114 | backbone = resnet.resnet101
115 | checkpoint = None
116 | old_version = False # set True when using the original weights
117 |
118 | main()
119 |
--------------------------------------------------------------------------------
/run/ucas-aod/prepare.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : prepare.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:11
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import json
17 | import cv2 as cv
18 | import numpy as np
19 |
20 | from utils.crop_image import Cropper
21 |
22 |
23 | def txt2json(dir_txt, dir_json, category):
24 | os.makedirs(dir_json, exist_ok=True)
25 | for txt in os.listdir(dir_txt):
26 | objs = []
27 | name = os.path.splitext(txt)[0]
28 | for line in open(os.path.join(dir_txt, txt)).readlines():
29 | bbox = line.strip().split('\t')[:8]
30 | bbox = np.array(bbox, dtype=np.float32).reshape([4, 2])
31 | bbox = cv.boxPoints(cv.minAreaRect(bbox))
32 | bbox = bbox.tolist()
33 | obj = dict()
34 | obj['name'] = category
35 | obj['bbox'] = bbox
36 | objs.append(obj)
37 | if objs:
38 | json.dump(objs, open(os.path.join(dir_json, name + '.json'), 'wt'), indent=2)
39 |
40 |
41 | def main():
42 | # (1)
43 | dir_txt = os.path.join(dir_dataset, 'labelTxt', 'car')
44 | dir_anno_car = os.path.join(dir_dataset, 'annotations', 'car')
45 | txt2json(dir_txt, dir_anno_car, 'car')
46 |
47 | dir_txt = os.path.join(dir_dataset, 'labelTxt', 'plane')
48 | dir_anno_plane = os.path.join(dir_dataset, 'annotations', 'plane')
49 | txt2json(dir_txt, dir_anno_plane, 'plane')
50 |
51 | # (2)
52 | dir_img_car = os.path.join(dir_dataset, 'images', 'car')
53 | dir_img_plane = os.path.join(dir_dataset, 'images', 'plane')
54 | num_car = len(os.listdir(dir_img_car))
55 | num_plane = len(os.listdir(dir_img_plane))
56 | num_test = 400
57 |     indexes_test = np.linspace(1, num_car + num_plane, num_test, endpoint=False, dtype=int).tolist()
58 |
59 | size = 768
60 | overlap = 0
61 | save_empty = False
62 |
63 | cropper = Cropper(size, overlap)
64 |
65 | pair_train, pair_test = [], []
66 | for category, dir_img, dir_anno in [['car', dir_img_car, dir_anno_car], ['plane', dir_img_plane, dir_anno_plane]]:
67 | pair_train_ = []
68 | for filename in os.listdir(dir_img):
69 | index = int(filename[1: -4])
70 | if index + (0 if category == 'car' else num_car) in indexes_test:
71 | img = os.path.join('images', category, filename)
72 | anno = os.path.join('annotations', category, filename.replace('png', 'json'))
73 | pair_test.append([img, anno])
74 | else:
75 | img = os.path.join(dir_img, filename)
76 | anno = os.path.join(dir_anno, filename.replace('png', 'json'))
77 | pair_train_.append([img, anno])
78 |
79 | out_dir_images = os.path.join(dir_dataset, 'images', f'{category}-crop')
80 | out_dir_annos = os.path.join(dir_dataset, 'annotations', f'{category}-crop')
81 | cropper.crop_batch(pair_train_, out_dir_images, out_dir_annos, save_empty)
82 |
83 | for filename in os.listdir(out_dir_images):
84 | img = os.path.join('images', f'{category}-crop', filename)
85 | anno = os.path.join('annotations', f'{category}-crop', filename.replace('jpg', 'json'))
86 | pair_train.append([img, anno])
87 |
88 | # (3)
89 | out_dir = os.path.join(dir_dataset, 'image-sets')
90 | os.makedirs(out_dir, exist_ok=True)
91 | json.dump(pair_train, open(os.path.join(out_dir, 'train.json'), 'wt'), indent=2)
92 | json.dump(pair_test, open(os.path.join(out_dir, 'test.json'), 'wt'), indent=2)
93 |
94 |
95 | if __name__ == '__main__':
96 |
97 | # directory hierarchy
98 |
99 | # dir_dataset/images/car/P0001.png
100 | # -----------/car/...
101 | # -----------/plane/...
102 |
103 | # dir_dataset/labelTxt/car/P0001.txt
104 | # -------------/car/...
105 | # -------------/plane/...
106 |
107 | # (1) convert annotation files
108 | # (2) crop images
109 | # (3) generate image-set files
110 |
111 | dir_dataset = ''
112 |
113 | main()
114 |
--------------------------------------------------------------------------------
/run/ucas-aod/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : train.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:11
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import sys
12 |
13 | sys.path.append('.')
14 |
15 | import os
16 | import tqdm
17 | import torch
18 |
19 | from torch import optim
20 | from torch.utils.data import DataLoader
21 | from torch.utils.tensorboard import SummaryWriter
22 |
23 | from data.aug.compose import Compose
24 | from data.aug import ops
25 | from data.dataset import UCAS_AOD
26 |
27 | from model.rdd import RDD
28 | from model.backbone import resnet
29 |
30 | from utils.adjust_lr import adjust_lr_multi_step
31 | from utils.parallel import convert_model, CustomDetDataParallel
32 |
33 |
34 | def main():
35 | dir_weight = os.path.join(dir_save, 'weight')
36 | dir_log = os.path.join(dir_save, 'log')
37 | os.makedirs(dir_weight, exist_ok=True)
38 | writer = SummaryWriter(dir_log)
39 |
40 | indexes = [int(os.path.splitext(path)[0]) for path in os.listdir(dir_weight)]
41 | current_step = max(indexes) if indexes else 0
42 |
43 | image_size = 768
44 | lr = 1e-3
45 | batch_size = 12
46 | num_workers = 4
47 |
48 | max_step = 30000
49 | lr_cfg = [[15000, lr], [24000, lr / 10], [max_step, lr / 50]]
50 | warm_up = [500, lr / 50, lr]
51 | save_interval = 1000
52 |
53 | aug = Compose([
54 | ops.ToFloat(),
55 | ops.PhotometricDistort(),
56 | ops.RandomHFlip(),
57 | ops.RandomVFlip(),
58 | ops.RandomRotate90(),
59 | ops.ResizeJitter([0.8, 1.2]),
60 | ops.PadSquare(),
61 | ops.Resize(image_size),
62 | ])
63 | dataset = UCAS_AOD(dir_dataset, ['train'], aug)
64 | loader = DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True,
65 | collate_fn=dataset.collate)
66 | num_classes = len(dataset.names)
67 |
68 | prior_box = {
69 | 'strides': [8, 16, 32, 64],
70 | 'sizes': [3] * 4,
71 | 'aspects': [[1, 2]] * 4,
72 | 'scales': [[2 ** 0, 2 ** (1 / 3), 2 ** (2 / 3)]] * 4,
73 | }
74 |
75 | cfg = {
76 | 'prior_box': prior_box,
77 | 'num_classes': num_classes,
78 | 'extra': 1,
79 | }
80 |
81 | model = RDD(backbone(fetch_feature=True), cfg)
82 | model.build_pipe(shape=[2, 3, image_size, image_size])
83 | if current_step:
84 | model.restore(os.path.join(dir_weight, '%d.pth' % current_step))
85 | else:
86 | model.init()
87 | if len(device_ids) > 1:
88 | model = convert_model(model)
89 | model = CustomDetDataParallel(model, device_ids)
90 | model = model.cuda()
91 | optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
92 | training = True
93 |
94 | while training and current_step < max_step:
95 | tqdm_loader = tqdm.tqdm(loader)
96 | for images, targets, infos in tqdm_loader:
97 | current_step += 1
98 | adjust_lr_multi_step(optimizer, current_step, lr_cfg, warm_up)
99 |
100 | images = images.cuda() / 255
101 | losses = model(images, targets)
102 | loss = sum(losses.values())
103 | loss.backward()
104 | optimizer.step()
105 | optimizer.zero_grad()
106 |
107 | for key, val in list(losses.items()):
108 | losses[key] = val.item()
109 | writer.add_scalar(key, val, global_step=current_step)
110 | writer.flush()
111 | tqdm_loader.set_postfix(losses)
112 | tqdm_loader.set_description(f'<{current_step}/{max_step}>')
113 |
114 | if current_step % save_interval == 0:
115 | save_path = os.path.join(dir_weight, '%d.pth' % current_step)
116 | state_dict = model.state_dict() if len(device_ids) == 1 else model.module.state_dict()
117 | torch.save(state_dict, save_path)
118 | cache_file = os.path.join(dir_weight, '%d.pth' % (current_step - save_interval))
119 | if os.path.exists(cache_file):
120 | os.remove(cache_file)
121 |
122 | if current_step >= max_step:
123 | training = False
124 | writer.close()
125 | break
126 |
127 |
128 | if __name__ == '__main__':
129 |
130 | torch.manual_seed(0)
131 | torch.backends.cudnn.benchmark = True
132 |
133 | device_ids = [0, 1]
134 | torch.cuda.set_device(device_ids[0])
135 | backbone = resnet.resnet101
136 |
137 | dir_dataset = ''
138 | dir_save = ''
139 |
140 | main()
141 |
--------------------------------------------------------------------------------
/utils/adjust_lr.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | def adjust_lr_multi_step(optimizer, step, cfg, warm_up=None):
4 | for param_group in optimizer.param_groups:
5 | if warm_up is not None and step <= warm_up[0]:
6 | param_group['lr'] = warm_up[1] + step / warm_up[0] * (warm_up[2] - warm_up[1])
7 | else:
8 | for s, lr in cfg:
9 | if s is None or step <= s:
10 | param_group['lr'] = lr
11 | break
12 |
--------------------------------------------------------------------------------
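
A minimal usage sketch for adjust_lr_multi_step, mirroring how the run/*/train.py scripts call it. The optimizer, step values and schedule below are illustrative only, and the snippet assumes it is run from the repository root (so that utils is importable) with PyTorch installed:

    import torch
    from torch import nn, optim
    from utils.adjust_lr import adjust_lr_multi_step

    lr = 1e-3
    model = nn.Linear(4, 2)
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # [step boundary, lr]; the last entry covers every step up to max_step
    lr_cfg = [[15000, lr], [24000, lr / 10], [30000, lr / 50]]
    # [warm-up length, start lr, target lr]; linear ramp over the first 500 steps
    warm_up = [500, lr / 50, lr]

    for step in (1, 250, 500, 501, 20000, 29000):
        adjust_lr_multi_step(optimizer, step, lr_cfg, warm_up)
        print(step, optimizer.param_groups[0]['lr'])
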
/utils/box/bbox.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : bbox.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:08
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from torchvision.ops.boxes import nms, batched_nms, box_iou
14 |
15 |
16 | def bbox_switch(bbox, in_type, out_type): # 'xyxy', 'xywh'
17 | if in_type == 'xyxy' and out_type == 'xywh':
18 | bbox = torch.cat([(bbox[..., 0: 2] + bbox[..., 2: 4]) / 2, bbox[..., 2: 4] - bbox[..., 0: 2]], dim=-1)
19 | elif in_type == 'xywh' and out_type == 'xyxy':
20 | bbox = torch.cat([bbox[..., 0: 2] - bbox[..., 2: 4] / 2, bbox[..., 0: 2] + bbox[..., 2: 4] / 2], dim=-1)
21 | return bbox
22 |
23 |
24 | def bbox_iou(bbox1, bbox2, bbox_type='xyxy'): # nx4, mx4 -> nxm
25 | bbox1 = bbox_switch(bbox1, bbox_type, 'xyxy')
26 | bbox2 = bbox_switch(bbox2, bbox_type, 'xyxy')
27 | return box_iou(bbox1, bbox2)
28 |
29 |
30 | def bbox_nms(bboxes, scores, iou_thresh):
31 | return nms(bboxes, scores, iou_thresh)
32 |
33 |
34 | def bbox_batched_nms(bboxes, scores, labels, iou_thresh):
35 | return batched_nms(bboxes, scores, labels, iou_thresh)
36 |
37 |
38 | def encode(gt_bbox, det_bbox, anchor, variance):
39 | xy = (gt_bbox[..., 0: 2] - anchor[..., 0: 2]) / anchor[..., 2: 4] / variance[0]
40 | wh = torch.log(gt_bbox[..., 2: 4] / anchor[..., 2: 4]) / variance[1]
41 | a = gt_bbox[..., [4]] / 45 / variance[2]
42 | gt_bbox = torch.cat([xy, wh, a], dim=-1)
43 | det_bbox = torch.cat([det_bbox[..., :4], torch.tanh(det_bbox[..., [4]]) / variance[2]], dim=-1)
44 | return gt_bbox, det_bbox
45 |
46 |
47 | def decode(det_bbox, anchor, variance):
48 | xy = det_bbox[..., 0: 2] * variance[0] * anchor[..., 2: 4] + anchor[..., 0: 2]
49 | wh = torch.exp(det_bbox[..., 2: 4] * variance[1]) * anchor[..., 2: 4]
50 | a = torch.tanh(det_bbox[..., [4]]) * 45
51 | return torch.cat([xy, wh, a], dim=-1)
52 |
--------------------------------------------------------------------------------
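
A small sketch of what decode produces, with a made-up anchor, prediction and variance (the variance values here are placeholders for illustration, not necessarily the ones the detector is configured with):

    import torch
    from utils.box.bbox import decode

    anchor = torch.tensor([[64., 64., 32., 32.]])       # [x, y, w, h]
    pred = torch.tensor([[0.5, -0.5, 0.1, 0.1, 1.0]])   # regressed [dx, dy, dw, dh, da]
    variance = [0.1, 0.2, 0.1]                          # placeholder values

    # xy offsets are scaled by the anchor size, wh offsets are exponentiated,
    # and the angle term is squashed by tanh into (-45, 45) degrees
    print(decode(pred, anchor, variance))
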
/utils/box/bbox_np.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : bbox_np.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:08
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import numpy as np
12 |
13 |
14 | def bbox_switch(bbox, in_type, out_type): # 'xyxy', 'xywh'
15 | if in_type == 'xyxy' and out_type == 'xywh':
16 | bbox = np.concatenate([(bbox[..., 0: 2] + bbox[..., 2: 4]) / 2, bbox[..., 2: 4] - bbox[..., 0: 2]], axis=-1)
17 | elif in_type == 'xywh' and out_type == 'xyxy':
18 | bbox = np.concatenate([bbox[..., 0: 2] - bbox[..., 2: 4] / 2, bbox[..., 0: 2] + bbox[..., 2: 4] / 2], axis=-1)
19 | return bbox
20 |
21 |
22 | def xywha2xy4(xywha): # a represents the angle(degree), clockwise, a=0 along the X axis
23 | x, y, w, h, a = xywha
24 | corner = np.array([[-w / 2, -h / 2], [w / 2, -h / 2], [w / 2, h / 2], [-w / 2, h / 2]])
25 | a = np.deg2rad(a)
26 | transform = np.array([[np.cos(a), -np.sin(a)], [np.sin(a), np.cos(a)]])
27 | return transform.dot(corner.T).T + [x, y]
28 |
29 |
30 | def xy42xywha(xy4, flag=0): # bbox(4x2) represents a rectangle
31 | # flag=0, 0 <= a < 180
32 | # flag=1, 0 <= a < 180, w >= h
33 | # flag=2, -45 <= a < 45
34 | x, y = np.mean(xy4, axis=0)
35 | diff01 = xy4[0] - xy4[1]
36 | diff03 = xy4[0] - xy4[3]
37 | w = np.sqrt(np.square(diff01).sum())
38 | h = np.sqrt(np.square(diff03).sum())
39 | if w >= h:
40 | a = np.rad2deg(np.arctan2(diff01[1], diff01[0]))
41 | else:
42 | a = np.rad2deg(np.arctan2(diff03[1], diff03[0])) + 90
43 | if flag > 0:
44 | if w < h:
45 | w, h = h, w
46 | a += 90
47 | a = (a % 180 + 180) % 180
48 | if flag > 1:
49 | if 45 <= a < 135:
50 | w, h = h, w
51 | a -= 90
52 | elif a >= 135:
53 | a -= 180
54 | return np.stack([x, y, w, h, a])
55 |
--------------------------------------------------------------------------------
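
A round-trip sketch for the two box representations used throughout the repo; the numbers are arbitrary:

    import numpy as np
    from utils.box.bbox_np import xywha2xy4, xy42xywha

    xywha = np.array([100., 50., 40., 20., 30.])  # center x, y, width, height, angle in degrees
    corners = xywha2xy4(xywha)                    # 4x2 array of corner points
    recovered = xy42xywha(corners, flag=2)        # back to [x, y, w, h, a] with -45 <= a < 45
    print(corners)
    print(recovered)
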
/utils/box/ext/rbbox_overlap_cpu/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .rbbox_overlap import rbbox_iou
4 | from .rbbox_overlap import rbbox_iou_1x1
5 | from .rbbox_overlap import rbbox_iou_nxn
6 | from .rbbox_overlap import rbbox_nms
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_cpu/rbbox_overlap.h:
--------------------------------------------------------------------------------
1 |
2 |
3 | #include <math.h>
4 |
5 |
6 | # define PI 3.14159265358979323846
7 |
8 |
9 | double trangle_area(double * a, double * b, double * c) {
10 | return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0]))/2.0;
11 | }
12 |
13 |
14 | double area(double * int_pts, int num_of_inter) {
15 |
16 | double area = 0;
17 | for(int i = 0;i < num_of_inter - 2;i++) {
18 | area += fabs(trangle_area(int_pts, int_pts + 2 * i + 2, int_pts + 2 * i + 4));
19 | }
20 | return area;
21 | }
22 |
23 |
24 | void reorder_pts(double * int_pts, int num_of_inter) {
25 |
26 | if(num_of_inter > 0) {
27 |
28 | double center[2];
29 |
30 | center[0] = 0.0;
31 | center[1] = 0.0;
32 |
33 | for(int i = 0;i < num_of_inter;i++) {
34 | center[0] += int_pts[2 * i];
35 | center[1] += int_pts[2 * i + 1];
36 | }
37 | center[0] /= num_of_inter;
38 | center[1] /= num_of_inter;
39 |
40 | double vs[16];
41 | double v[2];
42 | double d;
43 | for(int i = 0;i < num_of_inter;i++) {
44 | v[0] = int_pts[2 * i]-center[0];
45 | v[1] = int_pts[2 * i + 1]-center[1];
46 | d = sqrt(v[0] * v[0] + v[1] * v[1]);
47 | v[0] = v[0] / d;
48 | v[1] = v[1] / d;
49 | if(v[1] < 0) {
50 | v[0]= - 2 - v[0];
51 | }
52 | vs[i] = v[0];
53 | }
54 |
55 | double temp,tx,ty;
56 | int j;
57 |         for(int i=1;i<num_of_inter;i++){
58 |             if(vs[i-1]>vs[i]){
59 | temp = vs[i];
60 | tx = int_pts[2*i];
61 | ty = int_pts[2*i+1];
62 | j=i;
63 | while(j>0&&vs[j-1]>temp){
64 | vs[j] = vs[j-1];
65 | int_pts[j*2] = int_pts[j*2-2];
66 | int_pts[j*2+1] = int_pts[j*2-1];
67 | j--;
68 | }
69 | vs[j] = temp;
70 | int_pts[j*2] = tx;
71 | int_pts[j*2+1] = ty;
72 | }
73 | }
74 | }
75 | }
76 |
77 | bool inter2line(double * pts1, double *pts2, int i, int j, double * temp_pts) {
78 |
79 | double a[2];
80 | double b[2];
81 | double c[2];
82 | double d[2];
83 |
84 | double area_abc, area_abd, area_cda, area_cdb;
85 |
86 | a[0] = pts1[2 * i];
87 | a[1] = pts1[2 * i + 1];
88 |
89 | b[0] = pts1[2 * ((i + 1) % 4)];
90 | b[1] = pts1[2 * ((i + 1) % 4) + 1];
91 |
92 | c[0] = pts2[2 * j];
93 | c[1] = pts2[2 * j + 1];
94 |
95 | d[0] = pts2[2 * ((j + 1) % 4)];
96 | d[1] = pts2[2 * ((j + 1) % 4) + 1];
97 |
98 | area_abc = trangle_area(a, b, c);
99 | area_abd = trangle_area(a, b, d);
100 |
101 | if(area_abc * area_abd >= 0) {
102 | return false;
103 | }
104 |
105 | area_cda = trangle_area(c, d, a);
106 | area_cdb = area_cda + area_abc - area_abd;
107 |
108 | if (area_cda * area_cdb >= 0) {
109 | return false;
110 | }
111 | double t = area_cda / (area_abd - area_abc);
112 |
113 | double dx = t * (b[0] - a[0]);
114 | double dy = t * (b[1] - a[1]);
115 | temp_pts[0] = a[0] + dx;
116 | temp_pts[1] = a[1] + dy;
117 |
118 | return true;
119 | }
120 |
121 | bool in_rect(double pt_x, double pt_y, double * pts) {
122 |
123 | double ab[2];
124 | double ad[2];
125 | double ap[2];
126 |
127 | double abab;
128 | double abap;
129 | double adad;
130 | double adap;
131 |
132 | ab[0] = pts[2] - pts[0];
133 | ab[1] = pts[3] - pts[1];
134 |
135 | ad[0] = pts[6] - pts[0];
136 | ad[1] = pts[7] - pts[1];
137 |
138 | ap[0] = pt_x - pts[0];
139 | ap[1] = pt_y - pts[1];
140 |
141 | abab = ab[0] * ab[0] + ab[1] * ab[1];
142 | abap = ab[0] * ap[0] + ab[1] * ap[1];
143 | adad = ad[0] * ad[0] + ad[1] * ad[1];
144 | adap = ad[0] * ap[0] + ad[1] * ap[1];
145 |
146 | return abab >= abap && abap >= 0 && adad >= adap && adap >= 0;
147 | }
148 |
149 | int inter_pts(double * pts1, double * pts2, double * int_pts) {
150 |
151 | int num_of_inter = 0;
152 |
153 | for(int i = 0;i < 4;i++) {
154 | if(in_rect(pts1[2 * i], pts1[2 * i + 1], pts2)) {
155 | int_pts[num_of_inter * 2] = pts1[2 * i];
156 | int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1];
157 | num_of_inter++;
158 | }
159 | if(in_rect(pts2[2 * i], pts2[2 * i + 1], pts1)) {
160 | int_pts[num_of_inter * 2] = pts2[2 * i];
161 | int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1];
162 | num_of_inter++;
163 | }
164 | }
165 |
166 | double temp_pts[2];
167 |
168 | for(int i = 0;i < 4;i++) {
169 | for(int j = 0;j < 4;j++) {
170 | bool has_pts = inter2line(pts1, pts2, i, j, temp_pts);
171 | if(has_pts) {
172 | int_pts[num_of_inter * 2] = temp_pts[0];
173 | int_pts[num_of_inter * 2 + 1] = temp_pts[1];
174 | num_of_inter++;
175 | }
176 | }
177 | }
178 |
179 |
180 | return num_of_inter;
181 | }
182 |
183 |
184 | void convert_region(double * pts , double * region) {
185 |
186 | double angle = region[4];
187 | double a_cos = cos(angle/180.0*PI);
188 | double a_sin = sin(angle/180.0*PI);
189 |
190 | double ctr_x = region[0];
191 | double ctr_y = region[1];
192 |
193 | double w = region[2];
194 | double h = region[3];
195 |
196 | double pts_x[4];
197 | double pts_y[4];
198 |
199 | pts_x[0] = - w / 2;
200 | pts_x[1] = w / 2;
201 | pts_x[2] = w / 2;
202 | pts_x[3] = - w / 2;
203 |
204 | pts_y[0] = - h / 2;
205 | pts_y[1] = - h / 2;
206 | pts_y[2] = h / 2;
207 | pts_y[3] = h / 2;
208 |
209 | for(int i = 0;i < 4;i++) {
210 | pts[7 - 2 * i - 1] = a_cos * pts_x[i] - a_sin * pts_y[i] + ctr_x;
211 | pts[7 - 2 * i] = a_sin * pts_x[i] + a_cos * pts_y[i] + ctr_y;
212 |
213 | }
214 |
215 | }
216 |
217 |
218 | double inter(double * region1, double * region2) {
219 |
220 | double pts1[8];
221 | double pts2[8];
222 | double int_pts[16];
223 | int num_of_inter;
224 |
225 | convert_region(pts1, region1);
226 | convert_region(pts2, region2);
227 |
228 | num_of_inter = inter_pts(pts1, pts2, int_pts);
229 |
230 | reorder_pts(int_pts, num_of_inter);
231 |
232 | return area(int_pts, num_of_inter);
233 |
234 |
235 | }
236 |
237 | double RotateIoU(double * region1, double * region2) {
238 |
239 | double area1 = region1[2] * region1[3];
240 | double area2 = region2[2] * region2[3];
241 | double area_inter = inter(region1, region2);
242 |
243 | return area_inter / (area1 + area2 - area_inter);
244 |
245 | }
246 |
247 |
248 | void RotateIoU_1x1(double * region1, double * region2, int n, double * ret){
249 | for ( int i = 0; i < n; i++ ){
250 | ret[i] = RotateIoU(region1 + i * 5, region2 + i * 5);
251 | }
252 | }
253 |
254 |
255 | void RotateIoU_nxn(double * region1, double * region2, int n1, int n2, double * ret){
256 | for ( int i = 0; i < n1; i++ ){
257 | for ( int j = 0; j < n2; j++ ){
258 | ret[i * n2 + j] = RotateIoU(region1 + i * 5, region2 + j * 5);
259 | }
260 | }
261 | }
262 |
263 | void RotateNMS(double * bboxes, int n, double thresh, int * keeps){
264 | int i, flag;
265 | n--;
266 | while(n > 0){
267 | flag = 0;
268 | for ( i = 0; i < n; i++ ){
269 | if (keeps[i]){
270 | if (RotateIoU(bboxes + n * 5, bboxes + i * 5) > thresh){
271 | keeps[i] = 0;
272 | }
273 | else{
274 | flag = i;
275 | }
276 | }
277 | }
278 | n = flag;
279 | }
280 | }
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_cpu/rbbox_overlap.pyx:
--------------------------------------------------------------------------------
1 |
2 |
3 | # distutils: language = c++
4 |
5 |
6 | import numpy as np
7 | cimport numpy as np
8 |
9 |
10 | assert sizeof(int) == sizeof(np.int32_t)
11 |
12 |
13 | cdef extern from 'rbbox_overlap.h':
14 | cdef float RotateIoU(np.float64_t * region1, np.float64_t * region2)
15 | cdef void RotateIoU_1x1(np.float64_t * region1, np.float64_t * region2, int n, np.float64_t * ret)
16 | cdef void RotateIoU_nxn(np.float64_t * region1, np.float64_t * region2, int n1, int n2, np.float64_t * ret)
17 | cdef void RotateNMS(np.float64_t * bboxes, int n, float thresh, np.int32_t * keeps)
18 |
19 |
20 | def rbbox_iou(np.ndarray[np.float64_t, ndim=1] a, np.ndarray[np.float64_t, ndim=1] b):
21 | return RotateIoU(&a[0], &b[0])
22 |
23 |
24 | def rbbox_iou_1x1(np.ndarray[np.float64_t, ndim=2] a, np.ndarray[np.float64_t, ndim=2] b):
25 | cdef int n1 = a.shape[0]
26 | cdef int n2 = b.shape[0]
27 | assert n1 == n2
28 | cdef np.ndarray[np.float64_t, ndim=1] ret = np.zeros([n1], dtype=np.float64)
29 | RotateIoU_1x1(&a[0, 0], &b[0, 0], n1, &ret[0])
30 | return ret
31 |
32 |
33 | def rbbox_iou_nxn(np.ndarray[np.float64_t, ndim=2] a, np.ndarray[np.float64_t, ndim=2] b):
34 | cdef int n1 = a.shape[0]
35 | cdef int n2 = b.shape[0]
36 | cdef np.ndarray[np.float64_t, ndim=2] ret = np.zeros([n1, n2], dtype=np.float64)
37 | RotateIoU_nxn(&a[0, 0], &b[0, 0], n1, n2, &ret[0, 0])
38 | return ret
39 |
40 |
41 | def rbbox_nms(np.ndarray[np.float64_t, ndim=2] boxes, np.ndarray[np.float64_t, ndim=1] scores, float thresh):
42 | cdef int n = boxes.shape[0]
43 | cdef np.ndarray[np.int32_t, ndim=1] keeps = np.ones([n], dtype=np.int32)
44 | cdef np.ndarray[np.int32_t, ndim=1] indexes = np.argsort(scores).astype(np.int32)
45 | boxes = boxes[indexes]
46 | RotateNMS(&boxes[0, 0], n, thresh, &keeps[0])
47 |     keeps = indexes[keeps.astype(bool)]
48 | if len(keeps) > 1:
49 | keeps = np.ascontiguousarray(keeps[::-1])
50 | return keeps
51 |
52 |
53 | # python setup.py build_ext --inplace
54 |
55 | # iou.cpp(2961): error C2664: 'void RotateNMS(float *,int,float,int *)': cannot convert argument 4 from '__pyx_t_5numpy_int32_t *' to 'int *'
56 | #
57 | # go to line(2961) in the generated file in iou.cpp
58 | # Modify corresponding __pyx_t_5numpy_int32_t to int
59 |
60 |
61 |
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_cpu/setup.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import numpy as np
4 |
5 | from distutils.core import setup
6 | from Cython.Build import cythonize
7 |
8 |
9 | try:
10 | numpy_include = np.get_include()
11 | except AttributeError:
12 | numpy_include = np.get_numpy_include()
13 |
14 |
15 | setup(
16 | ext_modules=cythonize("rbbox_overlap.pyx"),
17 | include_dirs=[numpy_include],
18 | )
19 |
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_gpu/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .rbbox_overlap import rbbox_overlaps as rbbox_iou
4 | from .rbbox_overlap import rotate_gpu_nms as rbbox_nms
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_gpu/rbbox_overlap.hpp:
--------------------------------------------------------------------------------
1 |
2 |
3 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id);
4 |
5 |
6 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
7 | int boxes_dim, float nms_overlap_thresh, int device_id);
8 |
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_gpu/rbbox_overlap.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 |
4 | assert sizeof(int) == sizeof(np.int32_t)
5 |
6 | cdef extern from "rbbox_overlap.hpp":
7 | void _rotate_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
8 | void _overlaps(np.float32_t*, np.float32_t*, np.float32_t*, int, int, int)
9 |
10 |
11 | def rbbox_overlaps (np.ndarray[np.float32_t, ndim=2] boxes, np.ndarray[np.float32_t, ndim=2] query_boxes, np.int32_t device_id=0):
12 | # boxes: [x, y, w, h, theta]
13 | cdef int N = boxes.shape[0]
14 | cdef int K = query_boxes.shape[0]
15 | cdef np.ndarray[np.float32_t, ndim=2] overlaps = np.zeros((N, K), dtype = np.float32)
16 | _overlaps(&overlaps[0, 0], &boxes[0, 0], &query_boxes[0, 0], N, K, device_id)
17 | return overlaps
18 |
19 |
20 | def rotate_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float_t thresh, np.int32_t device_id=0):
21 | cdef int boxes_num = dets.shape[0]
22 | cdef int boxes_dim = dets.shape[1]
23 | cdef int num_out
24 | cdef np.ndarray[np.int32_t, ndim=1] \
25 | keep = np.zeros(boxes_num, dtype=np.int32)
26 | cdef np.ndarray[np.float32_t, ndim=1] \
27 | scores = dets[:, 5]
28 | cdef np.ndarray[np.int_t, ndim=1] \
29 | order = scores.argsort()[::-1]
30 | cdef np.ndarray[np.float32_t, ndim=2] \
31 | sorted_dets = dets[order, :]
32 | thresh = thresh
33 | _rotate_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
34 | keep = keep[:num_out]
35 | return order[keep]
36 |
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_gpu/rbbox_overlap_kernel.cu:
--------------------------------------------------------------------------------
1 |
2 | #include "rbbox_overlap.hpp"
3 | #include <vector>
4 | #include <iostream>
5 | #include <cstring>
6 |
7 | #define CUDA_CHECK(condition) \
8 | /* Code block avoids redefinition of cudaError_t error */ \
9 | do { \
10 | cudaError_t error = condition; \
11 | if (error != cudaSuccess) { \
12 | std::cout << cudaGetErrorString(error) << std::endl; \
13 | } \
14 | } while (0)
15 |
16 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
17 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
18 |
19 | __device__ inline float trangle_area(float * a, float * b, float * c) {
20 | return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0]))/2.0;
21 | }
22 |
23 | __device__ inline float area(float * int_pts, int num_of_inter) {
24 |
25 | float area = 0.0;
26 | for(int i = 0;i < num_of_inter - 2;i++) {
27 | area += fabs(trangle_area(int_pts, int_pts + 2 * i + 2, int_pts + 2 * i + 4));
28 | }
29 | return area;
30 | }
31 |
32 | __device__ inline void reorder_pts(float * int_pts, int num_of_inter) {
33 |
34 |
35 |
36 | if(num_of_inter > 0) {
37 |
38 | float center[2];
39 |
40 | center[0] = 0.0;
41 | center[1] = 0.0;
42 |
43 | for(int i = 0;i < num_of_inter;i++) {
44 | center[0] += int_pts[2 * i];
45 | center[1] += int_pts[2 * i + 1];
46 | }
47 | center[0] /= num_of_inter;
48 | center[1] /= num_of_inter;
49 |
50 | float vs[16];
51 | float v[2];
52 | float d;
53 | for(int i = 0;i < num_of_inter;i++) {
54 | v[0] = int_pts[2 * i]-center[0];
55 | v[1] = int_pts[2 * i + 1]-center[1];
56 | d = sqrt(v[0] * v[0] + v[1] * v[1]);
57 | v[0] = v[0] / d;
58 | v[1] = v[1] / d;
59 | if(v[1] < 0) {
60 | v[0]= - 2 - v[0];
61 | }
62 | vs[i] = v[0];
63 | }
64 |
65 | float temp,tx,ty;
66 | int j;
67 |     for(int i=1;i<num_of_inter;i++){
68 |       if(vs[i-1]>vs[i]){
69 | temp = vs[i];
70 | tx = int_pts[2*i];
71 | ty = int_pts[2*i+1];
72 | j=i;
73 | while(j>0&&vs[j-1]>temp){
74 | vs[j] = vs[j-1];
75 | int_pts[j*2] = int_pts[j*2-2];
76 | int_pts[j*2+1] = int_pts[j*2-1];
77 | j--;
78 | }
79 | vs[j] = temp;
80 | int_pts[j*2] = tx;
81 | int_pts[j*2+1] = ty;
82 | }
83 | }
84 | }
85 |
86 | }
87 | __device__ inline bool inter2line(float * pts1, float *pts2, int i, int j, float * temp_pts) {
88 |
89 | float a[2];
90 | float b[2];
91 | float c[2];
92 | float d[2];
93 |
94 | float area_abc, area_abd, area_cda, area_cdb;
95 |
96 | a[0] = pts1[2 * i];
97 | a[1] = pts1[2 * i + 1];
98 |
99 | b[0] = pts1[2 * ((i + 1) % 4)];
100 | b[1] = pts1[2 * ((i + 1) % 4) + 1];
101 |
102 | c[0] = pts2[2 * j];
103 | c[1] = pts2[2 * j + 1];
104 |
105 | d[0] = pts2[2 * ((j + 1) % 4)];
106 | d[1] = pts2[2 * ((j + 1) % 4) + 1];
107 |
108 | area_abc = trangle_area(a, b, c);
109 | area_abd = trangle_area(a, b, d);
110 |
111 | if(area_abc * area_abd >= 0) {
112 | return false;
113 | }
114 |
115 | area_cda = trangle_area(c, d, a);
116 | area_cdb = area_cda + area_abc - area_abd;
117 |
118 | if (area_cda * area_cdb >= 0) {
119 | return false;
120 | }
121 | float t = area_cda / (area_abd - area_abc);
122 |
123 | float dx = t * (b[0] - a[0]);
124 | float dy = t * (b[1] - a[1]);
125 | temp_pts[0] = a[0] + dx;
126 | temp_pts[1] = a[1] + dy;
127 |
128 | return true;
129 | }
130 |
131 | __device__ inline bool in_rect(float pt_x, float pt_y, float * pts) {
132 |
133 | float ab[2];
134 | float ad[2];
135 | float ap[2];
136 |
137 | float abab;
138 | float abap;
139 | float adad;
140 | float adap;
141 |
142 | ab[0] = pts[2] - pts[0];
143 | ab[1] = pts[3] - pts[1];
144 |
145 | ad[0] = pts[6] - pts[0];
146 | ad[1] = pts[7] - pts[1];
147 |
148 | ap[0] = pt_x - pts[0];
149 | ap[1] = pt_y - pts[1];
150 |
151 | abab = ab[0] * ab[0] + ab[1] * ab[1];
152 | abap = ab[0] * ap[0] + ab[1] * ap[1];
153 | adad = ad[0] * ad[0] + ad[1] * ad[1];
154 | adap = ad[0] * ap[0] + ad[1] * ap[1];
155 |
156 | return abab >= abap and abap >= 0 and adad >= adap and adap >= 0;
157 | }
158 |
159 | __device__ inline int inter_pts(float * pts1, float * pts2, float * int_pts) {
160 |
161 | int num_of_inter = 0;
162 |
163 | for(int i = 0;i < 4;i++) {
164 | if(in_rect(pts1[2 * i], pts1[2 * i + 1], pts2)) {
165 | int_pts[num_of_inter * 2] = pts1[2 * i];
166 | int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1];
167 | num_of_inter++;
168 | }
169 | if(in_rect(pts2[2 * i], pts2[2 * i + 1], pts1)) {
170 | int_pts[num_of_inter * 2] = pts2[2 * i];
171 | int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1];
172 | num_of_inter++;
173 | }
174 | }
175 |
176 | float temp_pts[2];
177 |
178 | for(int i = 0;i < 4;i++) {
179 | for(int j = 0;j < 4;j++) {
180 | bool has_pts = inter2line(pts1, pts2, i, j, temp_pts);
181 | if(has_pts) {
182 | int_pts[num_of_inter * 2] = temp_pts[0];
183 | int_pts[num_of_inter * 2 + 1] = temp_pts[1];
184 | num_of_inter++;
185 | }
186 | }
187 | }
188 |
189 |
190 | return num_of_inter;
191 | }
192 |
193 | __device__ inline void convert_region(float * pts , float const * const region) {
194 |
195 | float angle = region[4];
196 | float a_cos = cos(angle/180.0*3.1415926535);
197 | float a_sin = sin(angle/180.0*3.1415926535);
198 |
199 | float ctr_x = region[0];
200 | float ctr_y = region[1];
201 |
202 | float w = region[2];
203 | float h = region[3];
204 |
205 | float pts_x[4];
206 | float pts_y[4];
207 |
208 | pts_x[0] = - w / 2;
209 | pts_x[1] = w / 2;
210 | pts_x[2] = w / 2;
211 | pts_x[3] = - w / 2;
212 |
213 | pts_y[0] = - h / 2;
214 | pts_y[1] = - h / 2;
215 | pts_y[2] = h / 2;
216 | pts_y[3] = h / 2;
217 |
218 | for(int i = 0;i < 4;i++) {
219 | pts[7 - 2 * i - 1] = a_cos * pts_x[i] - a_sin * pts_y[i] + ctr_x;
220 | pts[7 - 2 * i] = a_sin * pts_x[i] + a_cos * pts_y[i] + ctr_y;
221 |
222 | }
223 |
224 | }
225 |
226 |
227 | __device__ inline float inter(float const * const region1, float const * const region2) {
228 |
229 | float pts1[8];
230 | float pts2[8];
231 | float int_pts[16];
232 | int num_of_inter;
233 |
234 | convert_region(pts1, region1);
235 | convert_region(pts2, region2);
236 |
237 | num_of_inter = inter_pts(pts1, pts2, int_pts);
238 |
239 | reorder_pts(int_pts, num_of_inter);
240 |
241 | return area(int_pts, num_of_inter);
242 |
243 |
244 | }
245 |
246 | __device__ inline float devRotateIoU(float const * const region1, float const * const region2) {
247 |
248 | float area1 = region1[2] * region1[3];
249 | float area2 = region2[2] * region2[3];
250 | float area_inter = inter(region1, region2);
251 |
252 | return area_inter / (area1 + area2 - area_inter);
253 |
254 |
255 | }
256 |
257 | __global__ void overlaps_kernel(const int N, const int K, const float* dev_boxes,
258 | const float * dev_query_boxes, float* dev_overlaps) {
259 |
260 | const int col_start = blockIdx.y;
261 | const int row_start = blockIdx.x;
262 |
263 | const int row_size =
264 | min(N - row_start * threadsPerBlock, threadsPerBlock);
265 | const int col_size =
266 | min(K - col_start * threadsPerBlock, threadsPerBlock);
267 |
268 |
269 | __shared__ float block_boxes[threadsPerBlock * 5];
270 | __shared__ float block_query_boxes[threadsPerBlock * 5];
271 | if (threadIdx.x < col_size) {
272 | block_query_boxes[threadIdx.x * 5 + 0] =
273 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
274 | block_query_boxes[threadIdx.x * 5 + 1] =
275 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
276 | block_query_boxes[threadIdx.x * 5 + 2] =
277 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
278 | block_query_boxes[threadIdx.x * 5 + 3] =
279 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
280 | block_query_boxes[threadIdx.x * 5 + 4] =
281 | dev_query_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
282 | }
283 |
284 | if (threadIdx.x < row_size) {
285 | block_boxes[threadIdx.x * 5 + 0] =
286 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 0];
287 | block_boxes[threadIdx.x * 5 + 1] =
288 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 1];
289 | block_boxes[threadIdx.x * 5 + 2] =
290 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 2];
291 | block_boxes[threadIdx.x * 5 + 3] =
292 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 3];
293 | block_boxes[threadIdx.x * 5 + 4] =
294 | dev_boxes[(threadsPerBlock * row_start + threadIdx.x) * 5 + 4];
295 | }
296 |
297 | __syncthreads();
298 |
299 | if (threadIdx.x < row_size) {
300 |
301 | for(int i = 0;i < col_size; i++) {
302 | int offset = row_start*threadsPerBlock * K + col_start*threadsPerBlock + threadIdx.x*K+ i ;
303 | dev_overlaps[offset] = devRotateIoU(block_boxes + threadIdx.x * 5, block_query_boxes + i * 5);
304 | }
305 |
306 | }
307 | }
308 |
309 |
310 | __global__ void rotate_nms_kernel(const int n_boxes, const float nms_overlap_thresh,
311 | const float *dev_boxes, unsigned long long *dev_mask) {
312 | const int row_start = blockIdx.y;
313 | const int col_start = blockIdx.x;
314 |
315 | // if (row_start > col_start) return;
316 |
317 | const int row_size =
318 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
319 | const int col_size =
320 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
321 |
322 | __shared__ float block_boxes[threadsPerBlock * 6];
323 | if (threadIdx.x < col_size) {
324 | block_boxes[threadIdx.x * 6 + 0] =
325 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0];
326 | block_boxes[threadIdx.x * 6 + 1] =
327 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1];
328 | block_boxes[threadIdx.x * 6 + 2] =
329 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2];
330 | block_boxes[threadIdx.x * 6 + 3] =
331 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3];
332 | block_boxes[threadIdx.x * 6 + 4] =
333 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4];
334 | block_boxes[threadIdx.x * 6 + 5] =
335 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5];
336 | }
337 | __syncthreads();
338 |
339 | if (threadIdx.x < row_size) {
340 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
341 | const float *cur_box = dev_boxes + cur_box_idx * 6;
342 | int i = 0;
343 | unsigned long long t = 0;
344 | int start = 0;
345 | if (row_start == col_start) {
346 | start = threadIdx.x + 1;
347 | }
348 | for (i = start; i < col_size; i++) {
349 | if (devRotateIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) {
350 | t |= 1ULL << i;
351 | }
352 | }
353 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
354 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
355 | }
356 | }
357 |
358 |
359 | void _set_device(int device_id) {
360 | int current_device;
361 |   CUDA_CHECK(cudaGetDevice(&current_device));
362 | if (current_device == device_id) {
363 | return;
364 | }
365 | // The call to cudaSetDevice must come before any calls to Get, which
366 | // may perform initialization using the GPU.
367 | CUDA_CHECK(cudaSetDevice(device_id));
368 | }
369 |
370 |
371 | void _overlaps(float* overlaps,const float* boxes,const float* query_boxes, int n, int k, int device_id) {
372 |
373 | _set_device(device_id);
374 |
375 | float* overlaps_dev = NULL;
376 | float* boxes_dev = NULL;
377 | float* query_boxes_dev = NULL;
378 |
379 |
380 | CUDA_CHECK(cudaMalloc(&boxes_dev,
381 | n * 5 * sizeof(float)));
382 |
383 | CUDA_CHECK(cudaMemcpy(boxes_dev,
384 | boxes,
385 | n * 5 * sizeof(float),
386 | cudaMemcpyHostToDevice));
387 |
388 | CUDA_CHECK(cudaMalloc(&query_boxes_dev,
389 | k * 5 * sizeof(float)));
390 |
391 | CUDA_CHECK(cudaMemcpy(query_boxes_dev,
392 | query_boxes,
393 | k * 5 * sizeof(float),
394 | cudaMemcpyHostToDevice));
395 |
396 | CUDA_CHECK(cudaMalloc(&overlaps_dev,
397 | n * k * sizeof(float)));
398 |
399 | dim3 blocks(DIVUP(n, threadsPerBlock),
400 | DIVUP(k, threadsPerBlock));
401 |
402 | dim3 threads(threadsPerBlock);
403 |
404 |   overlaps_kernel<<<blocks, threads>>>(n, k,
405 | boxes_dev,
406 | query_boxes_dev,
407 | overlaps_dev);
408 |
409 | CUDA_CHECK(cudaMemcpy(overlaps,
410 | overlaps_dev,
411 | n * k * sizeof(float),
412 | cudaMemcpyDeviceToHost));
413 |
414 | CUDA_CHECK(cudaFree(overlaps_dev));
415 | CUDA_CHECK(cudaFree(boxes_dev));
416 | CUDA_CHECK(cudaFree(query_boxes_dev));
417 |
418 | }
419 |
420 |
421 | void _rotate_nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
422 | int boxes_dim, float nms_overlap_thresh, int device_id) {
423 | _set_device(device_id);
424 |
425 | float* boxes_dev = NULL;
426 | unsigned long long* mask_dev = NULL;
427 |
428 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
429 |
430 | CUDA_CHECK(cudaMalloc(&boxes_dev,
431 | boxes_num * boxes_dim * sizeof(float)));
432 | CUDA_CHECK(cudaMemcpy(boxes_dev,
433 | boxes_host,
434 | boxes_num * boxes_dim * sizeof(float),
435 | cudaMemcpyHostToDevice));
436 |
437 | CUDA_CHECK(cudaMalloc(&mask_dev,
438 | boxes_num * col_blocks * sizeof(unsigned long long)));
439 |
440 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
441 | DIVUP(boxes_num, threadsPerBlock));
442 | dim3 threads(threadsPerBlock);
443 |   rotate_nms_kernel<<<blocks, threads>>>(boxes_num,
444 | nms_overlap_thresh,
445 | boxes_dev,
446 | mask_dev);
447 |
448 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
449 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
450 | mask_dev,
451 | sizeof(unsigned long long) * boxes_num * col_blocks,
452 | cudaMemcpyDeviceToHost));
453 |
454 |   std::vector<unsigned long long> remv(col_blocks);
455 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
456 |
457 | int num_to_keep = 0;
458 | for (int i = 0; i < boxes_num; i++) {
459 | int nblock = i / threadsPerBlock;
460 | int inblock = i % threadsPerBlock;
461 |
462 | if (!(remv[nblock] & (1ULL << inblock))) {
463 | keep_out[num_to_keep++] = i;
464 | unsigned long long *p = &mask_host[0] + i * col_blocks;
465 | for (int j = nblock; j < col_blocks; j++) {
466 | remv[j] |= p[j];
467 | }
468 | }
469 | }
470 | *num_out = num_to_keep;
471 |
472 | CUDA_CHECK(cudaFree(boxes_dev));
473 | CUDA_CHECK(cudaFree(mask_dev));
474 | }
475 |
--------------------------------------------------------------------------------
/utils/box/ext/rbbox_overlap_gpu/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | from setuptools import setup
11 | from distutils.extension import Extension
12 | from Cython.Distutils import build_ext
13 | import numpy as np
14 |
15 |
16 | def find_in_path(name, path):
17 | "Find a file in a search path"
18 |     # Adapted from
19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
20 | for dir in path.split(os.pathsep):
21 | binpath = pjoin(dir, name)
22 | if os.path.exists(binpath):
23 | return os.path.abspath(binpath)
24 | return None
25 |
26 |
27 | def locate_cuda():
28 | """Locate the CUDA environment on the system
29 |
30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
31 | and values giving the absolute path to each directory.
32 |
33 | Starts by looking for the CUDAHOME env variable. If not found, everything
34 | is based on finding 'nvcc' in the PATH.
35 | """
36 |
37 | # first check if the CUDAHOME env variable is in use
38 | if 'CUDAHOME' in os.environ:
39 | home = os.environ['CUDAHOME']
40 | nvcc = pjoin(home, 'bin', 'nvcc')
41 | else:
42 | # otherwise, search the PATH for NVCC
43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
45 | if nvcc is None:
46 | raise EnvironmentError('The nvcc binary could not be '
47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
48 | home = os.path.dirname(os.path.dirname(nvcc))
49 |
50 | cudaconfig = {'home':home, 'nvcc':nvcc,
51 | 'include': pjoin(home, 'include'),
52 | 'lib64': pjoin(home, 'lib64')}
53 | for k, v in cudaconfig.items():
54 | if not os.path.exists(v):
55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
56 |
57 | return cudaconfig
58 | CUDA = locate_cuda()
59 |
60 |
61 | # Obtain the numpy include directory. This logic works across numpy versions.
62 | try:
63 | numpy_include = np.get_include()
64 | except AttributeError:
65 | numpy_include = np.get_numpy_include()
66 |
67 |
68 | def customize_compiler_for_nvcc(self):
69 | """inject deep into distutils to customize how the dispatch
70 | to gcc/nvcc works.
71 |
72 | If you subclass UnixCCompiler, it's not trivial to get your subclass
73 | injected in, and still have the right customizations (i.e.
74 | distutils.sysconfig.customize_compiler) run on it. So instead of going
75 |     the OO route, I have this. Note, it's kind of like a weird functional
76 | subclassing going on."""
77 |
78 |     # tell the compiler it can process .cu files
79 | self.src_extensions.append('.cu')
80 |
81 |     # save references to the default compiler_so and _compile methods
82 | default_compiler_so = self.compiler_so
83 | super = self._compile
84 |
85 | # now redefine the _compile method. This gets executed for each
86 | # object but distutils doesn't have the ability to change compilers
87 | # based on source extension: we add it.
88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
89 | if os.path.splitext(src)[1] == '.cu':
90 | # use the cuda for .cu files
91 | self.set_executable('compiler_so', CUDA['nvcc'])
92 | # use only a subset of the extra_postargs, which are 1-1 translated
93 | # from the extra_compile_args in the Extension class
94 | postargs = extra_postargs['nvcc']
95 | else:
96 | postargs = extra_postargs['gcc']
97 |
98 | super(obj, src, ext, cc_args, postargs, pp_opts)
99 | # reset the default compiler_so, which we might have changed for cuda
100 | self.compiler_so = default_compiler_so
101 |
102 | # inject our redefined _compile method into the class
103 | self._compile = _compile
104 |
105 |
106 | # run the customize_compiler
107 | class custom_build_ext(build_ext):
108 | def build_extensions(self):
109 | customize_compiler_for_nvcc(self.compiler)
110 | build_ext.build_extensions(self)
111 |
112 |
113 | ext_modules = [
114 | Extension('rbbox_overlap',
115 | ['rbbox_overlap_kernel.cu', 'rbbox_overlap.pyx'],
116 | library_dirs=[CUDA['lib64']],
117 | libraries=['cudart'],
118 | language='c++',
119 | runtime_library_dirs=[CUDA['lib64']],
120 | # this syntax is specific to this build system
121 | # we're only going to use certain compiler args with nvcc and not with
122 | # gcc the implementation of this trick is in customize_compiler() below
123 | extra_compile_args={'gcc': ["-Wno-unused-function"],
124 | 'nvcc': ['-arch=sm_35',
125 | '--ptxas-options=-v',
126 | '-c',
127 | '--compiler-options',
128 | "'-fPIC'"]},
129 | include_dirs=[numpy_include, CUDA['include']]
130 | ),
131 | ]
132 |
133 | setup(
134 | name='fast_rcnn',
135 | ext_modules=ext_modules,
136 | # inject our custom trigger
137 | cmdclass={'build_ext': custom_build_ext},
138 | )
139 |
--------------------------------------------------------------------------------
/utils/box/metric.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : metric.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:08
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import numpy as np
12 |
13 | from collections import defaultdict, Counter
14 |
15 | from .rbbox_np import rbbox_iou
16 |
17 |
18 | def get_ap(recall, precision):
19 | recall = [0] + list(recall) + [1]
20 | precision = [0] + list(precision) + [0]
21 | for i in range(len(precision) - 1, 0, -1):
22 | precision[i - 1] = max(precision[i - 1], precision[i])
23 | ap = sum((recall[i] - recall[i - 1]) * precision[i] for i in range(1, len(recall)) if recall[i] != recall[i - 1])
24 | return ap * 100
25 |
26 |
27 | def get_ap_07(recall, precision):
28 | ap = 0.
29 | for t in np.linspace(0, 1, 11, endpoint=True):
30 | mask = recall >= t
31 | if np.any(mask):
32 | ap += np.max(precision[mask]) / 11
33 | return ap * 100
34 |
35 |
36 | def get_det_aps(detect, target, num_classes, iou_thresh=0.5, use_07_metric=False):
37 | # [[index, bbox, score, label], ...]
38 | aps = []
39 | for c in range(num_classes):
40 | target_c = list(filter(lambda x: x[3] == c, target))
41 | detect_c = filter(lambda x: x[3] == c, detect)
42 | detect_c = sorted(detect_c, key=lambda x: x[2], reverse=True)
43 | tp = np.zeros(len(detect_c))
44 | fp = np.zeros(len(detect_c))
45 | target_count = Counter([x[0] for x in target_c])
46 | target_count = {index: np.zeros(count) for index, count in target_count.items()}
47 | target_lut = defaultdict(list)
48 | for index, bbox, conf, label in target_c:
49 | target_lut[index].append(bbox)
50 | detect_lut = defaultdict(list)
51 | for index, bbox, conf, label in detect_c:
52 | detect_lut[index].append(bbox)
53 | iou_lut = dict()
54 | for index, bboxes in detect_lut.items():
55 | if index in target_lut:
56 | iou_lut[index] = rbbox_iou(np.stack(bboxes), np.stack(target_lut[index]))
57 | counter = defaultdict(int)
58 | for i, (index, bbox, conf, label) in enumerate(detect_c):
59 | count = counter[index]
60 | counter[index] += 1
61 | iou_max = -np.inf
62 | hit_j = 0
63 | if index in iou_lut:
64 | for j, iou in enumerate(iou_lut[index][count]):
65 | if iou > iou_max:
66 | iou_max = iou
67 | hit_j = j
68 | if iou_max > iou_thresh and target_count[index][hit_j] == 0:
69 | tp[i] = 1
70 | target_count[index][hit_j] = 1
71 | else:
72 | fp[i] = 1
73 | tp_sum = np.cumsum(tp)
74 | fp_sum = np.cumsum(fp)
75 | npos = len(target_c)
76 | recall = tp_sum / npos
77 | precision = tp_sum / (tp_sum + fp_sum)
78 | aps.append((get_ap_07 if use_07_metric else get_ap)(recall, precision))
79 | return aps
80 |
--------------------------------------------------------------------------------
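
A toy example of the input format get_det_aps expects (each entry is [image index, rotated box (x, y, w, h, a), score, class label]); it requires one of the rbbox_overlap extensions under utils/box/ext to have been built:

    import numpy as np
    from utils.box.metric import get_det_aps

    target = [[0, np.array([50., 50., 20., 10., 0.]), 1.0, 0]]
    detect = [[0, np.array([51., 50., 20., 10., 2.]), 0.9, 0]]
    print(get_det_aps(detect, target, num_classes=1))  # per-class AP, in percent
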
/utils/box/rbbox.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import numpy as np
4 |
5 |
6 | try:
7 | from .ext.rbbox_overlap_gpu import rbbox_iou as rbbox_iou_gpu
8 | from .ext.rbbox_overlap_gpu import rbbox_nms as rbbox_nms_gpu
9 |
10 |
11 | def rbbox_iou(boxes1, boxes2, device=None): # [x, y, w, h, a]
12 | if device is None:
13 | device = 0 if boxes1.device.type == 'cpu' else boxes1.device.index
14 | boxes1 = boxes1.reshape([-1, 5]).detach().cpu().numpy().astype(np.float32)
15 | boxes2 = boxes2.reshape([-1, 5]).detach().cpu().numpy().astype(np.float32)
16 | ious = rbbox_iou_gpu(boxes1, boxes2, device)
17 | return ious
18 |
19 | def rbbox_nms(boxes, scores, iou_thresh=0.5, device=None):
20 | if device is None:
21 | device = 0 if boxes.device.type == 'cpu' else boxes.device.index
22 | boxes = boxes.reshape([-1, 5]).detach().cpu().numpy().astype(np.float32)
23 | scores = scores.reshape([-1, 1]).detach().cpu().numpy().astype(np.float32)
24 | boxes = np.c_[boxes, scores]
25 | keeps = rbbox_nms_gpu(boxes, iou_thresh, device)
26 | return keeps
27 |
28 | except ModuleNotFoundError as e:
29 |
30 | from .ext.rbbox_overlap_cpu import rbbox_iou_nxn as rbbox_iou_cpu
31 | from .ext.rbbox_overlap_cpu import rbbox_nms as rbbox_nms_cpu
32 |
33 |
34 | def rbbox_iou(boxes1, boxes2):
35 | boxes1 = boxes1.reshape([-1, 5]).detach().cpu().numpy().astype(np.float64)
36 | boxes2 = boxes2.reshape([-1, 5]).detach().cpu().numpy().astype(np.float64)
37 | ious = rbbox_iou_cpu(boxes1, boxes2)
38 | return ious
39 |
40 |
41 | def rbbox_nms(boxes, scores, iou_thresh=0.5):
42 | boxes = boxes.reshape([-1, 5]).detach().cpu().numpy().astype(np.float64)
43 | scores = scores.reshape([-1]).detach().cpu().numpy().astype(np.float64)
44 | keeps = rbbox_nms_cpu(boxes, scores, iou_thresh)
45 | return keeps
46 |
47 |
48 | def rbbox_batched_nms(boxes, scores, labels, iou_thresh=0.5):
49 | if len(boxes) == 0:
50 |         return np.empty([0], dtype=int)
51 | max_coordinate = boxes[:, 0:2].max() + boxes[:, 2:4].max()
52 | labels = labels.to(boxes)
53 | offsets = labels * (max_coordinate + 1)
54 | boxes = boxes.clone()
55 | boxes[:, :2] += offsets[:, None]
56 | return rbbox_nms(boxes, scores, iou_thresh)
57 |
--------------------------------------------------------------------------------
/utils/box/rbbox_np.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import numpy as np
4 |
5 |
6 | try:
7 | from .ext.rbbox_overlap_gpu import rbbox_iou as rbbox_iou_gpu
8 | from .ext.rbbox_overlap_gpu import rbbox_nms as rbbox_nms_gpu
9 |
10 |
11 | def rbbox_iou(boxes1, boxes2, device=0): # [x, y, w, h, a]
12 | boxes1 = boxes1.reshape([-1, 5]).astype(np.float32)
13 | boxes2 = boxes2.reshape([-1, 5]).astype(np.float32)
14 | ious = rbbox_iou_gpu(boxes1, boxes2, device)
15 | return ious
16 |
17 | def rbbox_nms(boxes, scores, iou_thresh=0.5, device=0):
18 | boxes = boxes.reshape([-1, 5]).astype(np.float32)
19 | scores = scores.reshape([-1, 1]).astype(np.float32)
20 | boxes = np.c_[boxes, scores]
21 | keeps = rbbox_nms_gpu(boxes, iou_thresh, device)
22 | return keeps
23 |
24 | except ModuleNotFoundError as e:
25 |
26 | from .ext.rbbox_overlap_cpu import rbbox_iou_nxn as rbbox_iou_cpu
27 | from .ext.rbbox_overlap_cpu import rbbox_nms as rbbox_nms_cpu
28 |
29 |
30 | def rbbox_iou(boxes1, boxes2):
31 | boxes1 = boxes1.reshape([-1, 5]).astype(np.float64)
32 | boxes2 = boxes2.reshape([-1, 5]).astype(np.float64)
33 | ious = rbbox_iou_cpu(boxes1, boxes2)
34 | return ious
35 |
36 |
37 | def rbbox_nms(boxes, scores, iou_thresh=0.5):
38 | boxes = boxes.reshape([-1, 5]).astype(np.float64)
39 | scores = scores.reshape([-1]).astype(np.float64)
40 | keeps = rbbox_nms_cpu(boxes, scores, iou_thresh)
41 | return keeps
42 |
43 |
44 | def rbbox_batched_nms(boxes, scores, labels, iou_thresh=0.5):
45 | if len(boxes) == 0:
46 |         return np.empty([0], dtype=int)
47 | max_coordinate = boxes[:, 0:2].max() + boxes[:, 2:4].max()
48 | offsets = labels * (max_coordinate + 1)
49 | boxes = boxes.copy()
50 | boxes[:, :2] += offsets[:, None]
51 | return rbbox_nms(boxes, scores, iou_thresh)
52 |
--------------------------------------------------------------------------------
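
A small sketch of the NumPy-facing IoU/NMS helpers (again assuming one of the compiled extensions is available); boxes are [x, y, w, h, angle in degrees]:

    import numpy as np
    from utils.box.rbbox_np import rbbox_iou, rbbox_batched_nms

    boxes = np.array([[50., 50., 20., 10., 0.],
                      [52., 50., 20., 10., 5.]])
    scores = np.array([0.9, 0.8])
    labels = np.array([0, 0])
    print(rbbox_iou(boxes, boxes))                        # 2x2 rotated IoU matrix
    print(rbbox_batched_nms(boxes, scores, labels, 0.5))  # indexes kept after per-class NMS
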
/utils/crop_image.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : crop_image.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 11:09
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import os
12 | import json
13 | import cv2 as cv
14 | import numpy as np
15 | import multiprocessing
16 |
17 | from copy import deepcopy
18 |
19 | from .box.bbox_np import xy42xywha
20 | from .image import imread, imwrite
21 |
22 |
23 | class Cropper:
24 | def __init__(self, size, overlap):
25 | self.sizes = sorted([size] if isinstance(size, int) else size)
26 | self.overlap = overlap
27 |
28 | @staticmethod
29 | def crop_bbox(objs, roi):
30 | sub_objs = []
31 | x1, y1, x2, y2 = roi
32 | roi = ((x1 + x2) / 2, (y1 + y2) / 2), (x2 - x1, y2 - y1), 0
33 | for obj in objs:
34 | x, y, w, h, a = xy42xywha(np.array(obj['bbox'], dtype=np.float32))
35 | inter_points = cv.rotatedRectangleIntersection(roi, ((x, y), (w, h), a))[1]
36 | if inter_points is not None:
37 | order_points = cv.convexHull(inter_points, returnPoints=True)
38 | inter_area = cv.contourArea(order_points)
39 | iou = inter_area / (w * h)
40 | if iou > 0.5:
41 | sub_bbox = cv.boxPoints(cv.minAreaRect(order_points)) - [x1, y1]
42 | obj = deepcopy(obj)
43 | obj['bbox'] = sub_bbox.tolist()
44 | sub_objs.append(obj)
45 | return sub_objs
46 |
47 | def crop_with_anno(self, path_img, path_anno, out_dir_images, out_dir_annos, save_empty=False):
48 | print('crop:', path_img, path_anno)
49 | img = imread(path_img)
50 | ih, iw = img.shape[:2]
51 | name = os.path.splitext(os.path.basename(path_img))[0]
52 | anno = [] if path_anno is None else json.load(open(path_anno))
53 | for i, size in enumerate(self.sizes):
54 | if i > 0 and (max if save_empty else min)(iw, ih) < self.sizes[i - 1]:
55 | break
56 | stride = int(size * (1 - self.overlap))
57 | for x in range(0, iw, stride):
58 | for y in range(0, ih, stride):
59 | w, h = size, size
60 | if x + size > iw:
61 | x = max(0, iw - size)
62 | w = iw - x
63 | if y + size > ih:
64 | y = max(0, ih - size)
65 | h = ih - y
66 | save_name = '%s-%d-%d-%d-%d' % (name, x, y, w, h)
67 | sub_anno = self.crop_bbox(anno, (x, y, x + w, y + h))
68 | if sub_anno:
69 | json.dump(sub_anno, open(os.path.join(out_dir_annos, save_name + '.json'), 'wt'), indent=2)
70 | if sub_anno or save_empty:
71 | save_path = os.path.join(out_dir_images, save_name + '.jpg')
72 | sub_img = img[y: y + h, x: x + w]
73 | imwrite(sub_img, save_path)
74 |
75 | def crop_batch(self, pairs, out_dir_images, out_dir_annos, save_empty=False):
76 | os.makedirs(out_dir_images, exist_ok=True)
77 | os.makedirs(out_dir_annos, exist_ok=True)
78 | pool = multiprocessing.Pool(processes=min(multiprocessing.cpu_count(), 8))
79 | for image, anno in pairs:
80 | pool.apply_async(self.crop_with_anno, (image, anno, out_dir_images, out_dir_annos, save_empty))
81 | pool.close()
82 | pool.join()
83 |
--------------------------------------------------------------------------------
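
A usage sketch for Cropper outside of the prepare.py scripts; the paths are placeholders, and each annotation file is expected to be a JSON list of {'name', 'bbox'} objects like the ones txt2json writes:

    from utils.crop_image import Cropper

    pairs = [('path/to/P0001.png', 'path/to/P0001.json')]   # (image, annotation) pairs
    cropper = Cropper(size=768, overlap=0.25)                # 768x768 tiles with 25% overlap
    cropper.crop_batch(pairs, 'out/images', 'out/annotations', save_empty=False)
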
/utils/image.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from PIL import ImageFile
7 |
8 |
9 | ImageFile.LOAD_TRUNCATED_IMAGES = True
10 |
11 |
12 | def imread(path, mode='RGB'):
13 | img = Image.open(path)
14 | if mode is not None:
15 | img = img.convert(mode)
16 | return np.array(img)
17 |
18 |
19 | def imwrite(image, path):
20 | Image.fromarray(image).save(path)
21 |
--------------------------------------------------------------------------------
/utils/init.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from torch import nn
4 |
5 |
6 | def weight_init_normal(m):
7 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
8 | nn.init.normal_(m.weight, 0, 0.02)
9 | elif isinstance(m, nn.BatchNorm2d):
10 |         nn.init.normal_(m.weight, 1, 0.02)
11 |         nn.init.constant_(m.bias, 0)
12 |
13 |
14 | def weight_init_uniform(m):
15 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
16 | nn.init.uniform_(m.weight, 0, 0.02)
17 | elif isinstance(m, nn.BatchNorm2d):
18 |         nn.init.constant_(m.weight, 1)
19 |         nn.init.constant_(m.bias, 0)
20 |
21 |
22 | def weight_init_kaiming_normal(m):
23 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
24 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
25 | elif isinstance(m, nn.BatchNorm2d):
26 | nn.init.constant_(m.weight, 1)
27 | nn.init.constant_(m.bias, 0)
28 |
29 |
30 | def weight_init_kaiming_uniform(m):
31 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
32 | nn.init.kaiming_uniform_(m.weight, mode='fan_out', nonlinearity='relu')
33 | elif isinstance(m, nn.BatchNorm2d):
34 | nn.init.constant_(m.weight, 1)
35 | nn.init.constant_(m.bias, 0)
36 |
37 |
38 | def weight_init_xavier_normal(m):
39 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
40 | nn.init.xavier_normal_(m.weight)
41 | elif isinstance(m, nn.BatchNorm2d):
42 | nn.init.constant_(m.weight, 1)
43 | nn.init.constant_(m.bias, 0)
44 |
45 |
46 | def weight_init_xavier_uniform(m):
47 | if isinstance(m, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
48 | nn.init.xavier_uniform_(m.weight)
49 | elif isinstance(m, nn.BatchNorm2d):
50 | nn.init.constant_(m.weight, 1)
51 | nn.init.constant_(m.bias, 0)
52 |
53 |
54 | weight_init = {
55 | 'normal': weight_init_normal,
56 | 'uniform': weight_init_uniform,
57 | 'kaiming_normal': weight_init_kaiming_normal,
58 | 'kaiming_uniform': weight_init_kaiming_uniform,
59 | 'xavier_normal': weight_init_xavier_normal,
60 | 'xavier_uniform': weight_init_xavier_uniform,
61 | }
62 |
--------------------------------------------------------------------------------
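
The weight_init table pairs naturally with Module.apply; a minimal sketch (the layer sizes are arbitrary):

    from torch import nn
    from utils.init import weight_init

    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU())
    model.apply(weight_init['kaiming_normal'])  # initializes Conv/Linear weights and resets BN in place
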
/utils/misc.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import os
4 |
5 | from collections import defaultdict, OrderedDict
6 |
7 |
8 | def containerize(x, n=1):
9 | return x if isinstance(x, (list, tuple)) else [x] * n
10 |
11 |
12 | def convert_path(path):
13 | return path.replace(r'\/'.replace(os.sep, ''), os.sep)
14 |
15 |
16 | class Node:
17 | __slots__ = 'key', 'val', 'cnt'
18 |
19 | def __init__(self, key, val, cnt=0):
20 | self.key, self.val, self.cnt = key, val, cnt
21 |
22 |
23 | class LFUCache:
24 | def __init__(self, capacity):
25 | self.capacity = capacity
26 | self.cache = {} # type {key: node}
27 | self.cnt2node = defaultdict(OrderedDict)
28 | self.mincnt = 0
29 |
30 | def get(self, key, default=None):
31 | if key not in self.cache:
32 | return default
33 |
34 | node = self.cache[key]
35 | del self.cnt2node[node.cnt][key]
36 |
37 | if not self.cnt2node[node.cnt]:
38 | del self.cnt2node[node.cnt]
39 |
40 | node.cnt += 1
41 | self.cnt2node[node.cnt][key] = node
42 |
43 | if not self.cnt2node[self.mincnt]:
44 | self.mincnt += 1
45 | return node.val
46 |
47 | def put(self, key, value):
48 | if key in self.cache:
49 | self.cache[key].val = value
50 | self.get(key)
51 | return
52 | if len(self.cache) >= self.capacity:
53 | pop_key, _pop_node = self.cnt2node[self.mincnt].popitem(last=False)
54 | del self.cache[pop_key]
55 |
56 | self.cache[key] = self.cnt2node[1][key] = Node(key, value, 1)
57 | self.mincnt = 1
58 |
--------------------------------------------------------------------------------
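
A short sketch of the LFUCache behaviour (capacity and keys chosen arbitrarily):

    from utils.misc import LFUCache

    cache = LFUCache(capacity=2)
    cache.put('a', 1)
    cache.put('b', 2)
    cache.get('a')          # bumps the use count of 'a'
    cache.put('c', 3)       # evicts 'b', the least frequently used key
    print(cache.get('b'))   # None
    print(cache.get('a'), cache.get('c'))
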
/utils/parallel/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .sync_batchnorm import convert_model
4 | from .data_parallel import CustomDetDataParallel
5 |
--------------------------------------------------------------------------------
/utils/parallel/data_parallel.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import torch
4 |
5 | from torch import nn
6 |
7 |
8 | class CustomDetDataParallel(nn.DataParallel):
9 | """
10 | force splitting data to all gpus instead of sending all data to cuda:0 and then moving around.
11 | """
12 |
13 | def __init__(self, module, device_ids):
14 | super().__init__(module, device_ids)
15 |
16 | def scatter(self, inputs, kwargs, device_ids):
17 | # More like scatter and data prep at the same time. The point is we prep the data in such a way
18 | # that no scatter is necessary, and there's no need to shuffle stuff around different GPUs.
19 | data_splits = []
20 | for i, device in enumerate(device_ids):
21 | data_split = []
22 | for data in inputs:
23 | data = data[i:: len(device_ids)]
24 | if isinstance(data, torch.Tensor):
25 | data = data.to(f'cuda:{device}', non_blocking=True)
26 | data_split.append(data)
27 | data_splits.append(data_split)
28 | return data_splits, [kwargs] * len(device_ids)
29 |
30 | def gather(self, outputs, output_device):
31 | if self.training:
32 | # (
33 | # {}, {}, ...
34 | # )
35 | outputs = super().gather(outputs, output_device)
36 | for key, val in list(outputs.items()):
37 | outputs[key] = val.mean()
38 | else:
39 | # (
40 | # [[], [], ...], [[], [], ...]
41 | # )
42 | outputs = sum(map(list, zip(*outputs)), [])
43 | return outputs
44 |
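A hypothetical usage sketch (assumes two visible GPUs and a module that returns a dict of scalar losses in training mode, which is the contract `gather` above relies on):

```python
import torch
from torch import nn
from utils.parallel import CustomDetDataParallel

class ToyDet(nn.Module):
    """Stand-in for the real detector: returns a dict of losses in training mode."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3, padding=1)

    def forward(self, images):
        return {'loss': self.conv(images).mean()}

model = CustomDetDataParallel(ToyDet().cuda(), device_ids=[0, 1])
model.train()

images = torch.randn(8, 3, 64, 64)  # left on CPU; scatter() moves each interleaved slice to its GPU
losses = model(images)              # gather() averages each loss entry across GPUs
losses['loss'].backward()
```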
--------------------------------------------------------------------------------
/utils/parallel/sync_batchnorm/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : __init__.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 27/01/2018
6 | #
7 | # This file is part of Synchronized-BatchNorm-PyTorch.
8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9 | # Distributed under MIT License.
10 |
11 | from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d
12 | from .batchnorm import patch_sync_batchnorm, convert_model
13 | from .replicate import DataParallelWithCallback, patch_replication_callback
14 |
--------------------------------------------------------------------------------
/utils/parallel/sync_batchnorm/batchnorm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : batchnorm.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 27/01/2018
6 | #
7 | # This file is part of Synchronized-BatchNorm-PyTorch.
8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9 | # Distributed under MIT License.
10 |
11 | import collections
12 | import contextlib
13 |
14 | import torch
15 | import torch.nn.functional as F
16 |
17 | from torch.nn.modules.batchnorm import _BatchNorm
18 |
19 | try:
20 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
21 | except ImportError:
22 | ReduceAddCoalesced = Broadcast = None
23 |
24 | try:
25 | from jactorch.parallel.comm import SyncMaster
26 | from jactorch.parallel.data_parallel import JacDataParallel as DataParallelWithCallback
27 | except ImportError:
28 | from .comm import SyncMaster
29 | from .replicate import DataParallelWithCallback
30 |
31 | __all__ = [
32 | 'SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d',
33 | 'patch_sync_batchnorm', 'convert_model'
34 | ]
35 |
36 |
37 | def _sum_ft(tensor):
38 |     """Sum over the first and last dimensions."""
39 | return tensor.sum(dim=0).sum(dim=-1)
40 |
41 |
42 | def _unsqueeze_ft(tensor):
43 | """add new dimensions at the front and the tail"""
44 | return tensor.unsqueeze(0).unsqueeze(-1)
45 |
46 |
47 | _ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size'])
48 | _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std'])
49 |
50 |
51 | class _SynchronizedBatchNorm(_BatchNorm):
52 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
53 |         assert ReduceAddCoalesced is not None, 'Cannot use Synchronized Batch Normalization without CUDA support.'
54 |
55 | super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine)
56 |
57 | self._sync_master = SyncMaster(self._data_parallel_master)
58 |
59 | self._is_parallel = False
60 | self._parallel_id = None
61 | self._slave_pipe = None
62 |
63 | def forward(self, input):
64 | # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation.
65 | if not (self._is_parallel and self.training):
66 | return F.batch_norm(
67 | input, self.running_mean, self.running_var, self.weight, self.bias,
68 | self.training, self.momentum, self.eps)
69 |
70 | # Resize the input to (B, C, -1).
71 | input_shape = input.size()
72 | input = input.view(input.size(0), self.num_features, -1)
73 |
74 | # Compute the sum and square-sum.
75 | sum_size = input.size(0) * input.size(2)
76 | input_sum = _sum_ft(input)
77 | input_ssum = _sum_ft(input ** 2)
78 |
79 | # Reduce-and-broadcast the statistics.
80 | if self._parallel_id == 0:
81 | mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))
82 | else:
83 | mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))
84 |
85 | # Compute the output.
86 | if self.affine:
87 | # MJY:: Fuse the multiplication for speed.
88 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias)
89 | else:
90 | output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std)
91 |
92 | # Reshape it.
93 | return output.view(input_shape)
94 |
95 | def __data_parallel_replicate__(self, ctx, copy_id):
96 | self._is_parallel = True
97 | self._parallel_id = copy_id
98 |
99 | # parallel_id == 0 means master device.
100 | if self._parallel_id == 0:
101 | ctx.sync_master = self._sync_master
102 | else:
103 | self._slave_pipe = ctx.sync_master.register_slave(copy_id)
104 |
105 | def _data_parallel_master(self, intermediates):
106 | """Reduce the sum and square-sum, compute the statistics, and broadcast it."""
107 |
108 | # Always using same "device order" makes the ReduceAdd operation faster.
109 | # Thanks to:: Tete Xiao (http://tetexiao.com/)
110 | intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device())
111 |
112 | to_reduce = [i[1][:2] for i in intermediates]
113 | to_reduce = [j for i in to_reduce for j in i] # flatten
114 | target_gpus = [i[1].sum.get_device() for i in intermediates]
115 |
116 | sum_size = sum([i[1].sum_size for i in intermediates])
117 | sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce)
118 | mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size)
119 |
120 | broadcasted = Broadcast.apply(target_gpus, mean, inv_std)
121 |
122 | outputs = []
123 | for i, rec in enumerate(intermediates):
124 | outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2])))
125 |
126 | return outputs
127 |
128 | def _compute_mean_std(self, sum_, ssum, size):
129 | """Compute the mean and standard-deviation with sum and square-sum. This method
130 | also maintains the moving average on the master device."""
131 | assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.'
132 | mean = sum_ / size
133 | sumvar = ssum - sum_ * mean
134 | unbias_var = sumvar / (size - 1)
135 | bias_var = sumvar / size
136 |
137 | if hasattr(torch, 'no_grad'):
138 | with torch.no_grad():
139 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data
140 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data
141 | else:
142 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data
143 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data
144 |
145 | return mean, bias_var.clamp(self.eps) ** -0.5
146 |
147 |
148 | class SynchronizedBatchNorm1d(_SynchronizedBatchNorm):
149 | r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a
150 | mini-batch.
151 |
152 | .. math::
153 |
154 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
155 |
156 | This module differs from the built-in PyTorch BatchNorm1d as the mean and
157 | standard-deviation are reduced across all devices during training.
158 |
159 | For example, when one uses `nn.DataParallel` to wrap the network during
160 |     training, PyTorch's implementation normalizes the tensor on each device using
161 |     the statistics only on that device, which accelerates the computation and
162 |     is also easy to implement, but the statistics might be inaccurate.
163 | Instead, in this synchronized version, the statistics will be computed
164 | over all training samples distributed on multiple devices.
165 |
166 |     Note that, for the one-GPU or CPU-only case, this module behaves exactly the same
167 | as the built-in PyTorch implementation.
168 |
169 | The mean and standard-deviation are calculated per-dimension over
170 | the mini-batches and gamma and beta are learnable parameter vectors
171 | of size C (where C is the input size).
172 |
173 | During training, this layer keeps a running estimate of its computed mean
174 | and variance. The running sum is kept with a default momentum of 0.1.
175 |
176 | During evaluation, this running mean/variance is used for normalization.
177 |
178 | Because the BatchNorm is done over the `C` dimension, computing statistics
179 | on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm
180 |
181 | Args:
182 | num_features: num_features from an expected input of size
183 | `batch_size x num_features [x width]`
184 | eps: a value added to the denominator for numerical stability.
185 | Default: 1e-5
186 | momentum: the value used for the running_mean and running_var
187 | computation. Default: 0.1
188 | affine: a boolean value that when set to ``True``, gives the layer learnable
189 | affine parameters. Default: ``True``
190 |
191 | Shape::
192 | - Input: :math:`(N, C)` or :math:`(N, C, L)`
193 | - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
194 |
195 | Examples:
196 | >>> # With Learnable Parameters
197 | >>> m = SynchronizedBatchNorm1d(100)
198 | >>> # Without Learnable Parameters
199 | >>> m = SynchronizedBatchNorm1d(100, affine=False)
200 | >>> input = torch.autograd.Variable(torch.randn(20, 100))
201 | >>> output = m(input)
202 | """
203 |
204 | def _check_input_dim(self, input):
205 | if input.dim() != 2 and input.dim() != 3:
206 | raise ValueError('expected 2D or 3D input (got {}D input)'
207 | .format(input.dim()))
208 | super(SynchronizedBatchNorm1d, self)._check_input_dim(input)
209 |
210 |
211 | class SynchronizedBatchNorm2d(_SynchronizedBatchNorm):
212 | r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch
213 | of 3d inputs
214 |
215 | .. math::
216 |
217 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
218 |
219 | This module differs from the built-in PyTorch BatchNorm2d as the mean and
220 | standard-deviation are reduced across all devices during training.
221 |
222 | For example, when one uses `nn.DataParallel` to wrap the network during
223 |     training, PyTorch's implementation normalizes the tensor on each device using
224 |     the statistics only on that device, which accelerates the computation and
225 |     is also easy to implement, but the statistics might be inaccurate.
226 | Instead, in this synchronized version, the statistics will be computed
227 | over all training samples distributed on multiple devices.
228 |
229 |     Note that, for the one-GPU or CPU-only case, this module behaves exactly the same
230 | as the built-in PyTorch implementation.
231 |
232 | The mean and standard-deviation are calculated per-dimension over
233 | the mini-batches and gamma and beta are learnable parameter vectors
234 | of size C (where C is the input size).
235 |
236 | During training, this layer keeps a running estimate of its computed mean
237 | and variance. The running sum is kept with a default momentum of 0.1.
238 |
239 | During evaluation, this running mean/variance is used for normalization.
240 |
241 | Because the BatchNorm is done over the `C` dimension, computing statistics
242 | on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm
243 |
244 | Args:
245 | num_features: num_features from an expected input of
246 | size batch_size x num_features x height x width
247 | eps: a value added to the denominator for numerical stability.
248 | Default: 1e-5
249 | momentum: the value used for the running_mean and running_var
250 | computation. Default: 0.1
251 | affine: a boolean value that when set to ``True``, gives the layer learnable
252 | affine parameters. Default: ``True``
253 |
254 | Shape::
255 | - Input: :math:`(N, C, H, W)`
256 | - Output: :math:`(N, C, H, W)` (same shape as input)
257 |
258 | Examples:
259 | >>> # With Learnable Parameters
260 | >>> m = SynchronizedBatchNorm2d(100)
261 | >>> # Without Learnable Parameters
262 | >>> m = SynchronizedBatchNorm2d(100, affine=False)
263 | >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45))
264 | >>> output = m(input)
265 | """
266 |
267 | def _check_input_dim(self, input):
268 | if input.dim() != 4:
269 | raise ValueError('expected 4D input (got {}D input)'
270 | .format(input.dim()))
271 | super(SynchronizedBatchNorm2d, self)._check_input_dim(input)
272 |
273 |
274 | class SynchronizedBatchNorm3d(_SynchronizedBatchNorm):
275 | r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch
276 | of 4d inputs
277 |
278 | .. math::
279 |
280 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
281 |
282 | This module differs from the built-in PyTorch BatchNorm3d as the mean and
283 | standard-deviation are reduced across all devices during training.
284 |
285 | For example, when one uses `nn.DataParallel` to wrap the network during
286 |     training, PyTorch's implementation normalizes the tensor on each device using
287 |     the statistics only on that device, which accelerates the computation and
288 |     is also easy to implement, but the statistics might be inaccurate.
289 | Instead, in this synchronized version, the statistics will be computed
290 | over all training samples distributed on multiple devices.
291 |
292 |     Note that, for the one-GPU or CPU-only case, this module behaves exactly the same
293 | as the built-in PyTorch implementation.
294 |
295 | The mean and standard-deviation are calculated per-dimension over
296 | the mini-batches and gamma and beta are learnable parameter vectors
297 | of size C (where C is the input size).
298 |
299 | During training, this layer keeps a running estimate of its computed mean
300 | and variance. The running sum is kept with a default momentum of 0.1.
301 |
302 | During evaluation, this running mean/variance is used for normalization.
303 |
304 | Because the BatchNorm is done over the `C` dimension, computing statistics
305 | on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm
306 | or Spatio-temporal BatchNorm
307 |
308 | Args:
309 | num_features: num_features from an expected input of
310 | size batch_size x num_features x depth x height x width
311 | eps: a value added to the denominator for numerical stability.
312 | Default: 1e-5
313 | momentum: the value used for the running_mean and running_var
314 | computation. Default: 0.1
315 | affine: a boolean value that when set to ``True``, gives the layer learnable
316 | affine parameters. Default: ``True``
317 |
318 | Shape::
319 | - Input: :math:`(N, C, D, H, W)`
320 | - Output: :math:`(N, C, D, H, W)` (same shape as input)
321 |
322 | Examples:
323 | >>> # With Learnable Parameters
324 | >>> m = SynchronizedBatchNorm3d(100)
325 | >>> # Without Learnable Parameters
326 | >>> m = SynchronizedBatchNorm3d(100, affine=False)
327 | >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10))
328 | >>> output = m(input)
329 | """
330 |
331 | def _check_input_dim(self, input):
332 | if input.dim() != 5:
333 | raise ValueError('expected 5D input (got {}D input)'
334 | .format(input.dim()))
335 | super(SynchronizedBatchNorm3d, self)._check_input_dim(input)
336 |
337 |
338 | @contextlib.contextmanager
339 | def patch_sync_batchnorm():
340 | import torch.nn as nn
341 |
342 | backup = nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d
343 |
344 | nn.BatchNorm1d = SynchronizedBatchNorm1d
345 | nn.BatchNorm2d = SynchronizedBatchNorm2d
346 | nn.BatchNorm3d = SynchronizedBatchNorm3d
347 |
348 | yield
349 |
350 | nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d = backup
351 |
352 |
353 | def convert_model(module):
354 |     """Traverse the input module and its children recursively
355 |        and replace all instances of torch.nn.modules.batchnorm.BatchNorm*N*d
356 |        with SynchronizedBatchNorm*N*d
357 |
358 | Args:
359 |         module: the input module to be converted to a SyncBN model
360 |
361 | Examples:
362 | >>> import torch.nn as nn
363 | >>> import torchvision
364 | >>> # m is a standard pytorch model
365 | >>> m = torchvision.models.resnet18(True)
366 | >>> m = nn.DataParallel(m)
367 | >>> # after convert, m is using SyncBN
368 | >>> m = convert_model(m)
369 | """
370 | if isinstance(module, torch.nn.DataParallel):
371 | mod = module.module
372 | mod = convert_model(mod)
373 | mod = DataParallelWithCallback(mod, device_ids=module.device_ids)
374 | return mod
375 |
376 | mod = module
377 | for pth_module, sync_module in zip([torch.nn.modules.batchnorm.BatchNorm1d,
378 | torch.nn.modules.batchnorm.BatchNorm2d,
379 | torch.nn.modules.batchnorm.BatchNorm3d],
380 | [SynchronizedBatchNorm1d,
381 | SynchronizedBatchNorm2d,
382 | SynchronizedBatchNorm3d]):
383 | if isinstance(module, pth_module):
384 | mod = sync_module(module.num_features, module.eps, module.momentum, module.affine)
385 | mod.running_mean = module.running_mean
386 | mod.running_var = module.running_var
387 | if module.affine:
388 | mod.weight.data = module.weight.data.clone().detach()
389 | mod.bias.data = module.bias.data.clone().detach()
390 |
391 | for name, child in module.named_children():
392 | mod.add_module(name, convert_model(child))
393 |
394 | return mod
395 |
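A brief sketch of the two entry points above (illustrative; assumes at least two GPUs): `patch_sync_batchnorm` temporarily swaps `nn.BatchNorm*d` for the synchronized variants while a model is being built, and `DataParallelWithCallback` supplies the replication callback the layers need in order to register with the master copy. `convert_model` (see its docstring) is the alternative for an already-built model.

```python
import torch.nn as nn
from utils.parallel.sync_batchnorm import patch_sync_batchnorm, DataParallelWithCallback

with patch_sync_batchnorm():
    # inside the context, nn.BatchNorm2d refers to SynchronizedBatchNorm2d
    model = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU())

# the replication callback lets each copy register with the master for statistics reduction
model = DataParallelWithCallback(model.cuda(), device_ids=[0, 1])
```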
--------------------------------------------------------------------------------
/utils/parallel/sync_batchnorm/comm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : comm.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 27/01/2018
6 | #
7 | # This file is part of Synchronized-BatchNorm-PyTorch.
8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9 | # Distributed under MIT License.
10 |
11 | import queue
12 | import collections
13 | import threading
14 |
15 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster']
16 |
17 |
18 | class FutureResult(object):
19 | """A thread-safe future implementation. Used only as one-to-one pipe."""
20 |
21 | def __init__(self):
22 | self._result = None
23 | self._lock = threading.Lock()
24 | self._cond = threading.Condition(self._lock)
25 |
26 | def put(self, result):
27 | with self._lock:
28 |             assert self._result is None, 'Previous result hasn\'t been fetched.'
29 | self._result = result
30 | self._cond.notify()
31 |
32 | def get(self):
33 | with self._lock:
34 | if self._result is None:
35 | self._cond.wait()
36 |
37 | res = self._result
38 | self._result = None
39 | return res
40 |
41 |
42 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result'])
43 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result'])
44 |
45 |
46 | class SlavePipe(_SlavePipeBase):
47 | """Pipe for master-slave communication."""
48 |
49 | def run_slave(self, msg):
50 | self.queue.put((self.identifier, msg))
51 | ret = self.result.get()
52 | self.queue.put(True)
53 | return ret
54 |
55 |
56 | class SyncMaster(object):
57 | """An abstract `SyncMaster` object.
58 |
59 |     - During replication, since the data-parallel wrapper triggers a callback on each module, every slave device should
60 |     call `register_slave(id)` and obtain a `SlavePipe` to communicate with the master.
61 |     - During the forward pass, the master device invokes `run_master`; all messages from the slave devices are
62 |     collected and passed to the registered callback.
63 |     - After receiving the messages, the master device gathers the information and determines the message to be
64 |     passed back to each slave device.
65 | """
66 |
67 | def __init__(self, master_callback):
68 | """
69 |
70 | Args:
71 | master_callback: a callback to be invoked after having collected messages from slave devices.
72 | """
73 | self._master_callback = master_callback
74 | self._queue = queue.Queue()
75 | self._registry = collections.OrderedDict()
76 | self._activated = False
77 |
78 | def __getstate__(self):
79 | return {'master_callback': self._master_callback}
80 |
81 | def __setstate__(self, state):
82 | self.__init__(state['master_callback'])
83 |
84 | def register_slave(self, identifier):
85 | """
86 |         Register a slave device.
87 |
88 | Args:
89 | identifier: an identifier, usually is the device id.
90 |
91 | Returns: a `SlavePipe` object which can be used to communicate with the master device.
92 |
93 | """
94 | if self._activated:
95 | assert self._queue.empty(), 'Queue is not clean before next initialization.'
96 | self._activated = False
97 | self._registry.clear()
98 | future = FutureResult()
99 | self._registry[identifier] = _MasterRegistry(future)
100 | return SlavePipe(identifier, self._queue, future)
101 |
102 | def run_master(self, master_msg):
103 | """
104 | Main entry for the master device in each forward pass.
105 |         The messages are first collected from each device (including the master device), and then
106 |         a callback is invoked to compute the message to be sent back to each device
107 |         (including the master device).
108 |
109 | Args:
110 | master_msg: the message that the master want to send to itself. This will be placed as the first
111 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example.
112 |
113 | Returns: the message to be sent back to the master device.
114 |
115 | """
116 | self._activated = True
117 |
118 | intermediates = [(0, master_msg)]
119 | for i in range(self.nr_slaves):
120 | intermediates.append(self._queue.get())
121 |
122 | results = self._master_callback(intermediates)
123 |         assert results[0][0] == 0, 'The first result should belong to the master.'
124 |
125 | for i, res in results:
126 | if i == 0:
127 | continue
128 | self._registry[i].result.put(res)
129 |
130 | for i in range(self.nr_slaves):
131 | assert self._queue.get() is True
132 |
133 | return results[0][1]
134 |
135 | @property
136 | def nr_slaves(self):
137 | return len(self._registry)
138 |
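A minimal, single-process sketch of the master/slave protocol described in the `SyncMaster` docstring, using the classes defined above with a hypothetical callback (in the real code, `_SynchronizedBatchNorm._data_parallel_master` plays this role):

```python
import threading

def master_callback(intermediates):
    # intermediates is [(identifier, message), ...] with the master's entry first;
    # send every device the sum of all messages.
    total = sum(msg for _, msg in intermediates)
    return [(identifier, total) for identifier, _ in intermediates]

master = SyncMaster(master_callback)
pipe = master.register_slave(1)       # one slave device, identifier 1

out = {}
slave = threading.Thread(target=lambda: out.update(slave=pipe.run_slave(10)))
slave.start()
out['master'] = master.run_master(5)  # the master contributes 5
slave.join()
assert out['master'] == out['slave'] == 15
```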
--------------------------------------------------------------------------------
/utils/parallel/sync_batchnorm/replicate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : replicate.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 27/01/2018
6 | #
7 | # This file is part of Synchronized-BatchNorm-PyTorch.
8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9 | # Distributed under MIT License.
10 |
11 | import functools
12 |
13 | from torch.nn.parallel.data_parallel import DataParallel
14 |
15 | __all__ = [
16 | 'CallbackContext',
17 | 'execute_replication_callbacks',
18 | 'DataParallelWithCallback',
19 | 'patch_replication_callback'
20 | ]
21 |
22 |
23 | class CallbackContext(object):
24 | pass
25 |
26 |
27 | def execute_replication_callbacks(modules):
28 | """
29 | Execute an replication callback `__data_parallel_replicate__` on each module created by original replication.
30 |
31 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
32 |
33 |     Note that, as all replicas are isomorphic, we assign each sub-module a context
34 | (shared among multiple copies of this module on different devices).
35 | Through this context, different copies can share some information.
36 |
37 |     We guarantee that the callback on the master copy (the first copy) is called before the callbacks
38 |     of any slave copies.
39 | """
40 | master_copy = modules[0]
41 | nr_modules = len(list(master_copy.modules()))
42 | ctxs = [CallbackContext() for _ in range(nr_modules)]
43 |
44 | for i, module in enumerate(modules):
45 | for j, m in enumerate(module.modules()):
46 | if hasattr(m, '__data_parallel_replicate__'):
47 | m.__data_parallel_replicate__(ctxs[j], i)
48 |
49 |
50 | class DataParallelWithCallback(DataParallel):
51 | """
52 | Data Parallel with a replication callback.
53 |
54 | An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by
55 | original `replicate` function.
56 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
57 |
58 | Examples:
59 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
60 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
61 | # sync_bn.__data_parallel_replicate__ will be invoked.
62 | """
63 |
64 | def replicate(self, module, device_ids):
65 | modules = super(DataParallelWithCallback, self).replicate(module, device_ids)
66 | execute_replication_callbacks(modules)
67 | return modules
68 |
69 |
70 | def patch_replication_callback(data_parallel):
71 | """
72 | Monkey-patch an existing `DataParallel` object. Add the replication callback.
73 | Useful when you have customized `DataParallel` implementation.
74 |
75 | Examples:
76 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
77 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
78 | > patch_replication_callback(sync_bn)
79 | # this is equivalent to
80 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
81 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
82 | """
83 |
84 | assert isinstance(data_parallel, DataParallel)
85 |
86 | old_replicate = data_parallel.replicate
87 |
88 | @functools.wraps(old_replicate)
89 | def new_replicate(module, device_ids):
90 | modules = old_replicate(module, device_ids)
91 | execute_replication_callbacks(modules)
92 | return modules
93 |
94 | data_parallel.replicate = new_replicate
95 |
--------------------------------------------------------------------------------
/xtorch/README.md:
--------------------------------------------------------------------------------
1 | # xtorch
2 |
3 | This is a simple wrapper around PyTorch that lets `in_features`/`in_channels` be inferred automatically when the model is defined, rather than specified explicitly.
4 |
5 | A simple example is as follows:
6 |
7 | ```python
8 | import torch
9 | from torch import nn
10 | from xtorch import xnn
11 |
12 | model = xnn.Sequential(xnn.Linear(16), nn.ReLU(), xnn.Linear(2))
13 | # <===> nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))
14 |
15 | model.build_pipe(shape=[2, 8])
16 | # alternative
17 | # model.build(torch.randn(2, 8))
18 |
19 | x = torch.randn(32, 8)
20 | y = model(x)
21 | ```
22 |
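One practical consequence of the lazy construction: the wrapped layers have no parameters until the model has been built, so build it (or run one forward pass) before creating an optimizer. For example:

```python
import torch
from torch import nn
from xtorch import xnn

model = xnn.Sequential(xnn.Linear(16), nn.ReLU(), xnn.Linear(2))
assert len(list(model.parameters())) == 0  # nothing has been materialized yet

model.build_pipe(shape=[2, 8])             # the underlying nn.Linear layers are created here
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
```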
--------------------------------------------------------------------------------
/xtorch/xnn/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .containers import *
4 | from .layers import *
5 |
--------------------------------------------------------------------------------
/xtorch/xnn/containers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : containers.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 12:07
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | import torch
12 |
13 | from torch import nn
14 |
15 |
16 | __all__ = ['Module', 'ModuleAtom', 'ModulePipe', 'Sequential']
17 |
18 |
19 | class Module(nn.Module):
20 | def __init__(self):
21 | super(Module, self).__init__()
22 |
23 | def forward(self, *args, **kwargs):
24 | raise NotImplementedError
25 |
26 | def __call__(self, *args, **kwargs):
27 | return self.forward(*args, **kwargs)
28 |
29 | def build_pipe(self, shape):
30 | return self(torch.randn(shape))
31 |
32 | build = __call__
33 |
34 |
35 | class ModuleAtom(Module):
36 | def __init__(self, *args, **kwargs):
37 | super(ModuleAtom, self).__init__()
38 | self.args = args
39 | self.kwargs = kwargs
40 | self.module = None
41 |
42 | def _init_module(self, *args, **kwargs):
43 | raise NotImplementedError
44 |
45 | def forward(self, *args, **kwargs):
46 | if self.module is None:
47 | self._init_module(*args, **kwargs)
48 | return self.module(*args, **kwargs)
49 |
50 |
51 | class ModulePipe(Module):
52 | def __init__(self):
53 | super(ModulePipe, self).__init__()
54 |
55 | def forward(self, x):
56 | for module in self._modules.values():
57 | x = module(x)
58 | return x
59 |
60 |
61 | class Sequential(nn.Sequential, Module):
62 | def __init__(self, *args):
63 | super(Sequential, self).__init__(*args)
64 |
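A sketch of how `ModuleAtom` is intended to be extended (hypothetical layer, not part of the package): `_init_module` receives the first input and builds the wrapped `nn` module from its shape; subsequent calls are delegated to that module.

```python
import torch
from torch import nn
from xtorch.xnn import ModuleAtom  # assumes the repo root is importable

class LazyPReLU(ModuleAtom):
    """Hypothetical example: nn.PReLU with one slope parameter per input channel."""

    def __init__(self, init=0.25):
        super().__init__(init=init)

    def _init_module(self, x):
        # the channel count is only known once the first input arrives
        self.module = nn.PReLU(num_parameters=x.shape[1], **self.kwargs)

layer = LazyPReLU()
y = layer(torch.randn(4, 16, 8, 8))  # builds nn.PReLU(16) on the first call
```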
--------------------------------------------------------------------------------
/xtorch/xnn/layers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : layers.py
3 | # Author : Kai Ao
4 | # Email : capino627@163.com
5 | # Date : 2020/12/12 12:07
6 | #
7 | # This file is part of Rotation-Decoupled Detector.
8 | # https://github.com/Capino512/pytorch-rotation-decoupled-detector
9 | # Distributed under MIT License.
10 |
11 | from torch import nn
12 |
13 | from .containers import ModuleAtom
14 |
15 |
16 | __all__ = ['Linear', 'Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d',
17 | 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'GroupNorm', 'InstanceNorm1d', 'InstanceNorm2d',
18 | 'InstanceNorm3d', 'LayerNorm']
19 |
20 |
21 | class Linear(ModuleAtom):
22 | def __init__(self, out_features, bias=True):
23 | super(Linear, self).__init__(out_features, bias=bias)
24 |
25 | def _init_module(self, x):
26 | if self.args[0] is None:
27 | self.args = (x.shape[1], *self.args[1:])
28 | self.args = (x.shape[1], *self.args)
29 | self.module = nn.Linear(*self.args, **self.kwargs)
30 |
31 |
32 | class ConvNd(ModuleAtom):
33 | def __init__(self, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True,
34 | padding_mode='zeros'):
35 | super(ConvNd, self).__init__(out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation,
36 | groups=groups, bias=bias, padding_mode=padding_mode)
37 |
38 | def _init_params(self, x):
39 | if self.kwargs['groups'] < 0:
40 | assert x.shape[1] % self.kwargs['groups'] == 0
41 | self.kwargs['groups'] = x.shape[1] // -self.kwargs['groups']
42 | if self.args[0] is None:
43 | self.args = (x.shape[1], *self.args[1:])
44 | self.args = (x.shape[1], *self.args)
45 |
46 |
47 | class Conv1d(ConvNd):
48 | def _init_module(self, x):
49 | self._init_params(x)
50 | self.module = nn.Conv1d(*self.args, **self.kwargs)
51 |
52 |
53 | class Conv2d(ConvNd):
54 | def _init_module(self, x):
55 | self._init_params(x)
56 | self.module = nn.Conv2d(*self.args, **self.kwargs)
57 |
58 |
59 | class Conv3d(ConvNd):
60 | def _init_module(self, x):
61 | self._init_params(x)
62 | self.module = nn.Conv3d(*self.args, **self.kwargs)
63 |
64 |
65 | class ConvTransposeNd(ModuleAtom):
66 | def __init__(self, out_channels, kernel_size, stride=1, padding=0, output_padding=0, dilation=1, groups=1,
67 | bias=True, padding_mode='zeros'):
68 | super(ConvTransposeNd, self).__init__(out_channels, kernel_size, stride=stride, padding=padding,
69 | output_padding=output_padding, dilation=dilation, groups=groups,
70 | bias=bias, padding_mode=padding_mode)
71 |
72 | def _init_params(self, x):
73 | if self.kwargs['groups'] < 0:
74 | assert x.shape[1] % self.kwargs['groups'] == 0
75 | self.kwargs['groups'] = x.shape[1] // -self.kwargs['groups']
76 | if self.args[0] is None:
77 | self.args = (x.shape[1], *self.args[1:])
78 | self.args = (x.shape[1], *self.args)
79 |
80 |
81 | class ConvTranspose1d(ConvTransposeNd):
82 | def _init_module(self, x):
83 | self._init_params(x)
84 | self.module = nn.ConvTranspose1d(*self.args, **self.kwargs)
85 |
86 |
87 | class ConvTranspose2d(ConvTransposeNd):
88 | def _init_module(self, x):
89 | self._init_params(x)
90 | self.module = nn.ConvTranspose2d(*self.args, **self.kwargs)
91 |
92 |
93 | class ConvTranspose3d(ConvTransposeNd):
94 | def _init_module(self, x):
95 | self._init_params(x)
96 | self.module = nn.ConvTranspose3d(*self.args, **self.kwargs)
97 |
98 |
99 | class BatchNormNd(ModuleAtom):
100 | def __init__(self, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True):
101 | super(BatchNormNd, self).__init__(eps=eps, momentum=momentum, affine=affine,
102 | track_running_stats=track_running_stats)
103 |
104 |
105 | class BatchNorm1d(BatchNormNd):
106 | def _init_module(self, x):
107 | self.args = (x.shape[1], *self.args)
108 | self.module = nn.BatchNorm1d(*self.args, **self.kwargs)
109 |
110 |
111 | class BatchNorm2d(BatchNormNd):
112 | def _init_module(self, x):
113 | self.args = (x.shape[1], *self.args)
114 | self.module = nn.BatchNorm2d(*self.args, **self.kwargs)
115 |
116 |
117 | class BatchNorm3d(BatchNormNd):
118 | def _init_module(self, x):
119 | self.args = (x.shape[1], *self.args)
120 | self.module = nn.BatchNorm3d(*self.args, **self.kwargs)
121 |
122 |
123 | class GroupNorm(ModuleAtom):
124 | def __init__(self, num_groups, eps=1e-5, affine=True):
125 | super(GroupNorm, self).__init__(num_groups, eps=eps, affine=affine)
126 |
127 | def _init_module(self, x):
128 | num_groups = self.args[0]
129 | if num_groups < 0:
130 | assert x.shape[1] % num_groups == 0
131 | num_groups = x.shape[1] // -num_groups
132 | self.args = (num_groups, x.shape[1])
133 | self.module = nn.GroupNorm(*self.args, **self.kwargs)
134 |
135 |
136 | class InstanceNormNd(ModuleAtom):
137 | def __init__(self, eps=1e-5, momentum=0.1, affine=False, track_running_stats=False):
138 | super(InstanceNormNd, self).__init__(eps=eps, momentum=momentum, affine=affine,
139 | track_running_stats=track_running_stats)
140 |
141 |
142 | class InstanceNorm1d(InstanceNormNd):
143 | def _init_module(self, x):
144 | self.args = (x.shape[1], *self.args)
145 | self.module = nn.InstanceNorm1d(*self.args, **self.kwargs)
146 |
147 |
148 | class InstanceNorm2d(InstanceNormNd):
149 | def _init_module(self, x):
150 | self.args = (x.shape[1], *self.args)
151 | self.module = nn.InstanceNorm2d(*self.args, **self.kwargs)
152 |
153 |
154 | class InstanceNorm3d(InstanceNormNd):
155 | def _init_module(self, x):
156 | self.args = (x.shape[1], *self.args)
157 | self.module = nn.InstanceNorm3d(*self.args, **self.kwargs)
158 |
159 |
160 | class LayerNorm(ModuleAtom):
161 | def __init__(self, num_last_dimensions, *args, **kwargs):
162 | super(LayerNorm, self).__init__(num_last_dimensions, *args, **kwargs)
163 |
164 | def _init_module(self, x):
165 | self.args = (x.shape[-self.args[0]:],)
166 | self.module = nn.LayerNorm(*self.args, **self.kwargs)
167 |
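One convention here that isn't obvious from the signatures: a negative `groups` for the conv layers (and a negative `num_groups` for `GroupNorm`) is read as "channels per group" and resolved against the inferred input channel count. A small illustrative sketch:

```python
import torch
from torch import nn
from xtorch import xnn

model = xnn.Sequential(
    xnn.Conv2d(64, 3, padding=1, groups=-16),  # 32 input channels, 16 per group -> nn.Conv2d(32, 64, 3, padding=1, groups=2)
    xnn.GroupNorm(-8),                         # 64 channels, 8 per group -> nn.GroupNorm(8, 64)
    nn.ReLU(),
)
model.build_pipe(shape=[2, 32, 16, 16])
```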
--------------------------------------------------------------------------------