├── .gitignore ├── DOTA_devkit ├── DOTA.py ├── DOTA2COCO.py ├── ImgSplit.py ├── ImgSplit_multi_process.py ├── ImgSplit_multi_process_bak.py ├── ResultMerge.py ├── ResultMerge_multi_process.py ├── SplitOnlyImage.py ├── SplitOnlyImage_multi_process.py ├── _polyiou.cpython-37m-x86_64-linux-gnu.so ├── demo.ipynb ├── dota-v1.5_evaluation_task1.py ├── dota-v1.5_evaluation_task2.py ├── dota_evaluation_task1.py ├── dota_evaluation_task2.py ├── dota_poly2rbox.py ├── dota_utils.py ├── mAOE_evaluation.py ├── poly_nms_gpu │ ├── Makefile │ ├── nms_wrapper.py │ ├── poly_nms.hpp │ ├── poly_nms.pyx │ └── poly_nms_kernel.cu ├── polyiou.cpp ├── polyiou.h ├── polyiou.i ├── polyiou.py ├── polyiou_wrap.cxx └── setup.py ├── LICENSE ├── README.md ├── datasets ├── DotaDataset.py └── HRSCDataset.py ├── evaluate.py ├── images └── rotation_invarience.png ├── nets ├── resnet_dcn_DFPN.py └── resnet_dcn_DFPN_model.py ├── result ├── DOTA_results.pdf ├── HRSC2016_results.pdf ├── P0007.png ├── UCAS_AOD_results.pdf ├── hrsc_testID.txt └── testID.txt ├── scripts ├── evaluate_dota.sh ├── evaluate_hrsc_ms.sh ├── evaluate_hrsc_ss.sh ├── train_dota.sh ├── train_hrsc.sh └── visualize.sh ├── train_dota.py ├── train_hrsc.py └── utils ├── Gaussian_focal_loss.py ├── angle_coders.py ├── aug.py ├── smooth_label.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoint/ 2 | result/test*/ 3 | **/__pycache__ 4 | DOTA_devkit/build/ -------------------------------------------------------------------------------- /DOTA_devkit/DOTA.py: -------------------------------------------------------------------------------- 1 | #The code is used for visulization, inspired from cocoapi 2 | # Licensed under the Simplified BSD License [see bsd.txt] 3 | 4 | import os 5 | import matplotlib.pyplot as plt 6 | from matplotlib.collections import PatchCollection 7 | from matplotlib.patches import Polygon, Circle 8 | import numpy as np 9 | import dota_utils as util 10 | 
from collections import defaultdict 11 | import cv2 12 | 13 | def _isArrayLike(obj): 14 | if type(obj) == str: 15 | return False 16 | return hasattr(obj, '__iter__') and hasattr(obj, '__len__') 17 | 18 | class DOTA: 19 | def __init__(self, basepath): 20 | self.basepath = basepath 21 | self.labelpath = os.path.join(basepath, 'labelTxt') 22 | self.imagepath = os.path.join(basepath, 'images') 23 | self.imgpaths = util.GetFileFromThisRootDir(self.labelpath) 24 | self.imglist = [util.custombasename(x) for x in self.imgpaths] 25 | self.catToImgs = defaultdict(list) 26 | self.ImgToAnns = defaultdict(list) 27 | self.createIndex() 28 | 29 | def createIndex(self): 30 | for filename in self.imgpaths: 31 | objects = util.parse_dota_poly(filename) 32 | imgid = util.custombasename(filename) 33 | self.ImgToAnns[imgid] = objects 34 | for obj in objects: 35 | cat = obj['name'] 36 | self.catToImgs[cat].append(imgid) 37 | 38 | def getImgIds(self, catNms=[]): 39 | """ 40 | :param catNms: category names 41 | :return: all the image ids contain the categories 42 | """ 43 | catNms = catNms if _isArrayLike(catNms) else [catNms] 44 | if len(catNms) == 0: 45 | return self.imglist 46 | else: 47 | imgids = [] 48 | for i, cat in enumerate(catNms): 49 | if i == 0: 50 | imgids = set(self.catToImgs[cat]) 51 | else: 52 | imgids &= set(self.catToImgs[cat]) 53 | return list(imgids) 54 | 55 | def loadAnns(self, catNms=[], imgId = None, difficult=None): 56 | """ 57 | :param catNms: category names 58 | :param imgId: the img to load anns 59 | :return: objects 60 | """ 61 | catNms = catNms if _isArrayLike(catNms) else [catNms] 62 | objects = self.ImgToAnns[imgId] 63 | if len(catNms) == 0: 64 | return objects 65 | outobjects = [obj for obj in objects if (obj['name'] in catNms)] 66 | return outobjects 67 | def showAnns(self, objects, imgId, range): 68 | """ 69 | :param catNms: category names 70 | :param objects: objects to show 71 | :param imgId: img to show 72 | :param range: display range in the img 73 | 
:return: 74 | """ 75 | img = self.loadImgs(imgId)[0] 76 | plt.imshow(img) 77 | plt.axis('off') 78 | 79 | ax = plt.gca() 80 | ax.set_autoscale_on(False) 81 | polygons = [] 82 | color = [] 83 | circles = [] 84 | r = 5 85 | for obj in objects: 86 | c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0] 87 | poly = obj['poly'] 88 | polygons.append(Polygon(poly)) 89 | color.append(c) 90 | point = poly[0] 91 | circle = Circle((point[0], point[1]), r) 92 | circles.append(circle) 93 | p = PatchCollection(polygons, facecolors=color, linewidths=0, alpha=0.4) 94 | ax.add_collection(p) 95 | p = PatchCollection(polygons, facecolors='none', edgecolors=color, linewidths=2) 96 | ax.add_collection(p) 97 | p = PatchCollection(circles, facecolors='red') 98 | ax.add_collection(p) 99 | def loadImgs(self, imgids=[]): 100 | """ 101 | :param imgids: integer ids specifying img 102 | :return: loaded img objects 103 | """ 104 | #print('isarralike:', _isArrayLike(imgids)) 105 | imgids = imgids if _isArrayLike(imgids) else [imgids] 106 | #print('imgids:', imgids) 107 | imgs = [] 108 | for imgid in imgids: 109 | filename = os.path.join(self.imagepath, imgid + '.png') 110 | #print('filename:', filename) 111 | img = cv2.imread(filename) 112 | imgs.append(img) 113 | return imgs 114 | 115 | # if __name__ == '__main__': 116 | # examplesplit = DOTA('examplesplit') 117 | # imgids = examplesplit.getImgIds(catNms=['plane']) 118 | # img = examplesplit.loadImgs(imgids) 119 | # for imgid in imgids: 120 | # anns = examplesplit.loadAnns(imgId=imgid) 121 | # examplesplit.showAnns(anns, imgid, 2) -------------------------------------------------------------------------------- /DOTA_devkit/DOTA2COCO.py: -------------------------------------------------------------------------------- 1 | import dota_utils as util 2 | import os 3 | import cv2 4 | import json 5 | 6 | wordname_15 = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 7 | 
'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'] 8 | 9 | def DOTA2COCO(srcpath, destfile): 10 | imageparent = os.path.join(srcpath, 'images') 11 | labelparent = os.path.join(srcpath, 'labelTxt') 12 | 13 | data_dict = {} 14 | info = {'contributor': 'captain group', 15 | 'data_created': '2018', 16 | 'description': 'This is 1.0 version of DOTA dataset.', 17 | 'url': 'http://captain.whu.edu.cn/DOTAweb/', 18 | 'version': '1.0', 19 | 'year': 2018} 20 | data_dict['info'] = info 21 | data_dict['images'] = [] 22 | data_dict['categories'] = [] 23 | data_dict['annotations'] = [] 24 | for idex, name in enumerate(wordname_15): 25 | single_cat = {'id': idex + 1, 'name': name, 'supercategory': name} 26 | data_dict['categories'].append(single_cat) 27 | 28 | inst_count = 1 29 | image_id = 1 30 | with open(destfile, 'w') as f_out: 31 | filenames = util.GetFileFromThisRootDir(labelparent) 32 | for file in filenames: 33 | basename = util.custombasename(file) 34 | # image_id = int(basename[1:]) 35 | 36 | imagepath = os.path.join(imageparent, basename + '.png') 37 | img = cv2.imread(imagepath) 38 | height, width, c = img.shape 39 | 40 | single_image = {} 41 | single_image['file_name'] = basename + '.png' 42 | single_image['id'] = image_id 43 | single_image['width'] = width 44 | single_image['height'] = height 45 | data_dict['images'].append(single_image) 46 | 47 | # annotations 48 | objects = util.parse_dota_poly2(file) 49 | for obj in objects: 50 | single_obj = {} 51 | single_obj['area'] = obj['area'] 52 | single_obj['category_id'] = wordname_15.index(obj['name']) + 1 53 | single_obj['segmentation'] = [] 54 | single_obj['segmentation'].append(obj['poly']) 55 | single_obj['iscrowd'] = 0 56 | xmin, ymin, xmax, ymax = min(obj['poly'][0::2]), min(obj['poly'][1::2]), \ 57 | max(obj['poly'][0::2]), max(obj['poly'][1::2]) 58 | 59 | width, height = xmax - xmin, ymax - ymin 60 | single_obj['bbox'] = xmin, ymin, width, height 61 | 
single_obj['image_id'] = image_id 62 | data_dict['annotations'].append(single_obj) 63 | single_obj['id'] = inst_count 64 | inst_count = inst_count + 1 65 | image_id = image_id + 1 66 | json.dump(data_dict, f_out) 67 | if __name__ == '__main__': 68 | DOTA2COCO(r'/data0/data_dj/1024_new', r'/data0/data_dj/1024_new/DOTA_trainval1024.json') 69 | -------------------------------------------------------------------------------- /DOTA_devkit/ImgSplit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import codecs 3 | import numpy as np 4 | import math 5 | from dota_utils import GetFileFromThisRootDir 6 | import cv2 7 | import shapely.geometry as shgeo 8 | import dota_utils as util 9 | import copy 10 | 11 | def choose_best_pointorder_fit_another(poly1, poly2): 12 | """ 13 | To make the two polygons best fit with each point 14 | """ 15 | x1 = poly1[0] 16 | y1 = poly1[1] 17 | x2 = poly1[2] 18 | y2 = poly1[3] 19 | x3 = poly1[4] 20 | y3 = poly1[5] 21 | x4 = poly1[6] 22 | y4 = poly1[7] 23 | combinate = [np.array([x1, y1, x2, y2, x3, y3, x4, y4]), np.array([x2, y2, x3, y3, x4, y4, x1, y1]), 24 | np.array([x3, y3, x4, y4, x1, y1, x2, y2]), np.array([x4, y4, x1, y1, x2, y2, x3, y3])] 25 | dst_coordinate = np.array(poly2) 26 | distances = np.array([np.sum((coord - dst_coordinate)**2) for coord in combinate]) 27 | sorted = distances.argsort() 28 | return combinate[sorted[0]] 29 | 30 | def cal_line_length(point1, point2): 31 | return math.sqrt( math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2)) 32 | 33 | 34 | class splitbase(): 35 | def __init__(self, 36 | basepath, 37 | outpath, 38 | code = 'utf-8', 39 | gap=256, 40 | subsize=1024, 41 | thresh=0.7, 42 | choosebestpoint=True, 43 | ext = '.png' 44 | ): 45 | """ 46 | :param basepath: base path for dota data 47 | :param outpath: output base path for dota data, 48 | the basepath and outputpath have the similar subdirectory, 'images' and 'labelTxt' 49 | :param code: 
encodeing format of txt file 50 | :param gap: overlap between two patches 51 | :param subsize: subsize of patch 52 | :param thresh: the thresh determine whether to keep the instance if the instance is cut down in the process of split 53 | :param choosebestpoint: used to choose the first point for the 54 | :param ext: ext for the image format 55 | """ 56 | self.basepath = basepath 57 | self.outpath = outpath 58 | self.code = code 59 | self.gap = gap 60 | self.subsize = subsize 61 | self.slide = self.subsize - self.gap 62 | self.thresh = thresh 63 | self.imagepath = os.path.join(self.basepath, 'images') 64 | self.labelpath = os.path.join(self.basepath, 'labelTxt') 65 | self.outimagepath = os.path.join(self.outpath, 'images') 66 | self.outlabelpath = os.path.join(self.outpath, 'labelTxt') 67 | self.choosebestpoint = choosebestpoint 68 | self.ext = ext 69 | if not os.path.exists(self.outimagepath): 70 | os.makedirs(self.outimagepath) 71 | if not os.path.exists(self.outlabelpath): 72 | os.makedirs(self.outlabelpath) 73 | 74 | ## point: (x, y), rec: (xmin, ymin, xmax, ymax) 75 | # def __del__(self): 76 | # self.f_sub.close() 77 | ## grid --> (x, y) position of grids 78 | def polyorig2sub(self, left, up, poly): 79 | polyInsub = np.zeros(len(poly)) 80 | for i in range(int(len(poly)/2)): 81 | polyInsub[i * 2] = int(poly[i * 2] - left) 82 | polyInsub[i * 2 + 1] = int(poly[i * 2 + 1] - up) 83 | return polyInsub 84 | 85 | def calchalf_iou(self, poly1, poly2): 86 | """ 87 | It is not the iou on usual, the iou is the value of intersection over poly1 88 | """ 89 | inter_poly = poly1.intersection(poly2) 90 | inter_area = inter_poly.area 91 | poly1_area = poly1.area 92 | half_iou = inter_area / poly1_area 93 | return inter_poly, half_iou 94 | 95 | def saveimagepatches(self, img, subimgname, left, up): 96 | subimg = copy.deepcopy(img[up: (up + self.subsize), left: (left + self.subsize)]) 97 | outdir = os.path.join(self.outimagepath, subimgname + self.ext) 98 | cv2.imwrite(outdir, 
subimg) 99 | 100 | def GetPoly4FromPoly5(self, poly): 101 | distances = [cal_line_length((poly[i * 2], poly[i * 2 + 1] ), (poly[(i + 1) * 2], poly[(i + 1) * 2 + 1])) for i in range(int(len(poly)/2 - 1))] 102 | distances.append(cal_line_length((poly[0], poly[1]), (poly[8], poly[9]))) 103 | pos = np.array(distances).argsort()[0] 104 | count = 0 105 | outpoly = [] 106 | while count < 5: 107 | #print('count:', count) 108 | if (count == pos): 109 | outpoly.append((poly[count * 2] + poly[(count * 2 + 2)%10])/2) 110 | outpoly.append((poly[(count * 2 + 1)%10] + poly[(count * 2 + 3)%10])/2) 111 | count = count + 1 112 | elif (count == (pos + 1)%5): 113 | count = count + 1 114 | continue 115 | 116 | else: 117 | outpoly.append(poly[count * 2]) 118 | outpoly.append(poly[count * 2 + 1]) 119 | count = count + 1 120 | return outpoly 121 | 122 | def savepatches(self, resizeimg, objects, subimgname, left, up, right, down): 123 | outdir = os.path.join(self.outlabelpath, subimgname + '.txt') 124 | mask_poly = [] 125 | imgpoly = shgeo.Polygon([(left, up), (right, up), (right, down), 126 | (left, down)]) 127 | with codecs.open(outdir, 'w', self.code) as f_out: 128 | for obj in objects: 129 | gtpoly = shgeo.Polygon([(obj['poly'][0], obj['poly'][1]), 130 | (obj['poly'][2], obj['poly'][3]), 131 | (obj['poly'][4], obj['poly'][5]), 132 | (obj['poly'][6], obj['poly'][7])]) 133 | if (gtpoly.area <= 0): 134 | continue 135 | inter_poly, half_iou = self.calchalf_iou(gtpoly, imgpoly) 136 | 137 | # print('writing...') 138 | if (half_iou == 1): 139 | polyInsub = self.polyorig2sub(left, up, obj['poly']) 140 | outline = ' '.join(list(map(str, polyInsub))) 141 | outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult']) 142 | f_out.write(outline + '\n') 143 | elif (half_iou > 0): 144 | #elif (half_iou > self.thresh): 145 | ## print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<') 146 | inter_poly = shgeo.polygon.orient(inter_poly, sign=1) 147 | out_poly = list(inter_poly.exterior.coords)[0: -1] 148 | if 
len(out_poly) < 4: 149 | continue 150 | 151 | out_poly2 = [] 152 | for i in range(len(out_poly)): 153 | out_poly2.append(out_poly[i][0]) 154 | out_poly2.append(out_poly[i][1]) 155 | 156 | if (len(out_poly) == 5): 157 | #print('==========================') 158 | out_poly2 = self.GetPoly4FromPoly5(out_poly2) 159 | elif (len(out_poly) > 5): 160 | """ 161 | if the cut instance is a polygon with points more than 5, we do not handle it currently 162 | """ 163 | continue 164 | if (self.choosebestpoint): 165 | out_poly2 = choose_best_pointorder_fit_another(out_poly2, obj['poly']) 166 | 167 | polyInsub = self.polyorig2sub(left, up, out_poly2) 168 | 169 | for index, item in enumerate(polyInsub): 170 | if (item <= 1): 171 | polyInsub[index] = 1 172 | elif (item >= self.subsize): 173 | polyInsub[index] = self.subsize 174 | outline = ' '.join(list(map(str, polyInsub))) 175 | if (half_iou > self.thresh): 176 | outline = outline + ' ' + obj['name'] + ' ' + str(obj['difficult']) 177 | else: 178 | ## if the left part is too small, label as '2' 179 | outline = outline + ' ' + obj['name'] + ' ' + '2' 180 | f_out.write(outline + '\n') 181 | #else: 182 | # mask_poly.append(inter_poly) 183 | self.saveimagepatches(resizeimg, subimgname, left, up) 184 | 185 | def SplitSingle(self, name, rate, extent): 186 | """ 187 | split a single image and ground truth 188 | :param name: image name 189 | :param rate: the resize scale for the image 190 | :param extent: the image format 191 | :return: 192 | """ 193 | img = cv2.imread(os.path.join(self.imagepath, name + extent)) 194 | if np.shape(img) == (): 195 | return 196 | fullname = os.path.join(self.labelpath, name + '.txt') 197 | objects = util.parse_dota_poly2(fullname) 198 | for obj in objects: 199 | obj['poly'] = list(map(lambda x:rate*x, obj['poly'])) 200 | #obj['poly'] = list(map(lambda x: ([2 * y for y in x]), obj['poly'])) 201 | 202 | if (rate != 1): 203 | resizeimg = cv2.resize(img, None, fx=rate, fy=rate, interpolation = cv2.INTER_CUBIC) 
204 | else: 205 | resizeimg = img 206 | outbasename = name + '__' + str(rate) + '__' 207 | weight = np.shape(resizeimg)[1] 208 | height = np.shape(resizeimg)[0] 209 | 210 | left, up = 0, 0 211 | while (left < weight): 212 | if (left + self.subsize >= weight): 213 | left = max(weight - self.subsize, 0) 214 | up = 0 215 | while (up < height): 216 | if (up + self.subsize >= height): 217 | up = max(height - self.subsize, 0) 218 | right = min(left + self.subsize, weight - 1) 219 | down = min(up + self.subsize, height - 1) 220 | subimgname = outbasename + str(left) + '___' + str(up) 221 | # self.f_sub.write(name + ' ' + subimgname + ' ' + str(left) + ' ' + str(up) + '\n') 222 | self.savepatches(resizeimg, objects, subimgname, left, up, right, down) 223 | if (up + self.subsize >= height): 224 | break 225 | else: 226 | up = up + self.slide 227 | if (left + self.subsize >= weight): 228 | break 229 | else: 230 | left = left + self.slide 231 | 232 | def splitdata(self, rate): 233 | """ 234 | :param rate: resize rate before cut 235 | """ 236 | imagelist = GetFileFromThisRootDir(self.imagepath) 237 | imagenames = [util.custombasename(x) for x in imagelist if (util.custombasename(x) != 'Thumbs')] 238 | for name in imagenames: 239 | self.SplitSingle(name, rate, self.ext) 240 | 241 | if __name__ == '__main__': 242 | # example usage of ImgSplit 243 | split = splitbase(r'./datasets/DOTA/train', 244 | r'./datasets/DOTA/trainvalsplit-1024-256') 245 | split.splitdata(1) -------------------------------------------------------------------------------- /DOTA_devkit/ResultMerge.py: -------------------------------------------------------------------------------- 1 | """ 2 | To use the code, users should to config detpath, annopath and imagesetfile 3 | detpath is the path for 15 result files, for the format, you can refer to "http://captain.whu.edu.cn/DOTAweb/tasks.html" 4 | search for PATH_TO_BE_CONFIGURED to config the paths 5 | Note, the evaluation is on the large scale images 6 | """ 7 | 
import os 8 | import numpy as np 9 | import dota_utils as util 10 | import re 11 | import time 12 | import polyiou 13 | 14 | ## the thresh for nms when merge image 15 | nms_thresh = 0.3 16 | 17 | def py_cpu_nms_poly(dets, thresh): 18 | scores = dets[:, 8] 19 | polys = [] 20 | areas = [] 21 | for i in range(len(dets)): 22 | tm_polygon = polyiou.VectorDouble([dets[i][0], dets[i][1], 23 | dets[i][2], dets[i][3], 24 | dets[i][4], dets[i][5], 25 | dets[i][6], dets[i][7]]) 26 | polys.append(tm_polygon) 27 | order = scores.argsort()[::-1] 28 | 29 | keep = [] 30 | while order.size > 0: 31 | ovr = [] 32 | i = order[0] 33 | keep.append(i) 34 | for j in range(order.size - 1): 35 | iou = polyiou.iou_poly(polys[i], polys[order[j + 1]]) 36 | ovr.append(iou) 37 | ovr = np.array(ovr) 38 | inds = np.where(ovr <= thresh)[0] 39 | order = order[inds + 1] 40 | return keep 41 | 42 | def py_cpu_nms(dets, thresh): 43 | """Pure Python NMS baseline.""" 44 | #print('dets:', dets) 45 | x1 = dets[:, 0] 46 | y1 = dets[:, 1] 47 | x2 = dets[:, 2] 48 | y2 = dets[:, 3] 49 | scores = dets[:, 4] 50 | 51 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 52 | ## index for dets 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | 74 | def nmsbynamedict(nameboxdict, nms, thresh): 75 | nameboxnmsdict = {x: [] for x in nameboxdict} 76 | for imgname in nameboxdict: 77 | #print('imgname:', imgname) 78 | #keep = py_cpu_nms(np.array(nameboxdict[imgname]), thresh) 79 | #print('type nameboxdict:', type(nameboxnmsdict)) 80 | 
#print('type imgname:', type(imgname)) 81 | #print('type nms:', type(nms)) 82 | keep = nms(np.array(nameboxdict[imgname]), thresh) 83 | #print('keep:', keep) 84 | outdets = [] 85 | #print('nameboxdict[imgname]: ', nameboxnmsdict[imgname]) 86 | for index in keep: 87 | # print('index:', index) 88 | outdets.append(nameboxdict[imgname][index]) 89 | nameboxnmsdict[imgname] = outdets 90 | return nameboxnmsdict 91 | def poly2origpoly(poly, x, y, rate): 92 | origpoly = [] 93 | for i in range(int(len(poly)/2)): 94 | tmp_x = float(poly[i * 2] + x) / float(rate) 95 | tmp_y = float(poly[i * 2 + 1] + y) / float(rate) 96 | origpoly.append(tmp_x) 97 | origpoly.append(tmp_y) 98 | return origpoly 99 | 100 | def mergebase(srcpath, dstpath, nms): 101 | filelist = util.GetFileFromThisRootDir(srcpath) 102 | for fullname in filelist: 103 | name = util.custombasename(fullname) 104 | #print('name:', name) 105 | dstname = os.path.join(dstpath, name + '.txt') 106 | with open(fullname, 'r') as f_in: 107 | nameboxdict = {} 108 | lines = f_in.readlines() 109 | splitlines = [x.strip().split(' ') for x in lines] 110 | for splitline in splitlines: 111 | subname = splitline[0] 112 | splitname = subname.split('__') 113 | oriname = splitname[0] 114 | pattern1 = re.compile(r'__\d+___\d+') 115 | #print('subname:', subname) 116 | x_y = re.findall(pattern1, subname) 117 | x_y_2 = re.findall(r'\d+', x_y[0]) 118 | x, y = int(x_y_2[0]), int(x_y_2[1]) 119 | 120 | pattern2 = re.compile(r'__([\d+\.]+)__\d+___') 121 | 122 | rate = re.findall(pattern2, subname)[0] 123 | 124 | confidence = splitline[1] 125 | poly = list(map(float, splitline[2:])) 126 | origpoly = poly2origpoly(poly, x, y, rate) 127 | det = origpoly 128 | det.append(confidence) 129 | det = list(map(float, det)) 130 | if (oriname not in nameboxdict): 131 | nameboxdict[oriname] = [] 132 | nameboxdict[oriname].append(det) 133 | nameboxnmsdict = nmsbynamedict(nameboxdict, nms, nms_thresh) 134 | with open(dstname, 'w') as f_out: 135 | for imgname in 
nameboxnmsdict: 136 | for det in nameboxnmsdict[imgname]: 137 | #print('det:', det) 138 | confidence = det[-1] 139 | bbox = det[0:-1] 140 | outline = imgname + ' ' + str(confidence) + ' ' + ' '.join(map(str, bbox)) 141 | #print('outline:', outline) 142 | f_out.write(outline + '\n') 143 | def mergebyrec(srcpath, dstpath): 144 | """ 145 | srcpath: result files before merge and nms 146 | dstpath: result files after merge and nms 147 | """ 148 | # srcpath = r'E:\bod-dataset\results\bod-v3_rfcn_2000000' 149 | # dstpath = r'E:\bod-dataset\results\bod-v3_rfcn_2000000_nms' 150 | 151 | mergebase(srcpath, 152 | dstpath, 153 | py_cpu_nms) 154 | def mergebypoly(srcpath, dstpath): 155 | """ 156 | srcpath: result files before merge and nms 157 | dstpath: result files after merge and nms 158 | """ 159 | # srcpath = r'/home/dingjian/evaluation_task1/result/faster-rcnn-59/comp4_test_results' 160 | # dstpath = r'/home/dingjian/evaluation_task1/result/faster-rcnn-59/testtime' 161 | 162 | mergebase(srcpath, 163 | dstpath, 164 | py_cpu_nms_poly) 165 | if __name__ == '__main__': 166 | # see demo for example 167 | mergebypoly(r'path_to_configure', r'path_to_configure') 168 | # mergebyrec() -------------------------------------------------------------------------------- /DOTA_devkit/ResultMerge_multi_process.py: -------------------------------------------------------------------------------- 1 | """ 2 | To use the code, users should to config detpath, annopath and imagesetfile 3 | detpath is the path for 15 result files, for the format, you can refer to "http://captain.whu.edu.cn/DOTAweb/tasks.html" 4 | search for PATH_TO_BE_CONFIGURED to config the paths 5 | Note, the evaluation is on the large scale images 6 | """ 7 | import os 8 | import numpy as np 9 | import re 10 | import time 11 | import sys 12 | sys.path.insert(0,'..') 13 | try: 14 | import dota_utils as util 15 | except: 16 | import dota_kit.dota_utils as util 17 | import polyiou 18 | import pdb 19 | import math 20 | from 
multiprocessing import Pool 21 | from functools import partial 22 | 23 | ## the thresh for nms when merge image 24 | nms_thresh = 0.3 25 | 26 | def py_cpu_nms_poly(dets, thresh): 27 | scores = dets[:, 8] 28 | polys = [] 29 | areas = [] 30 | for i in range(len(dets)): 31 | tm_polygon = polyiou.VectorDouble([dets[i][0], dets[i][1], 32 | dets[i][2], dets[i][3], 33 | dets[i][4], dets[i][5], 34 | dets[i][6], dets[i][7]]) 35 | polys.append(tm_polygon) 36 | order = scores.argsort()[::-1] 37 | 38 | keep = [] 39 | while order.size > 0: 40 | ovr = [] 41 | i = order[0] 42 | keep.append(i) 43 | for j in range(order.size - 1): 44 | iou = polyiou.iou_poly(polys[i], polys[order[j + 1]]) 45 | ovr.append(iou) 46 | ovr = np.array(ovr) 47 | 48 | # print('ovr: ', ovr) 49 | # print('thresh: ', thresh) 50 | try: 51 | if math.isnan(ovr[0]): 52 | pdb.set_trace() 53 | except: 54 | pass 55 | inds = np.where(ovr <= thresh)[0] 56 | # print('inds: ', inds) 57 | 58 | order = order[inds + 1] 59 | 60 | return keep 61 | 62 | 63 | def py_cpu_nms_poly_fast(dets, thresh): 64 | obbs = dets[:, 0:-1] 65 | x1 = np.min(obbs[:, 0::2], axis=1) 66 | y1 = np.min(obbs[:, 1::2], axis=1) 67 | x2 = np.max(obbs[:, 0::2], axis=1) 68 | y2 = np.max(obbs[:, 1::2], axis=1) 69 | scores = dets[:, 8] 70 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 71 | 72 | polys = [] 73 | for i in range(len(dets)): 74 | tm_polygon = polyiou.VectorDouble([dets[i][0], dets[i][1], 75 | dets[i][2], dets[i][3], 76 | dets[i][4], dets[i][5], 77 | dets[i][6], dets[i][7]]) 78 | polys.append(tm_polygon) 79 | order = scores.argsort()[::-1] 80 | 81 | keep = [] 82 | while order.size > 0: 83 | ovr = [] 84 | i = order[0] 85 | keep.append(i) 86 | # if order.size == 0: 87 | # break 88 | xx1 = np.maximum(x1[i], x1[order[1:]]) 89 | yy1 = np.maximum(y1[i], y1[order[1:]]) 90 | xx2 = np.minimum(x2[i], x2[order[1:]]) 91 | yy2 = np.minimum(y2[i], y2[order[1:]]) 92 | # w = np.maximum(0.0, xx2 - xx1 + 1) 93 | # h = np.maximum(0.0, yy2 - yy1 + 1) 94 | w = 
np.maximum(0.0, xx2 - xx1) 95 | h = np.maximum(0.0, yy2 - yy1) 96 | hbb_inter = w * h 97 | hbb_ovr = hbb_inter / (areas[i] + areas[order[1:]] - hbb_inter) 98 | # h_keep_inds = np.where(hbb_ovr == 0)[0] 99 | h_inds = np.where(hbb_ovr > 0)[0] 100 | tmp_order = order[h_inds + 1] 101 | for j in range(tmp_order.size): 102 | iou = polyiou.iou_poly(polys[i], polys[tmp_order[j]]) 103 | hbb_ovr[h_inds[j]] = iou 104 | # ovr.append(iou) 105 | # ovr_index.append(tmp_order[j]) 106 | 107 | # ovr = np.array(ovr) 108 | # ovr_index = np.array(ovr_index) 109 | # print('ovr: ', ovr) 110 | # print('thresh: ', thresh) 111 | try: 112 | if math.isnan(ovr[0]): 113 | pdb.set_trace() 114 | except: 115 | pass 116 | inds = np.where(hbb_ovr <= thresh)[0] 117 | 118 | # order_obb = ovr_index[inds] 119 | # print('inds: ', inds) 120 | # order_hbb = order[h_keep_inds + 1] 121 | order = order[inds + 1] 122 | # pdb.set_trace() 123 | # order = np.concatenate((order_obb, order_hbb), axis=0).astype(np.int) 124 | return keep 125 | 126 | def py_cpu_nms(dets, thresh): 127 | """Pure Python NMS baseline.""" 128 | #print('dets:', dets) 129 | x1 = dets[:, 0] 130 | y1 = dets[:, 1] 131 | x2 = dets[:, 2] 132 | y2 = dets[:, 3] 133 | scores = dets[:, 4] 134 | 135 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 136 | ## index for dets 137 | order = scores.argsort()[::-1] 138 | 139 | 140 | keep = [] 141 | while order.size > 0: 142 | i = order[0] 143 | keep.append(i) 144 | xx1 = np.maximum(x1[i], x1[order[1:]]) 145 | yy1 = np.maximum(y1[i], y1[order[1:]]) 146 | xx2 = np.minimum(x2[i], x2[order[1:]]) 147 | yy2 = np.minimum(y2[i], y2[order[1:]]) 148 | 149 | w = np.maximum(0.0, xx2 - xx1 + 1) 150 | h = np.maximum(0.0, yy2 - yy1 + 1) 151 | inter = w * h 152 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 153 | 154 | inds = np.where(ovr <= thresh)[0] 155 | order = order[inds + 1] 156 | 157 | return keep 158 | 159 | def nmsbynamedict(nameboxdict, nms, thresh): 160 | nameboxnmsdict = {x: [] for x in nameboxdict} 161 | for 
imgname in nameboxdict: 162 | #print('imgname:', imgname) 163 | #keep = py_cpu_nms(np.array(nameboxdict[imgname]), thresh) 164 | #print('type nameboxdict:', type(nameboxnmsdict)) 165 | #print('type imgname:', type(imgname)) 166 | #print('type nms:', type(nms)) 167 | keep = nms(np.array(nameboxdict[imgname]), thresh) 168 | #print('keep:', keep) 169 | outdets = [] 170 | #print('nameboxdict[imgname]: ', nameboxnmsdict[imgname]) 171 | for index in keep: 172 | # print('index:', index) 173 | outdets.append(nameboxdict[imgname][index]) 174 | nameboxnmsdict[imgname] = outdets 175 | return nameboxnmsdict 176 | def poly2origpoly(poly, x, y, rate): 177 | origpoly = [] 178 | for i in range(int(len(poly)/2)): 179 | tmp_x = float(poly[i * 2] + x) / float(rate) 180 | tmp_y = float(poly[i * 2 + 1] + y) / float(rate) 181 | origpoly.append(tmp_x) 182 | origpoly.append(tmp_y) 183 | return origpoly 184 | 185 | def mergesingle(dstpath, nms, fullname): 186 | name = util.custombasename(fullname) 187 | #print('name:', name) 188 | dstname = os.path.join(dstpath, name + '.txt') 189 | with open(fullname, 'r') as f_in: 190 | nameboxdict = {} 191 | lines = f_in.readlines() 192 | splitlines = [x.strip().split(' ') for x in lines] 193 | for splitline in splitlines: 194 | subname = splitline[0] 195 | splitname = subname.split('__') 196 | oriname = splitname[0] 197 | pattern1 = re.compile(r'__\d+___\d+') 198 | #print('subname:', subname) 199 | x_y = re.findall(pattern1, subname) 200 | x_y_2 = re.findall(r'\d+', x_y[0]) 201 | x, y = int(x_y_2[0]), int(x_y_2[1]) 202 | 203 | pattern2 = re.compile(r'__([\d+\.]+)__\d+___') 204 | 205 | rate = re.findall(pattern2, subname)[0] 206 | 207 | confidence = splitline[1] 208 | poly = list(map(float, splitline[2:])) 209 | origpoly = poly2origpoly(poly, x, y, rate) 210 | det = origpoly 211 | det.append(confidence) 212 | det = list(map(float, det)) 213 | if (oriname not in nameboxdict): 214 | nameboxdict[oriname] = [] 215 | nameboxdict[oriname].append(det) 216 | 
nameboxnmsdict = nmsbynamedict(nameboxdict, nms, nms_thresh) 217 | with open(dstname, 'w') as f_out: 218 | for imgname in nameboxnmsdict: 219 | for det in nameboxnmsdict[imgname]: 220 | #print('det:', det) 221 | confidence = det[-1] 222 | bbox = det[0:-1] 223 | outline = imgname + ' ' + str(confidence) + ' ' + ' '.join(map(str, bbox)) 224 | #print('outline:', outline) 225 | f_out.write(outline + '\n') 226 | 227 | def mergebase_parallel(srcpath, dstpath, nms): 228 | pool = Pool(16) 229 | filelist = util.GetFileFromThisRootDir(srcpath) 230 | 231 | mergesingle_fn = partial(mergesingle, dstpath, nms) 232 | # pdb.set_trace() 233 | pool.map(mergesingle_fn, filelist) 234 | 235 | def mergebase(srcpath, dstpath, nms): 236 | filelist = util.GetFileFromThisRootDir(srcpath) 237 | for filename in filelist: 238 | mergesingle(dstpath, nms, filename) 239 | 240 | def mergebyrec(srcpath, dstpath): 241 | """ 242 | srcpath: result files before merge and nms 243 | dstpath: result files after merge and nms 244 | """ 245 | # srcpath = r'E:\bod-dataset\results\bod-v3_rfcn_2000000' 246 | # dstpath = r'E:\bod-dataset\results\bod-v3_rfcn_2000000_nms' 247 | 248 | mergebase(srcpath, 249 | dstpath, 250 | py_cpu_nms) 251 | def mergebypoly(srcpath, dstpath): 252 | """ 253 | srcpath: result files before merge and nms 254 | dstpath: result files after merge and nms 255 | """ 256 | # srcpath = r'/home/dingjian/evaluation_task1/result/faster-rcnn-59/comp4_test_results' 257 | # dstpath = r'/home/dingjian/evaluation_task1/result/faster-rcnn-59/testtime' 258 | 259 | # mergebase(srcpath, 260 | # dstpath, 261 | # py_cpu_nms_poly) 262 | mergebase_parallel(srcpath, 263 | dstpath, 264 | py_cpu_nms_poly_fast) 265 | if __name__ == '__main__': 266 | mergebypoly(r'path_to_configure', r'path_to_configure') 267 | # mergebyrec() 268 | -------------------------------------------------------------------------------- /DOTA_devkit/SplitOnlyImage.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import copy 5 | import dota_utils as util 6 | 7 | class splitbase(): 8 | def __init__(self, 9 | srcpath, 10 | dstpath, 11 | gap=100, 12 | subsize=1024, 13 | ext='.png'): 14 | self.srcpath = srcpath 15 | self.outpath = dstpath 16 | self.gap = gap 17 | self.subsize = subsize 18 | self.slide = self.subsize - self.gap 19 | self.srcpath = srcpath 20 | self.dstpath = dstpath 21 | self.ext = ext 22 | def saveimagepatches(self, img, subimgname, left, up, ext='.png'): 23 | subimg = copy.deepcopy(img[up: (up + self.subsize), left: (left + self.subsize)]) 24 | outdir = os.path.join(self.dstpath, subimgname + ext) 25 | cv2.imwrite(outdir, subimg) 26 | 27 | def SplitSingle(self, name, rate, extent): 28 | img = cv2.imread(os.path.join(self.srcpath, name + extent)) 29 | assert np.shape(img) != () 30 | 31 | if (rate != 1): 32 | resizeimg = cv2.resize(img, None, fx=rate, fy=rate, interpolation = cv2.INTER_CUBIC) 33 | else: 34 | resizeimg = img 35 | outbasename = name + '__' + str(rate) + '__' 36 | 37 | weight = np.shape(resizeimg)[1] 38 | height = np.shape(resizeimg)[0] 39 | 40 | left, up = 0, 0 41 | while (left < weight): 42 | if (left + self.subsize >= weight): 43 | left = max(weight - self.subsize, 0) 44 | up = 0 45 | while (up < height): 46 | if (up + self.subsize >= height): 47 | up = max(height - self.subsize, 0) 48 | subimgname = outbasename + str(left) + '___' + str(up) 49 | self.saveimagepatches(resizeimg, subimgname, left, up) 50 | if (up + self.subsize >= height): 51 | break 52 | else: 53 | up = up + self.slide 54 | if (left + self.subsize >= weight): 55 | break 56 | else: 57 | left = left + self.slide 58 | 59 | def splitdata(self, rate): 60 | 61 | imagelist = util.GetFileFromThisRootDir(self.srcpath) 62 | imagenames = [util.custombasename(x) for x in imagelist if (util.custombasename(x) != 'Thumbs')] 63 | for name in imagenames: 64 | 
self.SplitSingle(name, rate, self.ext) 65 | if __name__ == '__main__': 66 | split = splitbase(r'example/images', 67 | r'example/imagesSplit') 68 | split.splitdata(1) -------------------------------------------------------------------------------- /DOTA_devkit/SplitOnlyImage_multi_process.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import copy 5 | import dota_utils as util 6 | from multiprocessing import Pool 7 | from functools import partial 8 | 9 | 10 | def split_single_warp(name, split_base, rate, extent): 11 | split_base.SplitSingle(name, rate, extent) 12 | class splitbase(): 13 | def __init__(self, 14 | srcpath, 15 | dstpath, 16 | gap=100, 17 | subsize=1024, 18 | ext='.png', 19 | padding=True, 20 | num_process=32): 21 | self.srcpath = srcpath 22 | self.outpath = dstpath 23 | self.gap = gap 24 | self.subsize = subsize 25 | self.slide = self.subsize - self.gap 26 | self.srcpath = srcpath 27 | self.dstpath = dstpath 28 | self.ext = ext 29 | self.padding = padding 30 | self.pool = Pool(num_process) 31 | 32 | if not os.path.isdir(self.outpath): 33 | os.mkdir(self.outpath) 34 | 35 | def saveimagepatches(self, img, subimgname, left, up, ext='.png'): 36 | subimg = copy.deepcopy(img[up: (up + self.subsize), left: (left + self.subsize)]) 37 | outdir = os.path.join(self.dstpath, subimgname + ext) 38 | h, w, c = np.shape(subimg) 39 | if (self.padding): 40 | outimg = np.zeros((self.subsize, self.subsize, 3)) 41 | outimg[0:h, 0:w, :] = subimg 42 | cv2.imwrite(outdir, outimg) 43 | else: 44 | cv2.imwrite(outdir, subimg) 45 | 46 | def SplitSingle(self, name, rate, extent): 47 | img = cv2.imread(os.path.join(self.srcpath, name + extent)) 48 | assert np.shape(img) != () 49 | 50 | if (rate != 1): 51 | resizeimg = cv2.resize(img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC) 52 | else: 53 | resizeimg = img 54 | outbasename = name + '__' + str(rate) + '__' 55 | 56 | weight = 
np.shape(resizeimg)[1] 57 | height = np.shape(resizeimg)[0] 58 | 59 | # if (max(weight, height) < self.subsize/2): 60 | # return 61 | 62 | left, up = 0, 0 63 | while (left < weight): 64 | if (left + self.subsize >= weight): 65 | left = max(weight - self.subsize, 0) 66 | up = 0 67 | while (up < height): 68 | if (up + self.subsize >= height): 69 | up = max(height - self.subsize, 0) 70 | subimgname = outbasename + str(left) + '___' + str(up) 71 | self.saveimagepatches(resizeimg, subimgname, left, up) 72 | if (up + self.subsize >= height): 73 | break 74 | else: 75 | up = up + self.slide 76 | if (left + self.subsize >= weight): 77 | break 78 | else: 79 | left = left + self.slide 80 | 81 | def splitdata(self, rate): 82 | 83 | imagelist = util.GetFileFromThisRootDir(self.srcpath) 84 | imagenames = [util.custombasename(x) for x in imagelist if (util.custombasename(x) != 'Thumbs')] 85 | 86 | # worker = partial(self.SplitSingle, rate=rate, extent=self.ext) 87 | worker = partial(split_single_warp, split_base=self, rate=rate, extent=self.ext) 88 | self.pool.map(worker, imagenames) 89 | # 90 | # for name in imagenames: 91 | # self.SplitSingle(name, rate, self.ext) 92 | def __getstate__(self): 93 | self_dict = self.__dict__.copy() 94 | del self_dict['pool'] 95 | return self_dict 96 | 97 | def __setstate__(self, state): 98 | self.__dict__.update(state) 99 | 100 | if __name__ == '__main__': 101 | split = splitbase(r'/home/dingjian/data/dota/val/images', 102 | r'/home/dingjian/data/dota/valsplit', 103 | num_process=32) 104 | split.splitdata(1) -------------------------------------------------------------------------------- /DOTA_devkit/_polyiou.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pandora-CV/cvpr24acm/769b0d4fdf1e595bad967a7f51303f9b4a41f52f/DOTA_devkit/_polyiou.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- 
/DOTA_devkit/dota-v1.5_evaluation_task2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # dota_evaluation_task1 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Jian Ding, based on code from Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | """ 8 | To use the code, users should to config detpath, annopath and imagesetfile 9 | detpath is the path for 15 result files, for the format, you can refer to "http://captain.whu.edu.cn/DOTAweb/tasks.html" 10 | search for PATH_TO_BE_CONFIGURED to config the paths 11 | Note, the evaluation is on the large scale images 12 | """ 13 | import xml.etree.ElementTree as ET 14 | import os 15 | #import cPickle 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | 19 | def parse_gt(filename): 20 | objects = [] 21 | with open(filename, 'r') as f: 22 | lines = f.readlines() 23 | splitlines = [x.strip().split(' ') for x in lines] 24 | for splitline in splitlines: 25 | object_struct = {} 26 | object_struct['name'] = splitline[8] 27 | # if (len(splitline) == 9): 28 | # object_struct['difficult'] = 0 29 | # elif (len(splitline) == 10): 30 | # object_struct['difficult'] = int(splitline[9]) 31 | object_struct['difficult'] = 0 32 | object_struct['bbox'] = [int(float(splitline[0])), 33 | int(float(splitline[1])), 34 | int(float(splitline[4])), 35 | int(float(splitline[5]))] 36 | w = int(float(splitline[4])) - int(float(splitline[0])) 37 | h = int(float(splitline[5])) - int(float(splitline[1])) 38 | object_struct['area'] = w * h 39 | #print('area:', object_struct['area']) 40 | # if object_struct['area'] < (15 * 15): 41 | # #print('area:', object_struct['area']) 42 | # object_struct['difficult'] = 1 43 | objects.append(object_struct) 44 | return objects 45 | def voc_ap(rec, prec, use_07_metric=False): 46 | """ ap = voc_ap(rec, prec, [use_07_metric]) 47 | Compute VOC 
AP given precision and recall. 48 | If use_07_metric is true, uses the 49 | VOC 07 11 point method (default:False). 50 | """ 51 | if use_07_metric: 52 | # 11 point metric 53 | ap = 0. 54 | for t in np.arange(0., 1.1, 0.1): 55 | if np.sum(rec >= t) == 0: 56 | p = 0 57 | else: 58 | p = np.max(prec[rec >= t]) 59 | ap = ap + p / 11. 60 | else: 61 | # correct AP calculation 62 | # first append sentinel values at the end 63 | mrec = np.concatenate(([0.], rec, [1.])) 64 | mpre = np.concatenate(([0.], prec, [0.])) 65 | 66 | # compute the precision envelope 67 | for i in range(mpre.size - 1, 0, -1): 68 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 69 | 70 | # to calculate area under PR curve, look for points 71 | # where X axis (recall) changes value 72 | i = np.where(mrec[1:] != mrec[:-1])[0] 73 | 74 | # and sum (\Delta recall) * prec 75 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 76 | return ap 77 | 78 | def voc_eval(detpath, 79 | annopath, 80 | imagesetfile, 81 | classname, 82 | # cachedir, 83 | ovthresh=0.5, 84 | use_07_metric=False): 85 | """rec, prec, ap = voc_eval(detpath, 86 | annopath, 87 | imagesetfile, 88 | classname, 89 | [ovthresh], 90 | [use_07_metric]) 91 | Top level function that does the PASCAL VOC evaluation. 92 | detpath: Path to detections 93 | detpath.format(classname) should produce the detection results file. 94 | annopath: Path to annotations 95 | annopath.format(imagename) should be the xml annotations file. 96 | imagesetfile: Text file containing the list of images, one image per line. 
97 | classname: Category name (duh) 98 | cachedir: Directory for caching the annotations 99 | [ovthresh]: Overlap threshold (default = 0.5) 100 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 101 | (default False) 102 | """ 103 | # assumes detections are in detpath.format(classname) 104 | # assumes annotations are in annopath.format(imagename) 105 | # assumes imagesetfile is a text file with each line an image name 106 | # cachedir caches the annotations in a pickle file 107 | 108 | # first load gt 109 | #if not os.path.isdir(cachedir): 110 | # os.mkdir(cachedir) 111 | #cachefile = os.path.join(cachedir, 'annots.pkl') 112 | # read list of images 113 | with open(imagesetfile, 'r') as f: 114 | lines = f.readlines() 115 | imagenames = [x.strip() for x in lines] 116 | #print('imagenames: ', imagenames) 117 | #if not os.path.isfile(cachefile): 118 | # load annots 119 | recs = {} 120 | for i, imagename in enumerate(imagenames): 121 | #print('parse_files name: ', annopath.format(imagename)) 122 | recs[imagename] = parse_gt(annopath.format(imagename)) 123 | #if i % 100 == 0: 124 | # print ('Reading annotation for {:d}/{:d}'.format( 125 | # i + 1, len(imagenames)) ) 126 | # save 127 | #print ('Saving cached annotations to {:s}'.format(cachefile)) 128 | #with open(cachefile, 'w') as f: 129 | # cPickle.dump(recs, f) 130 | #else: 131 | # load 132 | #with open(cachefile, 'r') as f: 133 | # recs = cPickle.load(f) 134 | 135 | # extract gt objects for this class 136 | class_recs = {} 137 | npos = 0 138 | for imagename in imagenames: 139 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 140 | bbox = np.array([x['bbox'] for x in R]) 141 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 142 | det = [False] * len(R) 143 | npos = npos + sum(~difficult) 144 | class_recs[imagename] = {'bbox': bbox, 145 | 'difficult': difficult, 146 | 'det': det} 147 | 148 | # read dets 149 | detfile = detpath.format(classname) 150 | with open(detfile, 
'r') as f: 151 | lines = f.readlines() 152 | 153 | splitlines = [x.strip().split(' ') for x in lines] 154 | image_ids = [x[0] for x in splitlines] 155 | confidence = np.array([float(x[1]) for x in splitlines]) 156 | 157 | #print('check confidence: ', confidence) 158 | 159 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 160 | 161 | # sort by confidence 162 | sorted_ind = np.argsort(-confidence) 163 | sorted_scores = np.sort(-confidence) 164 | 165 | #print('check sorted_scores: ', sorted_scores) 166 | #print('check sorted_ind: ', sorted_ind) 167 | BB = BB[sorted_ind, :] 168 | image_ids = [image_ids[x] for x in sorted_ind] 169 | #print('check imge_ids: ', image_ids) 170 | #print('imge_ids len:', len(image_ids)) 171 | # go down dets and mark TPs and FPs 172 | nd = len(image_ids) 173 | tp = np.zeros(nd) 174 | fp = np.zeros(nd) 175 | for d in range(nd): 176 | R = class_recs[image_ids[d]] 177 | bb = BB[d, :].astype(float) 178 | ovmax = -np.inf 179 | BBGT = R['bbox'].astype(float) 180 | 181 | if BBGT.size > 0: 182 | # compute overlaps 183 | # intersection 184 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 185 | iymin = np.maximum(BBGT[:, 1], bb[1]) 186 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 187 | iymax = np.minimum(BBGT[:, 3], bb[3]) 188 | iw = np.maximum(ixmax - ixmin + 1., 0.) 189 | ih = np.maximum(iymax - iymin + 1., 0.) 190 | inters = iw * ih 191 | 192 | # union 193 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 194 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 195 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 196 | 197 | overlaps = inters / uni 198 | ovmax = np.max(overlaps) 199 | ## if there exist 2 200 | jmax = np.argmax(overlaps) 201 | 202 | if ovmax > ovthresh: 203 | if not R['difficult'][jmax]: 204 | if not R['det'][jmax]: 205 | tp[d] = 1. 206 | R['det'][jmax] = 1 207 | else: 208 | fp[d] = 1. 209 | # print('filename:', image_ids[d]) 210 | else: 211 | fp[d] = 1. 
212 | 213 | # compute precision recall 214 | 215 | print('check fp:', fp) 216 | print('check tp', tp) 217 | 218 | 219 | print('npos num:', npos) 220 | fp = np.cumsum(fp) 221 | tp = np.cumsum(tp) 222 | 223 | rec = tp / float(npos) 224 | # avoid divide by zero in case the first detection matches a difficult 225 | # ground truth 226 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 227 | ap = voc_ap(rec, prec, use_07_metric) 228 | 229 | return rec, prec, ap 230 | 231 | def main(): 232 | detpath = r'/home/dingjian/Documents/ODAI_DOTA/DOAI_2019/doai2019_submit/gwf_Task2_merge_2/Task2_{:s}.txt' 233 | annopath = r'/home/dingjian/code/DOAI_server2/media/DOTA15_Task2_gt/{:s}.txt' 234 | imagesetfile = r'/home/dingjian/code/DOAI_server2/media/testset.txt' 235 | 236 | # detpath = r'PATH_TO_BE_CONFIGURED/Task2_{:s}.txt' 237 | # annopath = r'PATH_TO_BE_CONFIGURED/{:s}.txt'# change the directory to the path of val/labelTxt, if you want to do evaluation on the valset 238 | # imagesetfile = r'PATH_TO_BE_CONFIGURED/valset.txt' 239 | # For DOTA v1.5 240 | classnames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 241 | 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter', 'container-crane'] 242 | # For DOTA v1.0 243 | # classnames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 244 | # 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'] 245 | classaps = [] 246 | map = 0 247 | for classname in classnames: 248 | print('classname:', classname) 249 | rec, prec, ap = voc_eval(detpath, 250 | annopath, 251 | imagesetfile, 252 | classname, 253 | ovthresh=0.5, 254 | use_07_metric=True) 255 | map = map + ap 256 | #print('rec: ', rec, 'prec: ', prec, 'ap: ', ap) 257 | print('ap: ', ap) 258 | classaps.append(ap) 259 | 260 | 
## uncomment to plot p-r curve for each category 261 | # plt.figure(figsize=(8,4)) 262 | # plt.xlabel('recall') 263 | # plt.ylabel('precision') 264 | # plt.plot(rec, prec) 265 | # plt.show() 266 | map = map/len(classnames) 267 | print('map:', map) 268 | classaps = 100*np.array(classaps) 269 | print('classaps: ', classaps) 270 | if __name__ == '__main__': 271 | main() -------------------------------------------------------------------------------- /DOTA_devkit/dota_evaluation_task2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # dota_evaluation_task1 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Jian Ding, based on code from Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | """ 8 | To use the code, users should to config detpath, annopath and imagesetfile 9 | detpath is the path for 15 result files, for the format, you can refer to "http://captain.whu.edu.cn/DOTAweb/tasks.html" 10 | search for PATH_TO_BE_CONFIGURED to config the paths 11 | Note, the evaluation is on the large scale images 12 | """ 13 | import xml.etree.ElementTree as ET 14 | import os 15 | #import cPickle 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | 19 | def parse_gt(filename): 20 | objects = [] 21 | with open(filename, 'r') as f: 22 | lines = f.readlines() 23 | splitlines = [x.strip().split(' ') for x in lines] 24 | for splitline in splitlines: 25 | object_struct = {} 26 | object_struct['name'] = splitline[8] 27 | if (len(splitline) == 9): 28 | object_struct['difficult'] = 0 29 | elif (len(splitline) == 10): 30 | object_struct['difficult'] = int(splitline[9]) 31 | # object_struct['difficult'] = 0 32 | object_struct['bbox'] = [int(float(splitline[0])), 33 | int(float(splitline[1])), 34 | int(float(splitline[4])), 35 | int(float(splitline[5]))] 36 | w = int(float(splitline[4])) - int(float(splitline[0])) 
37 | h = int(float(splitline[5])) - int(float(splitline[1])) 38 | object_struct['area'] = w * h 39 | #print('area:', object_struct['area']) 40 | # if object_struct['area'] < (15 * 15): 41 | # #print('area:', object_struct['area']) 42 | # object_struct['difficult'] = 1 43 | objects.append(object_struct) 44 | return objects 45 | def voc_ap(rec, prec, use_07_metric=False): 46 | """ ap = voc_ap(rec, prec, [use_07_metric]) 47 | Compute VOC AP given precision and recall. 48 | If use_07_metric is true, uses the 49 | VOC 07 11 point method (default:False). 50 | """ 51 | if use_07_metric: 52 | # 11 point metric 53 | ap = 0. 54 | for t in np.arange(0., 1.1, 0.1): 55 | if np.sum(rec >= t) == 0: 56 | p = 0 57 | else: 58 | p = np.max(prec[rec >= t]) 59 | ap = ap + p / 11. 60 | else: 61 | # correct AP calculation 62 | # first append sentinel values at the end 63 | mrec = np.concatenate(([0.], rec, [1.])) 64 | mpre = np.concatenate(([0.], prec, [0.])) 65 | 66 | # compute the precision envelope 67 | for i in range(mpre.size - 1, 0, -1): 68 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 69 | 70 | # to calculate area under PR curve, look for points 71 | # where X axis (recall) changes value 72 | i = np.where(mrec[1:] != mrec[:-1])[0] 73 | 74 | # and sum (\Delta recall) * prec 75 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 76 | return ap 77 | 78 | def voc_eval(detpath, 79 | annopath, 80 | imagesetfile, 81 | classname, 82 | # cachedir, 83 | ovthresh=0.5, 84 | use_07_metric=False): 85 | """rec, prec, ap = voc_eval(detpath, 86 | annopath, 87 | imagesetfile, 88 | classname, 89 | [ovthresh], 90 | [use_07_metric]) 91 | Top level function that does the PASCAL VOC evaluation. 92 | detpath: Path to detections 93 | detpath.format(classname) should produce the detection results file. 94 | annopath: Path to annotations 95 | annopath.format(imagename) should be the xml annotations file. 96 | imagesetfile: Text file containing the list of images, one image per line. 
97 | classname: Category name (duh) 98 | cachedir: Directory for caching the annotations 99 | [ovthresh]: Overlap threshold (default = 0.5) 100 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 101 | (default False) 102 | """ 103 | # assumes detections are in detpath.format(classname) 104 | # assumes annotations are in annopath.format(imagename) 105 | # assumes imagesetfile is a text file with each line an image name 106 | # cachedir caches the annotations in a pickle file 107 | 108 | # first load gt 109 | #if not os.path.isdir(cachedir): 110 | # os.mkdir(cachedir) 111 | #cachefile = os.path.join(cachedir, 'annots.pkl') 112 | # read list of images 113 | with open(imagesetfile, 'r') as f: 114 | lines = f.readlines() 115 | imagenames = [x.strip() for x in lines] 116 | #print('imagenames: ', imagenames) 117 | #if not os.path.isfile(cachefile): 118 | # load annots 119 | recs = {} 120 | for i, imagename in enumerate(imagenames): 121 | #print('parse_files name: ', annopath.format(imagename)) 122 | recs[imagename] = parse_gt(annopath.format(imagename)) 123 | #if i % 100 == 0: 124 | # print ('Reading annotation for {:d}/{:d}'.format( 125 | # i + 1, len(imagenames)) ) 126 | # save 127 | #print ('Saving cached annotations to {:s}'.format(cachefile)) 128 | #with open(cachefile, 'w') as f: 129 | # cPickle.dump(recs, f) 130 | #else: 131 | # load 132 | #with open(cachefile, 'r') as f: 133 | # recs = cPickle.load(f) 134 | 135 | # extract gt objects for this class 136 | class_recs = {} 137 | npos = 0 138 | for imagename in imagenames: 139 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 140 | bbox = np.array([x['bbox'] for x in R]) 141 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 142 | det = [False] * len(R) 143 | npos = npos + sum(~difficult) 144 | class_recs[imagename] = {'bbox': bbox, 145 | 'difficult': difficult, 146 | 'det': det} 147 | 148 | # read dets 149 | detfile = detpath.format(classname) 150 | with open(detfile, 
'r') as f: 151 | lines = f.readlines() 152 | 153 | splitlines = [x.strip().split(' ') for x in lines] 154 | image_ids = [x[0] for x in splitlines] 155 | confidence = np.array([float(x[1]) for x in splitlines]) 156 | 157 | #print('check confidence: ', confidence) 158 | 159 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 160 | 161 | # sort by confidence 162 | sorted_ind = np.argsort(-confidence) 163 | sorted_scores = np.sort(-confidence) 164 | 165 | #print('check sorted_scores: ', sorted_scores) 166 | #print('check sorted_ind: ', sorted_ind) 167 | BB = BB[sorted_ind, :] 168 | image_ids = [image_ids[x] for x in sorted_ind] 169 | #print('check imge_ids: ', image_ids) 170 | #print('imge_ids len:', len(image_ids)) 171 | # go down dets and mark TPs and FPs 172 | nd = len(image_ids) 173 | tp = np.zeros(nd) 174 | fp = np.zeros(nd) 175 | for d in range(nd): 176 | R = class_recs[image_ids[d]] 177 | bb = BB[d, :].astype(float) 178 | ovmax = -np.inf 179 | BBGT = R['bbox'].astype(float) 180 | 181 | if BBGT.size > 0: 182 | # compute overlaps 183 | # intersection 184 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 185 | iymin = np.maximum(BBGT[:, 1], bb[1]) 186 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 187 | iymax = np.minimum(BBGT[:, 3], bb[3]) 188 | iw = np.maximum(ixmax - ixmin + 1., 0.) 189 | ih = np.maximum(iymax - iymin + 1., 0.) 190 | inters = iw * ih 191 | 192 | # union 193 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 194 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 195 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 196 | 197 | overlaps = inters / uni 198 | ovmax = np.max(overlaps) 199 | ## if there exist 2 200 | jmax = np.argmax(overlaps) 201 | 202 | if ovmax > ovthresh: 203 | if not R['difficult'][jmax]: 204 | if not R['det'][jmax]: 205 | tp[d] = 1. 206 | R['det'][jmax] = 1 207 | else: 208 | fp[d] = 1. 209 | # print('filename:', image_ids[d]) 210 | else: 211 | fp[d] = 1. 
212 | 213 | # compute precision recall 214 | 215 | print('check fp:', fp) 216 | print('check tp', tp) 217 | 218 | 219 | print('npos num:', npos) 220 | fp = np.cumsum(fp) 221 | tp = np.cumsum(tp) 222 | 223 | rec = tp / float(npos) 224 | # avoid divide by zero in case the first detection matches a difficult 225 | # ground truth 226 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 227 | ap = voc_ap(rec, prec, use_07_metric) 228 | 229 | return rec, prec, ap 230 | 231 | def main(): 232 | # detpath = r'E:\documentation\OneDrive\documentation\DotaEvaluation\evluation_task2\evluation_task2\faster-rcnn-nms_0.3_task2\nms_0.3_task\Task2_{:s}.txt' 233 | # annopath = r'I:\dota\testset\ReclabelTxt-utf-8\{:s}.txt' 234 | # imagesetfile = r'I:\dota\testset\va.txt' 235 | 236 | detpath = r'PATH_TO_BE_CONFIGURED/Task2_{:s}.txt' 237 | annopath = r'PATH_TO_BE_CONFIGURED/{:s}.txt'# change the directory to the path of val/labelTxt, if you want to do evaluation on the valset 238 | imagesetfile = r'PATH_TO_BE_CONFIGURED/valset.txt' 239 | 240 | classnames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 241 | 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'] 242 | classaps = [] 243 | map = 0 244 | for classname in classnames: 245 | print('classname:', classname) 246 | rec, prec, ap = voc_eval(detpath, 247 | annopath, 248 | imagesetfile, 249 | classname, 250 | ovthresh=0.5, 251 | use_07_metric=True) 252 | map = map + ap 253 | #print('rec: ', rec, 'prec: ', prec, 'ap: ', ap) 254 | print('ap: ', ap) 255 | classaps.append(ap) 256 | 257 | ## uncomment to plot p-r curve for each category 258 | # plt.figure(figsize=(8,4)) 259 | # plt.xlabel('recall') 260 | # plt.ylabel('precision') 261 | # plt.plot(rec, prec) 262 | # plt.show() 263 | map = map/len(classnames) 264 | print('map:', map) 265 | classaps = 100*np.array(classaps) 266 | print('classaps: ', 
classaps) 267 | if __name__ == '__main__': 268 | main() -------------------------------------------------------------------------------- /DOTA_devkit/dota_poly2rbox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | import os.path as osp 5 | 6 | import numpy as np 7 | 8 | def cal_line_length(point1, point2): 9 | return math.sqrt(math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2)) 10 | 11 | def get_best_begin_point_single(coordinate): 12 | x1, y1, x2, y2, x3, y3, x4, y4 = coordinate 13 | xmin = min(x1, x2, x3, x4) 14 | ymin = min(y1, y2, y3, y4) 15 | xmax = max(x1, x2, x3, x4) 16 | ymax = max(y1, y2, y3, y4) 17 | combinate = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], [[x2, y2], [x3, y3], [x4, y4], [x1, y1]], 18 | [[x3, y3], [x4, y4], [x1, y1], [x2, y2]], [[x4, y4], [x1, y1], [x2, y2], [x3, y3]]] 19 | dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]] 20 | force = 100000000.0 21 | force_flag = 0 22 | for i in range(4): 23 | temp_force = cal_line_length(combinate[i][0], dst_coordinate[0]) \ 24 | + cal_line_length(combinate[i][1], dst_coordinate[1]) \ 25 | + cal_line_length(combinate[i][2], dst_coordinate[2]) \ 26 | + cal_line_length(combinate[i][3], dst_coordinate[3]) 27 | if temp_force < force: 28 | force = temp_force 29 | force_flag = i 30 | if force_flag != 0: 31 | pass 32 | # print("choose one direction!") 33 | return np.array(combinate[force_flag]).reshape(8) 34 | 35 | def poly2rbox_single(poly): 36 | """ 37 | poly:[x0,y0,x1,y1,x2,y2,x3,y3] 38 | to 39 | rrect:[x_ctr,y_ctr,w,h,angle] 40 | """ 41 | poly = np.array(poly[:8], dtype=np.float32) 42 | 43 | pt1 = (poly[0], poly[1]) 44 | pt2 = (poly[2], poly[3]) 45 | pt3 = (poly[4], poly[5]) 46 | pt4 = (poly[6], poly[7]) 47 | 48 | edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + 49 | (pt1[1] - pt2[1]) * (pt1[1] - pt2[1])) 50 | edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + 51 
| (pt2[1] - pt3[1]) * (pt2[1] - pt3[1])) 52 | 53 | angle = 0 54 | width = 0 55 | height = 0 56 | 57 | if edge1 > edge2: 58 | width = edge1 59 | height = edge2 60 | angle = np.arctan2( 61 | np.float(pt2[1] - pt1[1]), np.float(pt2[0] - pt1[0])) 62 | elif edge2 >= edge1: 63 | width = edge2 64 | height = edge1 65 | angle = np.arctan2( 66 | np.float(pt4[1] - pt1[1]), np.float(pt4[0] - pt1[0])) 67 | 68 | if angle > np.pi*3/4: 69 | angle -= np.pi 70 | if angle < -np.pi/4: 71 | angle += np.pi 72 | 73 | x_ctr = np.float(pt1[0] + pt3[0]) / 2 74 | y_ctr = np.float(pt1[1] + pt3[1]) / 2 75 | rbox = np.array([x_ctr, y_ctr, width, height, angle]) 76 | 77 | return rbox 78 | 79 | def norm_angle(angle, range=[-np.pi / 4, np.pi]): 80 | return (angle - range[0]) % range[1] + range[0] 81 | 82 | 83 | def poly2rbox_single_v2(poly): 84 | """ 85 | poly:[x0,y0,x1,y1,x2,y2,x3,y3] 86 | to 87 | rrect:[x_ctr,y_ctr,w,h,angle] 88 | """ 89 | poly = np.array(poly[:8], dtype=np.float32) 90 | 91 | pt1 = (poly[0], poly[1]) 92 | pt2 = (poly[2], poly[3]) 93 | pt3 = (poly[4], poly[5]) 94 | pt4 = (poly[6], poly[7]) 95 | 96 | edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + 97 | (pt1[1] - pt2[1]) * (pt1[1] - pt2[1])) 98 | edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + 99 | (pt2[1] - pt3[1]) * (pt2[1] - pt3[1])) 100 | 101 | angle = 0 102 | width = 0 103 | height = 0 104 | 105 | if edge1 > edge2: 106 | width = edge1 107 | height = edge2 108 | angle = np.arctan2( 109 | np.float(pt2[1] - pt1[1]), np.float(pt2[0] - pt1[0])) 110 | elif edge2 >= edge1: 111 | width = edge2 112 | height = edge1 113 | angle = np.arctan2( 114 | np.float(pt4[1] - pt1[1]), np.float(pt4[0] - pt1[0])) 115 | 116 | # if angle > np.pi*3/4: 117 | # angle -= np.pi 118 | # if angle < -np.pi/4: 119 | # angle += np.pi 120 | angle = norm_angle(angle) 121 | 122 | x_ctr = np.float(pt1[0] + pt3[0]) / 2 123 | y_ctr = np.float(pt1[1] + pt3[1]) / 2 124 | 125 | return float(x_ctr), float(y_ctr), float(width), float(height), float(angle) 
126 | 127 | 128 | def poly2rbox_single_v3(poly): 129 | """ 130 | poly:[x0,y0,x1,y1,x2,y2,x3,y3] 131 | to 132 | rrect:[x_ctr,y_ctr,w,h,angle] 133 | """ 134 | poly = np.array(poly[:8], dtype=np.float32) 135 | 136 | pt1 = (poly[0], poly[1]) 137 | pt2 = (poly[2], poly[3]) 138 | pt3 = (poly[4], poly[5]) 139 | pt4 = (poly[6], poly[7]) 140 | 141 | edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + 142 | (pt1[1] - pt2[1]) * (pt1[1] - pt2[1])) 143 | edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + 144 | (pt2[1] - pt3[1]) * (pt2[1] - pt3[1])) 145 | 146 | max_edge = max(edge1, edge2) 147 | min_edge = min(edge1, edge2) 148 | ratio = max_edge / min_edge 149 | # print(ratio) 150 | if ratio < 1.15: 151 | 152 | width = max_edge 153 | height = min_edge 154 | angle1 = np.arctan2(np.float(pt2[1] - pt1[1]), np.float(pt2[0] - pt1[0])) 155 | # elif edge2 >= edge1: 156 | angle2 = np.arctan2(np.float(pt4[1] - pt1[1]), np.float(pt4[0] - pt1[0])) 157 | 158 | angle1_norm = norm_angle(angle1) 159 | angle2_norm = norm_angle(angle2) 160 | # if abs(angle1_norm) > abs(angle2_norm): 161 | # final_angle = angle2_norm 162 | # else: 163 | # final_angle = angle1_norm 164 | if abs(angle1_norm) > abs(angle2_norm): 165 | final_angle = angle2_norm 166 | else: 167 | final_angle = angle1_norm 168 | 169 | else: 170 | final_angle = 0 171 | width = 0 172 | height = 0 173 | 174 | if edge1 > edge2: 175 | width = edge1 176 | height = edge2 177 | final_angle = np.arctan2( 178 | np.float(pt2[1] - pt1[1]), np.float(pt2[0] - pt1[0])) 179 | elif edge2 >= edge1: 180 | width = edge2 181 | height = edge1 182 | final_angle = np.arctan2( 183 | np.float(pt4[1] - pt1[1]), np.float(pt4[0] - pt1[0])) 184 | 185 | final_angle = norm_angle(final_angle) 186 | 187 | x_ctr = np.float(pt1[0] + pt3[0]) / 2 188 | y_ctr = np.float(pt1[1] + pt3[1]) / 2 189 | 190 | return float(x_ctr), float(y_ctr), float(width), float(height), float(final_angle) 191 | 192 | 193 | def rbox2poly_single(rrect): 194 | """ 195 | 
rrect:[x_ctr,y_ctr,w,h,angle] 196 | to 197 | poly:[x0,y0,x1,y1,x2,y2,x3,y3] 198 | """ 199 | x_ctr, y_ctr, width, height, angle = rrect[:5] 200 | tl_x, tl_y, br_x, br_y = -width/2, -height/2, width/2, height/2 201 | rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]]) 202 | R = np.array([[np.cos(angle), -np.sin(angle)], 203 | [np.sin(angle), np.cos(angle)]]) 204 | poly = R.dot(rect) 205 | x0, x1, x2, x3 = poly[0, :4] + x_ctr 206 | y0, y1, y2, y3 = poly[1, :4] + y_ctr 207 | poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32) 208 | poly = get_best_begin_point_single(poly) 209 | return poly 210 | 211 | def convert2rbox(src_path): 212 | image_path = osp.join(src_path, 'images/') 213 | src_label_path = osp.join(src_path, 'labelTxt/') 214 | dst_label_path = osp.join(src_path, 'labelTxtRbox/') 215 | if not osp.exists(dst_label_path): 216 | os.mkdir(dst_label_path) 217 | 218 | image_list = os.listdir(image_path) 219 | image_list.sort() 220 | 221 | for image in image_list: 222 | img_name = osp.basename(image) 223 | print(img_name) 224 | ann_name = img_name.split('.')[0]+'.txt' 225 | lab_path = osp.join(src_label_path, ann_name) 226 | dst_path = osp.join(dst_label_path, ann_name) 227 | out_str = '' 228 | 229 | # import time 230 | # half the time used by poly2rbox 231 | with open(lab_path, 'r') as f: 232 | for ann_line in f.readlines(): 233 | ann_line = ann_line.strip().split(' ') 234 | bbox = [np.float32(ann_line[i]) for i in range(8)] 235 | # 8 point to 5 point xywha 236 | x_ctr, y_ctr, width, height, angle = poly2rbox_single(bbox) 237 | class_name = ann_line[8] 238 | difficult = int(ann_line[9]) 239 | 240 | out_str += "{} {} {} {} {} {} {}\n".format(str(x_ctr), str( 241 | y_ctr), str(width), str(height), str(angle), class_name, difficult) 242 | with open(dst_path, 'w') as fdst: 243 | fdst.write(out_str) 244 | 245 | 246 | if __name__ == '__main__': 247 | parser = argparse.ArgumentParser() 248 | parser.add_argument('-p', '--path', type=str, 
import sys
import codecs
import numpy as np
import os
import re
import math
# NOTE: shapely is imported lazily inside parse_dota_poly so the light-weight
# helpers in this module stay usable when shapely is not installed.
"""
some basic functions which are useful for process DOTA data
"""

# The 15 DOTA-v1.0 category names, in the canonical evaluation order.
wordname_15 = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
               'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
               'basketball-court', 'storage-tank', 'soccer-ball-field',
               'roundabout', 'harbor', 'swimming-pool', 'helicopter']


def custombasename(fullname):
    """Return the file name of *fullname* without directory and extension."""
    return os.path.basename(os.path.splitext(fullname)[0])


def GetFileFromThisRootDir(dir, ext=None):
    """Recursively collect file paths under *dir*.

    :param dir: root directory to walk
    :param ext: optional extension filter (without the leading dot); when
        given, only files whose extension occurs in *ext* are kept
    :return: list of file paths in os.walk order
    """
    allfiles = []
    needExtFilter = (ext is not None)  # identity check, not `!= None`
    for root, dirs, files in os.walk(dir):
        for filespath in files:
            filepath = os.path.join(root, filespath)
            extension = os.path.splitext(filepath)[1][1:]
            if needExtFilter and extension in ext:
                allfiles.append(filepath)
            elif not needExtFilter:
                allfiles.append(filepath)
    return allfiles


def TuplePoly2Poly(poly):
    """Flatten [(x0,y0),...,(x3,y3)] into [x0,y0,x1,y1,x2,y2,x3,y3]."""
    return [poly[0][0], poly[0][1],
            poly[1][0], poly[1][1],
            poly[2][0], poly[2][1],
            poly[3][0], poly[3][1]]


def parse_dota_poly(filename):
    """
    parse the dota ground truth in the format:
    [(x1, y1), (x2, y2), (x3, y3), (x4, y4)]

    Each valid line is "x1 y1 x2 y2 x3 y3 x4 y4 name [difficult]"; shorter
    lines are skipped.  Returns dicts with keys 'name', 'difficult'
    (string, defaults to '0'), 'poly' and 'area'.
    """
    import shapely.geometry as shgeo  # heavy dependency, only needed here
    objects = []
    # BUG FIX: the original compared sys.version_info against the float 2.7
    # (a TypeError on Python < 3.5) and never closed the file handle; a
    # context manager handles both.
    with open(filename, 'r') as f:
        for line in f:
            splitlines = line.strip().split(' ')
            if len(splitlines) < 9:
                continue
            object_struct = {}
            object_struct['name'] = splitlines[8]
            if len(splitlines) == 9:
                object_struct['difficult'] = '0'
            else:
                object_struct['difficult'] = splitlines[9]
            object_struct['poly'] = [(float(splitlines[0]), float(splitlines[1])),
                                     (float(splitlines[2]), float(splitlines[3])),
                                     (float(splitlines[4]), float(splitlines[5])),
                                     (float(splitlines[6]), float(splitlines[7]))]
            gtpoly = shgeo.Polygon(object_struct['poly'])
            object_struct['area'] = gtpoly.area
            objects.append(object_struct)
    return objects


def parse_dota_poly2(filename):
    """
    parse the dota ground truth in the format:
    [x1, y1, x2, y2, x3, y3, x4, y4]
    """
    objects = parse_dota_poly(filename)
    for obj in objects:
        obj['poly'] = TuplePoly2Poly(obj['poly'])
        obj['poly'] = list(map(int, obj['poly']))
    return objects


def parse_dota_rec(filename):
    """
    parse the dota ground truth in the bounding box format:
    "xmin, ymin, xmax, ymax"
    """
    objects = parse_dota_poly(filename)
    for obj in objects:
        obj['bndbox'] = dots4ToRec4(obj['poly'])
    return objects


## bounding box transfer for various formats

def dots4ToRec4(poly):
    """Axis-aligned bounds (xmin, ymin, xmax, ymax) of a 4-point polygon."""
    xmin = min(poly[0][0], poly[1][0], poly[2][0], poly[3][0])
    xmax = max(poly[0][0], poly[1][0], poly[2][0], poly[3][0])
    ymin = min(poly[0][1], poly[1][1], poly[2][1], poly[3][1])
    ymax = max(poly[0][1], poly[1][1], poly[2][1], poly[3][1])
    return xmin, ymin, xmax, ymax


def dots4ToRec8(poly):
    """Axis-aligned bounds of a 4-point polygon as an 8-value corner list."""
    xmin, ymin, xmax, ymax = dots4ToRec4(poly)
    return xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax


def dots2ToRec8(rec):
    """Expand (xmin, ymin, xmax, ymax) into the 8-value corner list."""
    xmin, ymin, xmax, ymax = rec[0], rec[1], rec[2], rec[3]
    return xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax


def groundtruth2Task1(srcpath, dstpath):
    """Convert per-image ground-truth files into per-class Task1 files.

    The pseudo-confidence written per line depends on the rescale rate
    encoded in the split file name (0.5 -> 1, 1 -> 0.8, 2 -> 0.6);
    difficulty-2 objects are dropped.
    """
    filelist = GetFileFromThisRootDir(srcpath)
    filedict = {}
    for cls in wordname_15:
        filedict[cls] = open(os.path.join(dstpath, 'Task1_') + cls + r'.txt', 'w')
    # rescale rate encoded in the split name, e.g. P0001__0.5__0___0
    # (hoisted: the original recompiled this per file)
    pattern2 = re.compile(r'__([\d+\.]+)__\d+___')
    rate2score = {'0.5': '1', '1': '0.8', '2': '0.6'}
    try:
        for filepath in filelist:
            objects = parse_dota_poly2(filepath)
            subname = custombasename(filepath)
            rate = re.findall(pattern2, subname)[0]
            for obj in objects:
                if obj['difficult'] == '2':
                    continue
                if rate not in rate2score:
                    # BUG FIX: the original silently reused the previous
                    # iteration's outline (or raised NameError) for an
                    # unexpected rate.
                    raise ValueError('unexpected rescale rate: %s' % rate)
                outline = (subname + ' ' + rate2score[rate] + ' '
                           + ' '.join(map(str, obj['poly'])))
                filedict[obj['name']].write(outline + '\n')
    finally:
        # BUG FIX: the original never closed the per-class output files.
        for fd in filedict.values():
            fd.close()
def Task2groundtruth_poly(srcpath, dstpath):
    """Convert per-class Task result files back into per-image label files.

    Each input line is "<image_id> <confidence> x0 y0 ... y3"; detections
    with confidence above 0.1 are appended to <dstpath>/<image_id>.txt as
    "x0 y0 ... y3 <classname>".
    """
    thresh = 0.1  # keep detections scoring above this
    filedict = {}
    Tasklist = GetFileFromThisRootDir(srcpath, '.txt')
    try:
        for Taskfile in Tasklist:
            # class name is the suffix of the Task file name, e.g. Task1_plane
            idname = custombasename(Taskfile).split('_')[-1]
            # BUG FIX: the original opened these files and never closed them.
            with open(Taskfile, 'r') as f:
                for line in f:
                    splitline = line.strip().split(' ')
                    # BUG FIX: the original checked len(line) == 0, which is
                    # never true for readline output; guard short lines here.
                    if len(splitline) < 3:
                        continue
                    filename = splitline[0]
                    confidence = splitline[1]
                    bbox = splitline[2:]
                    if float(confidence) > thresh:
                        if filename not in filedict:
                            filedict[filename] = codecs.open(
                                os.path.join(dstpath, filename + '.txt'), 'w')
                        filedict[filename].write(' '.join(bbox) + ' ' + idname + '\n')
    finally:
        # BUG FIX: per-image output files were also leaked in the original.
        for fd in filedict.values():
            fd.close()


def polygonToRotRectangle(bbox):
    """
    :param bbox: The polygon stored in format [x1, y1, x2, y2, x3, y3, x4, y4]
    :return: Rotated Rectangle in format [cx, cy, w, h, theta]
    """
    bbox = np.array(bbox, dtype=np.float32)
    # column i is point i: row 0 holds xs, row 1 holds ys
    bbox = np.reshape(bbox, newshape=(2, 4), order='F')
    # orientation of the first edge (p0 -> p1) defines the box angle
    angle = math.atan2(-(bbox[0, 1] - bbox[0, 0]), bbox[1, 1] - bbox[1, 0])

    # centroid of the four corners (replaces the manual accumulation loop)
    center = bbox.mean(axis=1, keepdims=True)

    R = np.array([[math.cos(angle), -math.sin(angle)],
                  [math.sin(angle), math.cos(angle)]], dtype=np.float32)

    # rotate the corners into the box's axis-aligned frame
    normalized = np.matmul(R.transpose(), bbox - center)

    xmin = np.min(normalized[0, :])
    xmax = np.max(normalized[0, :])
    ymin = np.min(normalized[1, :])
    ymax = np.max(normalized[1, :])

    # +1 keeps the original pixel-extent convention
    w = xmax - xmin + 1
    h = ymax - ymin + 1

    return [float(center[0, 0]), float(center[1, 0]), w, h, angle]


def cal_line_length(point1, point2):
    """Euclidean distance between two 2-D points."""
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])


def get_best_begin_point(coordinate):
    """Cyclically rotate a 4-point polygon so its first vertex is the one
    whose total corner-to-corner distance to the axis-aligned reference
    order (tl, tr, br, bl) is smallest.

    :param coordinate: four (x, y) points
    :return: the best rotation as a list of four [x, y] lists
    """
    x1, y1 = coordinate[0][0], coordinate[0][1]
    x2, y2 = coordinate[1][0], coordinate[1][1]
    x3, y3 = coordinate[2][0], coordinate[2][1]
    x4, y4 = coordinate[3][0], coordinate[3][1]
    xmin = min(x1, x2, x3, x4)
    ymin = min(y1, y2, y3, y4)
    xmax = max(x1, x2, x3, x4)
    ymax = max(y1, y2, y3, y4)
    combinate = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                 [[x2, y2], [x3, y3], [x4, y4], [x1, y1]],
                 [[x3, y3], [x4, y4], [x1, y1], [x2, y2]],
                 [[x4, y4], [x1, y1], [x2, y2], [x3, y3]]]
    dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
    force = 100000000.0
    force_flag = 0
    for i in range(4):
        temp_force = sum(cal_line_length(combinate[i][j], dst_coordinate[j])
                         for j in range(4))
        if temp_force < force:
            force = temp_force
            force_flag = i
    if force_flag != 0:
        print("choose one direction!")
    return combinate[force_flag]
import math
import numpy as np
# NOTE: `polyiou` (compiled SWIG extension) and `dota_poly2rbox` are imported
# lazily inside aoe_eval so parse_gt can be used without the extension built.


def parse_gt(filename):
    """
    :param filename: ground truth file to parse
    :return: all instances in a picture

    Each valid line is "x1 y1 ... y4 name [difficult]"; shorter lines are
    skipped and 'difficult' defaults to 0 when the column is absent.
    """
    objects = []
    # FIX: use a context manager (original leaked the handle on exceptions
    # in the old version of this file family).
    with open(filename, 'r') as f:
        for line in f:
            splitlines = line.strip().split(' ')
            if len(splitlines) < 9:
                continue
            object_struct = {}
            object_struct['name'] = splitlines[8]
            if len(splitlines) == 9:
                object_struct['difficult'] = 0
            else:
                # FIX: lines with more than 10 columns previously left
                # 'difficult' unset, causing a KeyError downstream.
                object_struct['difficult'] = int(splitlines[9])
            object_struct['bbox'] = [float(splitlines[i]) for i in range(8)]
            objects.append(object_struct)
    return objects


def aoe_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             ovthresh=0.5):
    """Collect the absolute angle error (degrees) of every detection of
    *classname* that matches a ground-truth box with polygon IoU > ovthresh.

    detpath: Path to detections;
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations;
        annopath.format(imagename) should be the annotation file.
    imagesetfile: Text file containing the list of images, one per line.
    classname: Category name.
    ovthresh: Overlap threshold for a match (default = 0.5).
    :return: list of per-matched-detection angle differences in degrees
    """
    import polyiou  # SWIG polygon-IoU extension
    from dota_poly2rbox import poly2rbox_single_v3

    with open(imagesetfile, 'r') as f:
        imagenames = [x.strip() for x in f.readlines()]

    # per-image ground truth
    recs = {}
    for imagename in imagenames:
        recs[imagename] = parse_gt(annopath.format(imagename))

    # extract gt objects for this class
    class_recs = {}
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # FIX: np.bool was removed in NumPy >= 1.24; plain bool is equivalent.
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': [False] * len(R)}

    # read dets from Task1* files
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # process detections in descending confidence order
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    angle_dif_list = []
    for d in range(len(image_ids)):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        BBGT = R['bbox'].astype(float)
        if BBGT.size == 0:
            continue

        # 1. Cheap pre-filter on axis-aligned (hbb) overlap: if two hbbs do
        # not overlap, the rotated boxes cannot overlap either.
        BBGT_xmin = np.min(BBGT[:, 0::2], axis=1)
        BBGT_ymin = np.min(BBGT[:, 1::2], axis=1)
        BBGT_xmax = np.max(BBGT[:, 0::2], axis=1)
        BBGT_ymax = np.max(BBGT[:, 1::2], axis=1)
        bb_xmin = np.min(bb[0::2])
        bb_ymin = np.min(bb[1::2])
        bb_xmax = np.max(bb[0::2])
        bb_ymax = np.max(bb[1::2])

        ixmin = np.maximum(BBGT_xmin, bb_xmin)
        iymin = np.maximum(BBGT_ymin, bb_ymin)
        ixmax = np.minimum(BBGT_xmax, bb_xmax)
        iymax = np.minimum(BBGT_ymax, bb_ymax)
        iw = np.maximum(ixmax - ixmin + 1., 0.)
        ih = np.maximum(iymax - iymin + 1., 0.)
        inters = iw * ih
        uni = ((bb_xmax - bb_xmin + 1.) * (bb_ymax - bb_ymin + 1.) +
               (BBGT_xmax - BBGT_xmin + 1.) *
               (BBGT_ymax - BBGT_ymin + 1.) - inters)
        overlaps = inters / uni

        BBGT_keep = BBGT[overlaps > 0, :]
        if len(BBGT_keep) == 0:
            continue

        # 2. Exact polygon IoU against the surviving candidates.
        poly_overlaps = [polyiou.iou_poly(polyiou.VectorDouble(gt),
                                          polyiou.VectorDouble(bb))
                         for gt in BBGT_keep]
        ovmax = np.max(poly_overlaps)
        if ovmax > ovthresh:
            jmax = np.argmax(poly_overlaps)
            angle_gt = poly2rbox_single_v3(BBGT_keep[jmax])[-1]
            angle_det = poly2rbox_single_v3(bb)[-1]
            # FIX: the original multiplied by 57.32; the exact radians-to-
            # degrees factor is 180/pi (~57.29578).
            angle_dif_list.append(math.degrees(abs(angle_det - angle_gt)))

    return angle_dif_list


def main():
    detpath = r'./centerRote/result/test96/test96_final/Task1_{:s}.txt'
    annopath = r'./datasets/val/labelTxt/{:s}.txt'  # change the directory to the path of val/labelTxt, if you want to do evaluation on the valset
    imagesetfile = r'./datasets/val/val.txt'

    # For DOTA-v1.0
    classnames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
                  'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
                  'basketball-court', 'storage-tank', 'soccer-ball-field',
                  'roundabout', 'harbor', 'swimming-pool', 'helicopter']

    # for hrsc2016
    # classnames = ['ship']

    # for ucas_aod
    # classname = ['airplane', 'car']

    classaps = []
    for classname in classnames:
        print('classname:', classname)
        angle_dif_list = aoe_eval(detpath,
                                  annopath,
                                  imagesetfile,
                                  classname,
                                  ovthresh=0.7)  # set 0.7 as default
        if not angle_dif_list:
            # FIX: the original divided by zero for classes without matches.
            print('no matched detections for class:', classname)
            continue
        angle_dif_ave = sum(angle_dif_list) / len(angle_dif_list)
        print('angle_dif_ave: ', angle_dif_ave)
        classaps.append(angle_dif_ave)

    if classaps:
        print('mAOE: ', sum(classaps) / len(classaps))


if __name__ == '__main__':
    main()
import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "poly_nms.hpp":
    void _poly_nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def poly_gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, double thresh,
                 np.int32_t device_id=0):
    """GPU polygon NMS.

    dets: (N, 9) float32 rows [x0, y0, ..., x3, y3, score].
    thresh: IoU suppression threshold.
    device_id: CUDA device to run on.
    Returns the kept row indices of *dets* (original order), best first.

    FIX: the signature previously used `np.float`, an alias removed in
    NumPy >= 1.24 (it was just builtin float, hence `double` here).
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 8]
    # FIX: argsort returns platform `intp`, not C long; np.int_t mismatches
    # on LLP64 platforms (Windows).
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    # kernel expects score-descending input; map kept positions back through
    # `order` to recover original indices
    _poly_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])
// NOTE(review): the original include names were stripped by text extraction;
// the headers below are a reconstruction — verify against upstream.
#include <vector>
#include <iostream>
#include <cmath>
#include <cstdio>
#include <cstring>

using namespace std;

// Print-and-continue wrapper around CUDA runtime calls.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Ceiling division.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per box; the 64-bit suppression mask fixes the block size.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

#define maxn 51
const double eps = 1E-8;

// Sign of d with an epsilon dead-zone: -1, 0 or +1.
__device__ inline int sig(float d) {
    return (d > eps) - (d < -eps);
}

__device__ inline int point_eq(const float2 a, const float2 b) {
    return sig(a.x - b.x) == 0 && sig(a.y - b.y) == 0;
}

__device__ inline void point_swap(float2 *a, float2 *b) {
    float2 temp = *a;
    *a = *b;
    *b = temp;
}

__device__ inline void point_reverse(float2 *first, float2 *last) {
    while ((first != last) && (first != --last)) {
        point_swap(first, last);
        ++first;
    }
}

// Cross product (a - o) x (b - o).
__device__ inline float cross(float2 o, float2 a, float2 b) {
    return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
}

// Signed polygon area (shoelace). NOTE(review): the loop bounds were lost in
// extraction; reconstructed from the CPU twin in polyiou.cpp.
__device__ inline float area(float2 *ps, int n) {
    ps[n] = ps[0];
    float res = 0;
    for (int i = 0; i < n; i++) {
        res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x;
    }
    return res / 2.0;
}

// Intersect segment cd with the line ab; result in p.
// Returns 2 when collinear, 0 when parallel, 1 on a proper cross.
// NOTE(review): reconstructed from the CPU twin in polyiou.cpp.
__device__ inline int lineCross(float2 a, float2 b, float2 c, float2 d, float2 &p) {
    float s1, s2;
    s1 = cross(a, b, c);
    s2 = cross(a, b, d);
    if (sig(s1) == 0 && sig(s2) == 0) return 2;
    if (sig(s2 - s1) == 0) return 0;
    p.x = (c.x * s2 - d.x * s1) / (s2 - s1);
    p.y = (c.y * s2 - d.y * s1) / (s2 - s1);
    return 1;
}

// Clip polygon p (n vertices, modified in place) against the half-plane to
// the left of the directed line ab; pp is scratch space.
__device__ inline void polygon_cut(float2 *p, int &n, float2 a, float2 b, float2 *pp) {
    int m = 0;
    p[n] = p[0];
    for (int i = 0; i < n; i++) {
        if (sig(cross(a, b, p[i])) > 0) pp[m++] = p[i];
        if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1])))
            lineCross(a, b, p[i], p[i + 1], pp[m++]);
    }
    n = 0;
    for (int i = 0; i < m; i++)
        if (!i || !(point_eq(pp[i], pp[i - 1])))
            p[n++] = pp[i];
    while (n > 1 && point_eq(p[n - 1], p[0])) n--;
}

// Signed intersection area of triangles (o,a,b) and (o,c,d), o = origin.
__device__ inline float intersectArea(float2 a, float2 b, float2 c, float2 d) {
    float2 o = make_float2(0, 0);
    int s1 = sig(cross(o, a, b));
    int s2 = sig(cross(o, c, d));
    if (s1 == 0 || s2 == 0) return 0.0;  // degenerate: zero area
    if (s1 == -1) point_swap(&a, &b);
    if (s2 == -1) point_swap(&c, &d);
    float2 p[10] = {o, a, b};
    int n = 3;
    float2 pp[maxn];
    polygon_cut(p, n, o, c, pp);
    polygon_cut(p, n, c, d, pp);
    polygon_cut(p, n, d, o, pp);
    float res = fabs(area(p, n));
    if (s1 * s2 == -1) res = -res;
    return res;
}

// Intersection area of two simple polygons (triangulated from the origin).
__device__ inline float intersectArea(float2 *ps1, int n1, float2 *ps2, int n2) {
    if (area(ps1, n1) < 0) point_reverse(ps1, ps1 + n1);
    if (area(ps2, n2) < 0) point_reverse(ps2, ps2 + n2);
    ps1[n1] = ps1[0];
    ps2[n2] = ps2[0];
    float res = 0;
    for (int i = 0; i < n1; i++) {
        for (int j = 0; j < n2; j++) {
            res += intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1]);
        }
    }
    return res;  // assume res is positive!
}

// IoU of two quadrilaterals stored as 8 floats [x0,y0,...,x3,y3].
__device__ inline float devPolyIoU(float const *const p, float const *const q) {
    float2 ps1[maxn], ps2[maxn];
    int n1 = 4;
    int n2 = 4;
    for (int i = 0; i < 4; i++) {
        ps1[i].x = p[i * 2];
        ps1[i].y = p[i * 2 + 1];

        ps2[i].x = q[i * 2];
        ps2[i].y = q[i * 2 + 1];
    }
    float inter_area = intersectArea(ps1, n1, ps2, n2);
    float union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;
    float iou = 0;
    if (union_area == 0) {
        iou = (inter_area + 1) / (union_area + 1);
    } else {
        iou = inter_area / union_area;
    }
    return iou;
}

// One block handles a (row, col) tile of the pairwise IoU matrix; each
// thread builds a 64-bit mask of the columns its row-box suppresses.
__global__ void poly_nms_kernel(const int n_polys, const float nms_overlap_thresh,
                                const float *dev_polys, unsigned long long *dev_mask) {
    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    const int row_size =
        min(n_polys - row_start * threadsPerBlock, threadsPerBlock);
    const int cols_size =
        min(n_polys - col_start * threadsPerBlock, threadsPerBlock);

    // Stage this column tile (9 floats per box: 8 coords + score) in
    // shared memory. The original unrolled these nine copies by hand.
    __shared__ float block_polys[threadsPerBlock * 9];
    if (threadIdx.x < cols_size) {
        for (int k = 0; k < 9; k++) {
            block_polys[threadIdx.x * 9 + k] =
                dev_polys[(threadsPerBlock * col_start + threadIdx.x) * 9 + k];
        }
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
        const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
        const float *cur_box = dev_polys + cur_box_idx * 9;
        unsigned long long t = 0;
        int start = 0;
        if (row_start == col_start) {
            // input is score-sorted: only suppress later (lower-scored) boxes
            start = threadIdx.x + 1;
        }
        for (int i = start; i < cols_size; i++) {
            if (devPolyIoU(cur_box, block_polys + i * 9) > nms_overlap_thresh) {
                t |= 1ULL << i;
            }
        }
        const int col_blocks = DIVUP(n_polys, threadsPerBlock);
        dev_mask[cur_box_idx * col_blocks + col_start] = t;
    }
}

void _set_device(int device_id) {
    int current_device;
    // NOTE(review): '&current_device' was mangled to an HTML entity in the
    // extracted text; restored here.
    CUDA_CHECK(cudaGetDevice(&current_device));
    if (current_device == device_id) {
        return;
    }
    // The call to cudaSetDevice must come before any calls to Get, which
    // may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));
}

// Host entry: polys_host is (polys_num, 9) row-major, sorted by score
// descending; writes kept indices into keep_out and their count to num_out.
void _poly_nms(int *keep_out, int *num_out, const float *polys_host, int polys_num,
               int polys_dim, float nms_overlap_thresh, int device_id) {
    float *polys_dev = NULL;
    unsigned long long *mask_dev = NULL;
    const int col_blocks = DIVUP(polys_num, threadsPerBlock);

    CUDA_CHECK(cudaMalloc(&polys_dev,
                          polys_num * polys_dim * sizeof(float)));
    CUDA_CHECK(cudaMemcpy(polys_dev,
                          polys_host,
                          polys_num * polys_dim * sizeof(float),
                          cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaMalloc(&mask_dev,
                          polys_num * col_blocks * sizeof(unsigned long long)));

    dim3 blocks(DIVUP(polys_num, threadsPerBlock),
                DIVUP(polys_num, threadsPerBlock));
    dim3 threads(threadsPerBlock);
    // NOTE(review): the <<<blocks, threads>>> launch syntax was stripped by
    // extraction; restored here.
    poly_nms_kernel<<<blocks, threads>>>(polys_num,
                                         nms_overlap_thresh,
                                         polys_dev,
                                         mask_dev);

    // NOTE(review): the vector element types below were stripped by
    // extraction; unsigned long long matches the mask layout.
    std::vector<unsigned long long> mask_host(polys_num * col_blocks);
    CUDA_CHECK(cudaMemcpy(&mask_host[0],
                          mask_dev,
                          sizeof(unsigned long long) * polys_num * col_blocks,
                          cudaMemcpyDeviceToHost));

    // Greedy scan: keep box i unless an earlier kept box suppressed it,
    // then OR its mask into the running suppression set.
    std::vector<unsigned long long> remv(col_blocks);
    memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
    int num_to_keep = 0;
    for (int i = 0; i < polys_num; i++) {
        int nblock = i / threadsPerBlock;
        int inblock = i % threadsPerBlock;

        if (!(remv[nblock] & (1ULL << inblock))) {
            keep_out[num_to_keep++] = i;
            unsigned long long *p = &mask_host[0] + i * col_blocks;
            for (int j = nblock; j < col_blocks; j++) {
                remv[j] |= p[j];
            }
        }
    }
    *num_out = num_to_keep;

    CUDA_CHECK(cudaFree(polys_dev));
    CUDA_CHECK(cudaFree(mask_dev));
}
// polyiou.cpp — exact IoU between two convex quadrilaterals, computed with a
// classic origin-triangulation polygon-clipping routine (CPU twin of
// poly_nms_kernel.cu).
// NOTE(review): the original include names and every std::vector<double>
// template argument were stripped by text extraction; reconstructed here —
// verify against upstream.
#include <iostream>
#include <fstream>
#include <cmath>
#include <algorithm>
#include <vector>
using namespace std;

#define maxn 51
const double eps = 1E-8;

// Sign of d with an epsilon dead-zone: -1, 0 or +1.
int sig(double d) {
    return (d > eps) - (d < -eps);
}

struct Point {
    double x, y;
    Point() {}
    Point(double x, double y) : x(x), y(y) {}
    // equality up to eps in both coordinates
    bool operator==(const Point &p) const {
        return sig(x - p.x) == 0 && sig(y - p.y) == 0;
    }
};

// Cross product (a - o) x (b - o).
double cross(Point o, Point a, Point b) {
    return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
}

// Signed polygon area via the shoelace formula; ps[n] is used as scratch.
double area(Point *ps, int n) {
    ps[n] = ps[0];
    double res = 0;
    for (int i = 0; i < n; i++) {
        res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x;
    }
    return res / 2.0;
}

// Intersect segment cd with the line ab; result in p.
// Returns 2 when collinear, 0 when parallel, 1 on a proper cross.
int lineCross(Point a, Point b, Point c, Point d, Point &p) {
    double s1, s2;
    s1 = cross(a, b, c);
    s2 = cross(a, b, d);
    if (sig(s1) == 0 && sig(s2) == 0) return 2;
    if (sig(s2 - s1) == 0) return 0;
    p.x = (c.x * s2 - d.x * s1) / (s2 - s1);
    p.y = (c.y * s2 - d.y * s1) / (s2 - s1);
    return 1;
}

// Clip polygon p (n vertices, modified in place) against the half-plane to
// the left of the directed line ab; pp is scratch space.
void polygon_cut(Point *p, int &n, Point a, Point b, Point *pp) {
    int m = 0;
    p[n] = p[0];
    for (int i = 0; i < n; i++) {
        if (sig(cross(a, b, p[i])) > 0) pp[m++] = p[i];
        if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1])))
            lineCross(a, b, p[i], p[i + 1], pp[m++]);
    }
    n = 0;
    for (int i = 0; i < m; i++)
        if (!i || !(pp[i] == pp[i - 1]))
            p[n++] = pp[i];
    while (n > 1 && p[n - 1] == p[0]) n--;
}

// Signed intersection area of triangles (o,a,b) and (o,c,d), o = origin.
double intersectArea(Point a, Point b, Point c, Point d) {
    Point o(0, 0);
    int s1 = sig(cross(o, a, b));
    int s2 = sig(cross(o, c, d));
    if (s1 == 0 || s2 == 0) return 0.0;  // degenerate: zero area
    if (s1 == -1) swap(a, b);
    if (s2 == -1) swap(c, d);
    Point p[10] = {o, a, b};
    int n = 3;
    Point pp[maxn];
    polygon_cut(p, n, o, c, pp);
    polygon_cut(p, n, c, d, pp);
    polygon_cut(p, n, d, o, pp);
    double res = fabs(area(p, n));
    if (s1 * s2 == -1) res = -res;
    return res;
}

// Intersection area of two simple polygons (triangulated from the origin).
double intersectArea(Point *ps1, int n1, Point *ps2, int n2) {
    if (area(ps1, n1) < 0) reverse(ps1, ps1 + n1);
    if (area(ps2, n2) < 0) reverse(ps2, ps2 + n2);
    ps1[n1] = ps1[0];
    ps2[n2] = ps2[0];
    double res = 0;
    for (int i = 0; i < n1; i++) {
        for (int j = 0; j < n2; j++) {
            res += intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1]);
        }
    }
    return res;  // assume res is positive!
}

// IoU of two quadrilaterals given as flat vectors [x0, y0, ..., x3, y3].
double iou_poly(vector<double> p, vector<double> q) {
    Point ps1[maxn], ps2[maxn];
    int n1 = 4;
    int n2 = 4;
    for (int i = 0; i < 4; i++) {
        ps1[i].x = p[i * 2];
        ps1[i].y = p[i * 2 + 1];

        ps2[i].x = q[i * 2];
        ps2[i].y = q[i * 2 + 1];
    }
    double inter_area = intersectArea(ps1, n1, ps2, n2);
    double union_area = fabs(area(ps1, n1)) + fabs(area(ps2, n2)) - inter_area;
    double iou = inter_area / union_area;
    return iou;
}
3 | // 4 | 5 | #ifndef POLYIOU_POLYIOU_H 6 | #define POLYIOU_POLYIOU_H 7 | 8 | #include 9 | double iou_poly(std::vector p, std::vector q); 10 | #endif //POLYIOU_POLYIOU_H 11 | -------------------------------------------------------------------------------- /DOTA_devkit/polyiou.i: -------------------------------------------------------------------------------- 1 | %module polyiou 2 | %include "std_vector.i" 3 | 4 | namespace std { 5 | %template(VectorDouble) vector; 6 | }; 7 | 8 | %{ 9 | #define SWIG_FILE_WITH_INIT 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "polyiou.h" 16 | %} 17 | 18 | %include "polyiou.h" 19 | 20 | -------------------------------------------------------------------------------- /DOTA_devkit/polyiou.py: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by SWIG (http://www.swig.org). 2 | # Version 3.0.12 3 | # 4 | # Do not make changes to this file unless you know what you are doing--modify 5 | # the SWIG interface file instead. 
# NOTE(review): SWIG-generated glue (SWIG 3.0.12) for the _polyiou C
# extension. Do not hand-edit — regenerate from polyiou.i instead.

# Locate and load the compiled _polyiou extension, trying a package-relative
# import first, then a plain top-level import.
from sys import version_info as _swig_python_version_info
if _swig_python_version_info >= (2, 7, 0):
    def swig_import_helper():
        import importlib
        pkg = __name__.rpartition('.')[0]
        mname = '.'.join((pkg, '_polyiou')).lstrip('.')
        try:
            return importlib.import_module(mname)
        except ImportError:
            return importlib.import_module('_polyiou')
    _polyiou = swig_import_helper()
    del swig_import_helper
elif _swig_python_version_info >= (2, 6, 0):
    # Legacy Python 2.6 path using the deprecated imp module.
    def swig_import_helper():
        from os.path import dirname
        import imp
        fp = None
        try:
            fp, pathname, description = imp.find_module('_polyiou', [dirname(__file__)])
        except ImportError:
            import _polyiou
            return _polyiou
        try:
            _mod = imp.load_module('_polyiou', fp, pathname, description)
        finally:
            if fp is not None:
                fp.close()
        return _mod
    _polyiou = swig_import_helper()
    del swig_import_helper
else:
    import _polyiou
del _swig_python_version_info

try:
    _swig_property = property
except NameError:
    pass  # Python < 2.2 doesn't have 'property'.

try:
    import builtins as __builtin__
except ImportError:
    import __builtin__

# Attribute plumbing shared by every SWIG proxy class: setattr/getattr are
# routed through the per-class __swig_setmethods__ / __swig_getmethods__
# tables so C-level properties win over plain instance attributes.
def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
    if (name == "thisown"):
        return self.this.own(value)
    if (name == "this"):
        if type(value).__name__ == 'SwigPyObject':
            self.__dict__[name] = value
            return
    method = class_type.__swig_setmethods__.get(name, None)
    if method:
        return method(self, value)
    if (not static):
        if _newclass:
            object.__setattr__(self, name, value)
        else:
            self.__dict__[name] = value
    else:
        raise AttributeError("You cannot add attributes to %s" % self)


def _swig_setattr(self, class_type, name, value):
    return _swig_setattr_nondynamic(self, class_type, name, value, 0)


def _swig_getattr(self, class_type, name):
    if (name == "thisown"):
        return self.this.own()
    method = class_type.__swig_getmethods__.get(name, None)
    if method:
        return method(self)
    raise AttributeError("'%s' object has no attribute '%s'" % (class_type.__name__, name))


def _swig_repr(self):
    try:
        strthis = "proxy of " + self.this.__repr__()
    except __builtin__.Exception:
        strthis = ""
    return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,)

# Old-style/new-style class compatibility shim (Python 2 era).
try:
    _object = object
    _newclass = 1
except __builtin__.Exception:
    class _object:
        pass
    _newclass = 0

# Proxy of the C++ std::vector iterator; abstract from Python.
class SwigPyIterator(_object):
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name)

    def __init__(self, *args, **kwargs):
        raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _polyiou.delete_SwigPyIterator
    __del__ = lambda self: None

    def value(self):
        return _polyiou.SwigPyIterator_value(self)

    def incr(self, n=1):
        return _polyiou.SwigPyIterator_incr(self, n)

    def decr(self, n=1):
        return _polyiou.SwigPyIterator_decr(self, n)

    def distance(self, x):
        return _polyiou.SwigPyIterator_distance(self, x)

    def equal(self, x):
        return _polyiou.SwigPyIterator_equal(self, x)

    def copy(self):
        return _polyiou.SwigPyIterator_copy(self)

    def next(self):
        return _polyiou.SwigPyIterator_next(self)

    def __next__(self):
        return _polyiou.SwigPyIterator___next__(self)

    def previous(self):
        return _polyiou.SwigPyIterator_previous(self)

    def advance(self, n):
        return _polyiou.SwigPyIterator_advance(self, n)

    def __eq__(self, x):
        return _polyiou.SwigPyIterator___eq__(self, x)

    def __ne__(self, x):
        return _polyiou.SwigPyIterator___ne__(self, x)

    def __iadd__(self, n):
        return _polyiou.SwigPyIterator___iadd__(self, n)

    def __isub__(self, n):
        return _polyiou.SwigPyIterator___isub__(self, n)

    def __add__(self, n):
        return _polyiou.SwigPyIterator___add__(self, n)

    def __sub__(self, *args):
        return _polyiou.SwigPyIterator___sub__(self, *args)
    def __iter__(self):
        return self
SwigPyIterator_swigregister = _polyiou.SwigPyIterator_swigregister
SwigPyIterator_swigregister(SwigPyIterator)

# Proxy of std::vector<double> (declared VectorDouble in polyiou.i);
# continues past the end of this view.
class VectorDouble(_object):
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, VectorDouble, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, VectorDouble, name)
    __repr__ = _swig_repr

    def iterator(self):
        return _polyiou.VectorDouble_iterator(self)
    def __iter__(self):
        return self.iterator()
173 | 174 | def __nonzero__(self): 175 | return _polyiou.VectorDouble___nonzero__(self) 176 | 177 | def __bool__(self): 178 | return _polyiou.VectorDouble___bool__(self) 179 | 180 | def __len__(self): 181 | return _polyiou.VectorDouble___len__(self) 182 | 183 | def __getslice__(self, i, j): 184 | return _polyiou.VectorDouble___getslice__(self, i, j) 185 | 186 | def __setslice__(self, *args): 187 | return _polyiou.VectorDouble___setslice__(self, *args) 188 | 189 | def __delslice__(self, i, j): 190 | return _polyiou.VectorDouble___delslice__(self, i, j) 191 | 192 | def __delitem__(self, *args): 193 | return _polyiou.VectorDouble___delitem__(self, *args) 194 | 195 | def __getitem__(self, *args): 196 | return _polyiou.VectorDouble___getitem__(self, *args) 197 | 198 | def __setitem__(self, *args): 199 | return _polyiou.VectorDouble___setitem__(self, *args) 200 | 201 | def pop(self): 202 | return _polyiou.VectorDouble_pop(self) 203 | 204 | def append(self, x): 205 | return _polyiou.VectorDouble_append(self, x) 206 | 207 | def empty(self): 208 | return _polyiou.VectorDouble_empty(self) 209 | 210 | def size(self): 211 | return _polyiou.VectorDouble_size(self) 212 | 213 | def swap(self, v): 214 | return _polyiou.VectorDouble_swap(self, v) 215 | 216 | def begin(self): 217 | return _polyiou.VectorDouble_begin(self) 218 | 219 | def end(self): 220 | return _polyiou.VectorDouble_end(self) 221 | 222 | def rbegin(self): 223 | return _polyiou.VectorDouble_rbegin(self) 224 | 225 | def rend(self): 226 | return _polyiou.VectorDouble_rend(self) 227 | 228 | def clear(self): 229 | return _polyiou.VectorDouble_clear(self) 230 | 231 | def get_allocator(self): 232 | return _polyiou.VectorDouble_get_allocator(self) 233 | 234 | def pop_back(self): 235 | return _polyiou.VectorDouble_pop_back(self) 236 | 237 | def erase(self, *args): 238 | return _polyiou.VectorDouble_erase(self, *args) 239 | 240 | def __init__(self, *args): 241 | this = _polyiou.new_VectorDouble(*args) 242 | try: 243 | 
self.this.append(this) 244 | except __builtin__.Exception: 245 | self.this = this 246 | 247 | def push_back(self, x): 248 | return _polyiou.VectorDouble_push_back(self, x) 249 | 250 | def front(self): 251 | return _polyiou.VectorDouble_front(self) 252 | 253 | def back(self): 254 | return _polyiou.VectorDouble_back(self) 255 | 256 | def assign(self, n, x): 257 | return _polyiou.VectorDouble_assign(self, n, x) 258 | 259 | def resize(self, *args): 260 | return _polyiou.VectorDouble_resize(self, *args) 261 | 262 | def insert(self, *args): 263 | return _polyiou.VectorDouble_insert(self, *args) 264 | 265 | def reserve(self, n): 266 | return _polyiou.VectorDouble_reserve(self, n) 267 | 268 | def capacity(self): 269 | return _polyiou.VectorDouble_capacity(self) 270 | __swig_destroy__ = _polyiou.delete_VectorDouble 271 | __del__ = lambda self: None 272 | VectorDouble_swigregister = _polyiou.VectorDouble_swigregister 273 | VectorDouble_swigregister(VectorDouble) 274 | 275 | 276 | def iou_poly(p, q): 277 | return _polyiou.iou_poly(p, q) 278 | iou_poly = _polyiou.iou_poly 279 | # This file is compatible with both classic and new-style classes. 
280 | 281 | 282 | -------------------------------------------------------------------------------- /DOTA_devkit/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | setup.py file for SWIG example 3 | """ 4 | from distutils.core import setup, Extension 5 | import numpy 6 | 7 | polyiou_module = Extension('_polyiou', 8 | sources=['polyiou_wrap.cxx', 'polyiou.cpp'], 9 | ) 10 | setup(name = 'polyiou', 11 | version = '0.1', 12 | author = "SWIG Docs", 13 | description = """Simple swig example from docs""", 14 | ext_modules = [polyiou_module], 15 | py_modules = ["polyiou"], 16 | ) 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # Rethinking Boundary Discontinuity Problem for Oriented Object Detection 4 | 5 | [Hang Xu](https://scholar.google.com/citations?user=HtBg_hwAAAAJ&hl=zh-CN)1*, [Xinyuan Liu](https://antxinyuan.github.io/)2*, [Haonan Xu](https://scholar.google.com/citations?user=Up_a2VAAAAAJ&hl=zh-CN)2, [Yike Ma](http://www.ict.cas.cn/sourcedb_ict_cas/cn/jssrck/201511/t20151119_4470413.html)2, [Zunjie Zhu]()1, [Chenggang Yan](https://cgyan-iipl.github.io/)1, [Feng Dai](http://www.ict.cas.cn/sourcedb_ict_cas/cn/jssrck/201404/t20140422_4096774.html)2† 6 | 7 |

1Hangzhou Dianzi University   2Institute of Computing Technology, Chinese Academy of Sciences 8 |
*Equal contribution   Corresponding author

9 | 10 | ### [Projectpage]() · [Paper](https://arxiv.org/pdf/2305.10061.pdf) · [Video](https://youtu.be/dXU1t7V8yi4) 11 | 12 |

13 | 14 | ## :fire: News 15 | - **[02/27/2024]** This paper is accepted by CVPR 2024. 16 | - **[03/20/2024]** Full code released. 17 | 18 | ## Introduction 19 | Oriented object detection has been developed rapidly in the past few years, where rotation equivariance is crucial for detectors to predict rotated boxes. It is expected that the prediction can maintain the corresponding rotation when objects rotate, but severe mutation in angular prediction is sometimes observed when objects rotate near the boundary angle, which is well-known boundary discontinuity problem. The problem has been long believed to be caused by the sharp loss increase at the angular boundary, and widely used joint-optim IoU-like methods deal with this problem by loss-smoothing. However, we experimentally find that even state-of-the-art IoU-like methods actually fail to solve the problem. On further analysis, we find that the key to solution lies in encoding mode of the smoothing function rather than in joint or independent optimization. In existing IoU-like methods, the model essentially attempts to fit the angular relationship between box and object, where the break point at angular boundary makes the predictions highly unstable.To deal with this issue, we propose a dual-optimization paradigm for angles. We decouple reversibility and joint-optim from single smoothing function into two distinct entities, which for the first time achieves the objectives of both correcting angular boundary and blending angle with other parameters.Extensive experiments on multiple datasets show that boundary discontinuity problem is well-addressed. Moreover, typical IoU-like methods are improved to the same level without obvious performance gap. 20 | ![](images/rotation_invarience.png) 21 | 22 | ## Installation 23 | > This repository is developed based on TGRS22 paper GF-CSL 's [offical code](https://github.com/WangJian981002/GF-CSL), which contains a CenterNet detector with minimalism style. 
By the way, some loss functions and modules are modified based on [mmdet](https://github.com/open-mmlab/mmdet) and [mmrotate](https://github.com/open-mmlab/mmrotate). 24 | 25 | ### Requirements 26 | * Linux 27 | * Python 3.7+ 28 | * Pytorch 1.7.0 or higher 29 | * mmcv 30 | * CUDA 11.0 31 | * GCC 7.5.0 32 | 33 | #### INSTALL 34 | 1. Create a conda virtual environment and activate it 35 | 36 | ``` 37 | conda create -n cvpr24acm python=3.7 -y 38 | conda activate cvpr24acm 39 | ``` 40 | 41 | 2. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/), e.g. 42 | 43 | `conda install pytorch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 cudatoolkit=11.0 -c pytorch` 44 | 45 | 3. Install [mmcv](https://github.com/open-mmlab/mmcv) for DCNv2, e.g. 46 | 47 | ``` 48 | #pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html 49 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.1/index.html 50 | ``` 51 | 52 | 4. Install DOTA_devkit 53 | 54 | ``` 55 | sudo apt-get install swig 56 | cd DOTA_devkit 57 | swig -c++ -python polyiou.i 58 | python setup.py build_ext --inplace 59 | ``` 60 | 61 | ### Dataset Preparation 62 | For DOTA datasets, please refer [DOTA_devkit](https://github.com/CAPTAIN-WHU/DOTA_devkit) to crop the original images into patches. e.g. 1024×1024 pixels with overlap 256 px. 63 | 64 | Please organize the datasets in the following format. Note that the test set of DOTA does not provide annotations, so you can place the corresponding empty files in the test_split/labelTxt path. 65 | 66 | As described in the paper, we use a relatively large image resolution during the test, please crop the test image into a 4000×4000 px with overlap 2000 px. 
67 | 68 | ``` 69 | cvpr24acm 70 | ├── DOTA_devkit 71 | │ ├── datasets 72 | │ │ ├── DOTA 73 | │ │ │ ├── trainvalsplit-1024-256 74 | │ │ │ │ ├── images 75 | │ │ │ │ ├── labelTxt 76 | │ │ │ ├── trainvalsplit-multiscale 77 | │ │ │ │ ├── images 78 | │ │ │ │ ├── labelTxt 79 | │ │ │ │── test4000 80 | │ │ │ │ ├── images 81 | │ │ │ │ ├── labelTxt 82 | │ │ │── HRSC2016 83 | │ │ │ │── train 84 | │ │ │ │ ├── images 85 | │ │ │ │ ├── labelTxt 86 | │ │ │ │── test 87 | │ │ │ │ ├── images 88 | │ │ │ │ ├── labelTxt 89 | │ │ │── UCAS_AOD 90 | │ │ │ │── train 91 | │ │ │ │ ├── images 92 | │ │ │ │ ├── labelTxt 93 | │ │ │ │── test 94 | │ │ │ │ ├── images 95 | │ │ │ │ ├── labelTxt 96 | ``` 97 | 98 | for each annotation file (.txt), each line represent an object following: 99 | 100 | x1, y1, x2, y2, x3, y3, x4, y4, class, difficult 101 | 102 | ``` 103 | e.g.: 104 | 2753 2408 2861 2385 2888 2468 2805 2502 plane 0 105 | 3445 3391 3484 3409 3478 3422 3437 3402 large-vehicle 0 106 | 3185 4158 3195 4161 3175 4204 3164 4199 large-vehicle 0 107 | 2870 4250 2916 4268 2912 4283 2866 4263 large-vehicle 0 108 | 630 1674 628 1666 640 1654 644 1666 small-vehicle 0 109 | 636 1713 633 1706 646 1698 650 1706 small-vehicle 0 110 | 717 76 726 78 722 95 714 90 small-vehicle 0 111 | 737 82 744 84 739 101 731 98 small-vehicle 0 112 | ... 113 | ``` 114 | 115 | ### Training 116 | For example, using 2 GPUs to training a ResNet50 model: 117 | ``` 118 | #DOTA 119 | CUDA_VISIBLE_DEVICES=0,1 python train_dota.py --datadir ./DOTA_devkit/datasets/DOTA/trainvalsplit-1024-256 --model 50 120 | #HRSC2016 121 | CUDA_VISIBLE_DEVICES=0,1 python train_hrsc.py --datadir ./DOTA_devkit/datasets/HRSC2016/train --model 50 --input_size 640 122 | #UCAS-AOD 123 | CUDA_VISIBLE_DEVICES=0,1 python train_ucas.py --datadir ./DOTA_devkit/datasets/UCAS_AOD/train --model 50 --input_size 640 124 | ``` 125 | `--input_size ` is the long side resolution during training, which must be divisible by 32. 
126 | 127 | ### Testing 128 | 129 | For DOTA dataset: 130 | ``` 131 | # Single scale testing 132 | python evaluate.py --operation DOTA_test \ 133 | --model 50 --weight_path ./checkpoint/r50-scale=[0.5.1.0].pth \ 134 | --test_size 4000 --output_id 0 135 | ``` 136 | The results files will appear at "./result/test0/test0_final", which can be subsequently sent to the [DOTA server](http://bed4rs.net:8001/login/) to obtain the evaluation results. 137 | ``` 138 | # Multiscale scale testing 139 | python evaluate.py --operation DOTA_MS_test \ 140 | --model 50 --weight_path ./checkpoint/r50-scale=[0.5.1.0].pth \ 141 | --test_image_dir ./DOTA_devkit/datasets/DOTA/test4000/images 142 | ``` 143 | The results files will appear at "./result/test/test_final" 144 | 145 | For HRSC2016 or UCAS-AOD dataset: 146 | ``` 147 | # Single scale testing 148 | python evaluate.py --operation HRSC_test \ 149 | --model 50 --weight_path ${WEIGHT_PATH} \ 150 | --hrsc_test_size 640 151 | ``` 152 | ``` 153 | # Multiscale scale testing 154 | python evaluate.py --operation HRSC_MS_test \ 155 | --model 50 --weight_path ${WEIGHT_PATH} 156 | ``` 157 | 158 | ### Visualize 159 | For example, using pretrained ResNet50 model: 160 | ``` 161 | python evaluate.py --operation visualize \ 162 | --model 50 --weight_path ./checkpoint/r50-scale=[0.5.1.0].pth \ 163 | --img_path ./result/P1314.png 164 | ``` 165 | 166 | Optional arguments: 167 | * --score_thr :object confidence during detection. score greater than the confidence is considered to be a detected object. 168 | 169 | The visualization file appears in the current path as demo.jpg. 170 | 171 | ### More details 172 | To configure the model more finely in training/testing/visualizing, you can add some extra configuration items, e.g. 
173 | ``` 174 | CUDA_VISIBLE_DEVICES=0,1 python train_dota.py \ 175 | --heads 15 \ 176 | --model 50 \ 177 | --coder acm \ 178 | --coder_cfg 1 \ 179 | --coder_mode model \ 180 | --box_loss riou 181 | ``` 182 | - heads: the number of classification heads, corresponding to the number of dataset categories. 183 | - model: the version of ResNet backbone, e.g. 18 / 34 / 50 / 101 / 152 184 | - coder: the type of angle coder, e.g. none / acm / psc / csl 185 | - coder_cfg: the config of angle coder, 186 | 187 | for acm, coder_cfg >= 0 indicates that dual_freq=True. 188 | 189 | for psc, coder_cfg indicates the number of phase steps. 190 | 191 | for csl, coder_cfg indicates the encoding length. 192 | - coder_mode: coder mode, e.g. model / loss, where model indicates the model outputs the encoding while loss indicates the model outputs the angle itself. 193 | - box_loss: loss for box, e.g. none / kld / gwd / kfiou / riou. 194 | 195 | ## Citation 196 | 197 | If you find this code useful for your research, please consider citing: 198 | ``` 199 | @inproceedings{xu2024acm, 200 | title={Rethinking boundary discontinuity problem for oriented object detection}, 201 | author={Xu, Hang and Liu, Xinyuan and Xu, Haonan and Ma, Yike and Zhu, Zunjie and Yan, Chenggang and Dai, Feng}, 202 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 203 | pages={17406--17415}, 204 | year={2024} 205 | } 206 | ``` 207 | -------------------------------------------------------------------------------- /datasets/HRSCDataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import json 4 | import cv2 5 | import os 6 | import math 7 | import random 8 | from matplotlib.collections import PatchCollection 9 | from matplotlib.patches import Polygon, Circle 10 | import matplotlib.pyplot as plt 11 | import sys 12 | from torch.utils.data import Dataset 13 | import torch.nn.functional as F 14 | from
utils.utils import creat_label_heatmap 15 | from utils.smooth_label import gaussian_label 16 | from utils.aug import rotate_image 17 | from DOTA_devkit.DOTA import DOTA 18 | 19 | wordname_15 = ['ship'] 20 | 21 | 22 | class HRSCSetv1(Dataset): 23 | 24 | #rgb 25 | mean = np.array([[[0.485, 0.456, 0.406]]],dtype=np.float32) 26 | std = np.array([[[0.229, 0.224, 0.225]]],dtype=np.float32) 27 | 28 | def __init__(self, root_dir,img_size=640): 29 | self.numclasses = len(wordname_15) 30 | self.class2label = {} 31 | self.label2class = {} 32 | for i in range(self.numclasses): 33 | self.class2label[wordname_15[i]] = i 34 | self.label2class[i] = wordname_15[i] 35 | self.imgsize = img_size 36 | 37 | 38 | self.DOTA = DOTA(root_dir) 39 | self.imgids = self.DOTA.getImgIds() 40 | 41 | def __len__(self): 42 | return len(self.imgids) 43 | 44 | def __getitem__(self, idx): 45 | imgid = self.imgids[idx] 46 | img = self.DOTA.loadImgs(imgid)[0] #bgr 0~255 np 47 | ann = self.DOTA.loadAnns(imgId=imgid) 48 | 49 | if self.imgsize > 0: 50 | img,ann = self.flip_aug(img,ann) 51 | img,ann = self.rot_aug(img,ann) 52 | img,ann = self.resize(img, ann, self.imgsize) # 长边resize到imgsize, 再用(imgsize,imgsize)将其装下 53 | #img = self.gray_aug(img) 54 | 55 | 56 | converted_ann = self.convert_poly2cxcyhw(ann) #(N,6) np.float32 [cx,cy,h,w,theta(0~179),class] 57 | img = self.normalize(img) #rgb (h,w,c) 标准化 np 58 | 59 | if self.imgsize > 0: 60 | return torch.from_numpy(img.transpose(2,0,1)), converted_ann 61 | else: 62 | return torch.from_numpy(img.transpose(2,0,1)), converted_ann, imgid 63 | 64 | 65 | def convert_poly2cxcyhw(self,ann): 66 | #h 代表长边, w代表短边 67 | converted_ann = np.zeros((0,6),dtype=np.float32) #cx,cy,h,w,theta,class 68 | for i in range(len(ann)): 69 | p1,p2,p3,p4 = ann[i]['poly'] 70 | cx = ((p1[0]+p3[0])/2.0 + (p2[0]+p4[0])/2.0)/2.0 71 | cy = ((p1[1]+p3[1])/2.0 + (p2[1]+p4[1])/2.0)/2.0 72 | side1 = self.cal_line_length(p1,p2) 73 | side2 = self.cal_line_length(p2,p3) 74 | if side1>side2: 75 | r1,r2 
= p1,p2 76 | long_side = side1 77 | short_side = side2 78 | else: 79 | r1,r2 = p2,p3 80 | long_side = side2 81 | short_side = side1 82 | if long_side < 4.0 or short_side < 4.0: 83 | continue 84 | 85 | if r1[1] loading pretrained model {}'.format(url)) 268 | self.load_state_dict(pretrained_state_dict, strict=False) 269 | 270 | print('=> init deconv weights from normal distribution') 271 | for m in [*self.DT5.modules(), *self.DT4.modules(), *self.DT3.modules(), *self.projectD4.modules(), *self.projectD3.modules()]: 272 | if isinstance(m, nn.BatchNorm2d): 273 | nn.init.constant_(m.weight, 1) 274 | nn.init.constant_(m.bias, 0) 275 | 276 | 277 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2],[256,128,64],1), 278 | 34: (BasicBlock, [3, 4, 6, 3],[256,128,64],1), 279 | 50: (Bottleneck, [3, 4, 6, 3],[256,128,64],1), 280 | 101: (Bottleneck, [3, 4, 23, 3],[256,192,128],16), 281 | 152: (Bottleneck, [3, 8, 36, 3],[256,256,256],16)} 282 | 283 | 284 | def get_pose_net(num_layers, heads): 285 | block_class, layers, mid_channel,d_groups = resnet_spec[num_layers] 286 | 287 | model = PoseResNet(block_class, layers, heads,mid_channel,d_groups) 288 | model.init_weights(num_layers) 289 | return model 290 | 291 | if __name__ == "__main__" : 292 | 293 | heads = {'hm': 80, 294 | 'wh': 2 , 295 | 'reg': 2} 296 | 297 | model=get_pose_net(num_layers=50,heads=heads).cuda() 298 | print(model) 299 | x=torch.randn(1,3,512,512).cuda() 300 | out=model(x) 301 | print(out[0]['hm'].size()) 302 | print(out[0]['wh'].size()) 303 | 304 | -------------------------------------------------------------------------------- /nets/resnet_dcn_DFPN_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | import sys 5 | sys.path.append('./nets') 6 | from utils.Gaussian_focal_loss import losses 7 | from resnet_dcn_DFPN import get_pose_net 8 | import torch.utils.model_zoo as model_zoo 9 | import torch.nn.functional as F 10 | 
import math

import torch
import torch.nn as nn
import numpy as np
from torch.nn import Softmax


class ResNet(nn.Module):
    """CenterNet-style rotated-object detector.

    Wraps the DFPN ResNet backbone (``get_pose_net``) and decodes its four
    output maps (class heatmap, box scale, sub-pixel offset, angle) into
    rotated boxes.  The channel count of the ``theta`` head depends on the
    selected angle coder (NOT a fixed 180 channels).
    """

    def __init__(self, num_layers, num_cls=15, coder='acm', coder_cfg=1, coder_mode='model', box_loss='kfiou'):
        """
        Args:
            num_layers: ResNet depth (18/34/50/101/152) for the backbone.
            num_cls:    number of object classes (heatmap channels).
            coder:      angle coder: 'acm', 'psc', 'csl'; anything else
                        selects the pass-through PseudoAngleCoder.
            coder_cfg:  coder-specific setting (ACM dual-freq switch via its
                        sign; N for PSC/CSL).
            coder_mode: 'model' -> the head regresses the encoded angle
                        (encode_size channels); otherwise a single raw
                        channel and the coder is applied inside the loss.
            box_loss:   box-regression loss name, forwarded to ``losses``.
        """
        super(ResNet, self).__init__()

        self.coder_type = coder
        self.coder_mode = coder_mode

        # Imported lazily so this module can be loaded without the project
        # package on sys.path (e.g. when unit-testing the decode helpers).
        from utils.angle_coders import ACMCoder, PSCCoder, CSLCoder, PseudoAngleCoder
        if coder == 'acm':
            self.angle_coder = ACMCoder(dual_freq=(coder_cfg >= 0))
        elif coder == 'psc':
            self.angle_coder = PSCCoder(N=coder_cfg)
        elif coder == 'csl':
            self.angle_coder = CSLCoder(N=coder_cfg)
        else:
            self.angle_coder = PseudoAngleCoder()

        theta_ch = self.angle_coder.encode_size if coder_mode == 'model' else 1
        heads = {'hm': num_cls, 'wh': 2, 'reg': 2, 'theta': theta_ch}
        print(heads)
        self.num_cls = num_cls
        self.backbone = get_pose_net(num_layers=num_layers, heads=heads)
        self.losses = losses(self.angle_coder, coder_mode=coder_mode, box_loss=box_loss)

        self.sig = nn.Sigmoid()
        self.softmax = Softmax(dim=1)

    def forward(self, input):
        """Training mode: returns the loss tuple; eval mode: raw head outputs.

        Args:
            input: in training mode, a dict with keys 'img', 'label',
                   'heatmap_t' and 'theta'; in eval mode, the image tensor
                   (N, 3, H, W) itself.
        """
        if self.training:
            x = input['img']                 # (N,3,H,W), CUDA
            label = input['label']
            heatmap_t = input['heatmap_t']   # target heatmap, CUDA
            theta_g = input['theta']         # ground-truth angle targets
        else:
            x = input

        out = self.backbone(x)[0]
        heatmap = self.sig(out['hm'])        # (N,num_cls,H/4,W/4), in [0,1]
        scale = out['wh']                    # (N,2,H/4,W/4), log box size
        offset = out['reg']                  # (N,2,H/4,W/4), sub-pixel offset

        # Squash the angle prediction into the range its coder expects.
        if self.coder_type in ['acm', 'psc']:
            theta_p = torch.tanh(out['theta'])      # (N,enc,H/4,W/4), [-1,1]
        elif self.coder_type in ['csl']:
            theta_p = torch.sigmoid(out['theta'])   # (N,enc,H/4,W/4), [0,1]
        else:
            theta_p = out['theta']                  # raw single channel

        if self.training:
            return self.losses(heatmap, scale, offset, theta_p, heatmap_t, label, theta_g)
        else:
            return [heatmap, scale, offset, theta_p]

    def _decode(self, heatmap, scale, offset, theta, process_H, process_W, scorethr):
        """Decode one class channel into rotated boxes.

        Args:
            heatmap:   (process_H/4, process_W/4) CPU tensor, one class.
            scale:     (1, 2, process_H/4, process_W/4) CPU tensor.
            offset:    (1, 2, process_H/4, process_W/4) CPU tensor.
            theta:     (1, C, process_H/4, process_W/4) CPU tensor; C is the
                       coder embedding size ('model' mode) or 1 ('loss' mode).
            process_H, process_W: size of the image fed to the network.
            scorethr:  heatmap score threshold.

        Returns:
            (num_objs, 6) float32 array of (cx, cy, h, w, theta_deg, score)
            in process_H x process_W image coordinates.
        """
        heatmap = heatmap.squeeze().numpy()
        scale0, scale1 = scale[0, 0, :, :].numpy(), scale[0, 1, :, :].numpy()
        offset0, offset1 = offset[0, 0, :, :].numpy(), offset[0, 1, :, :].numpy()
        theta = theta[0, :, :, :]  # (C, process_H/4, process_W/4)

        c0, c1 = np.where(heatmap > scorethr)
        boxes = []
        for i in range(len(c0)):
            # Sizes are regressed in log space on the stride-4 grid.
            s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
            o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
            s = heatmap[c0[i], c1[i]]
            # Centre = (cell + offset + 0.5) * stride, clamped to the image.
            cx, cy = max(0, (c1[i] + o1 + 0.5) * 4), max(0, (c0[i] + o0 + 0.5) * 4)
            cx, cy = min(cx, process_W), min(cy, process_H)

            if self.coder_mode == 'loss':
                # The single channel already holds the angle in radians.
                angle = theta[0, c0[i], c1[i]].item()
            elif self.coder_mode == 'model':
                angle_ebd = theta[:, c0[i], c1[i]].reshape((1, -1))  # (1, C)
                angle = self.angle_coder.decode(angle_ebd).item()
            else:
                raise NotImplementedError

            boxes.append([cx, cy, s0, s1, angle / math.pi * 180, s])

        return np.asarray(boxes, dtype=np.float32)

    def decode_per_img(self, heatmap, scale, offset, theta, H, W, scorethr):
        """Decode every class of a single image into rotated boxes.

        Args:
            heatmap: (1, num_cls, H/4, W/4) tensor, after sigmoid.
                     NOTE: suppressed in place by the max-pool NMS below.
            scale:   (1, 2, H/4, W/4) tensor.
            offset:  (1, 2, H/4, W/4) tensor.
            theta:   (1, C, H/4, W/4) tensor (C = coder embedding size).
            H, W:    network input size passed through to ``_decode``.
            scorethr: heatmap score threshold.

        Returns:
            (total_objs, 7) float32 array [cx, cy, h, w, theta, class, score];
            an empty (0, 7) array when nothing is detected.
        """
        # 3x3 max-pool NMS: keep only local maxima of the heatmap.
        pooling = torch.nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        h_p = pooling(heatmap)
        heatmap[heatmap != h_p] = 0

        results = []
        for i in range(self.num_cls):
            bboxs = self._decode(heatmap[0, i, :, :].cpu(), scale.cpu(), offset.cpu(),
                                 theta.cpu(), H, W, scorethr)
            if len(bboxs) > 0:
                single_result = np.zeros((len(bboxs), 7), dtype=np.float32)
                single_result[:, :5] = bboxs[:, :5]
                single_result[:, 5] = i            # class index
                single_result[:, 6] = bboxs[:, 5]  # detection score
                results.append(single_result)
        if len(results) > 0:
            return np.concatenate(results, axis=0)
        # Consistent return type: previously an empty python list leaked out.
        return np.zeros((0, 7), dtype=np.float32)
5 | 100001014 6 | 100000815 7 | 100000634 8 | 100001059 9 | 100000676 10 | 100001391 11 | 100001162 12 | 100000850 13 | 100001520 14 | 100001379 15 | 100000965 16 | 100000790 17 | 100001101 18 | 100001332 19 | 100000783 20 | 100000938 21 | 100001036 22 | 100001430 23 | 100000626 24 | 100001581 25 | 100000747 26 | 100000907 27 | 100001056 28 | 100000805 29 | 100001211 30 | 100000927 31 | 100000663 32 | 100000792 33 | 100001051 34 | 100001032 35 | 100000722 36 | 100001429 37 | 100001645 38 | 100001548 39 | 100000991 40 | 100000825 41 | 100000804 42 | 100001422 43 | 100001533 44 | 100001172 45 | 100001175 46 | 100001597 47 | 100001409 48 | 100000862 49 | 100000828 50 | 100001674 51 | 100001484 52 | 100000858 53 | 100001274 54 | 100001111 55 | 100001441 56 | 100000696 57 | 100000996 58 | 100001595 59 | 100001205 60 | 100001331 61 | 100000826 62 | 100001061 63 | 100001562 64 | 100001534 65 | 100001325 66 | 100000863 67 | 100001347 68 | 100001116 69 | 100001635 70 | 100000814 71 | 100001241 72 | 100001486 73 | 100001448 74 | 100001517 75 | 100001134 76 | 100001477 77 | 100000650 78 | 100001206 79 | 100000934 80 | 100001424 81 | 100001202 82 | 100001268 83 | 100001490 84 | 100000672 85 | 100001359 86 | 100000970 87 | 100001454 88 | 100001203 89 | 100001555 90 | 100001339 91 | 100001270 92 | 100001105 93 | 100001594 94 | 100000771 95 | 100001402 96 | 100001013 97 | 100001141 98 | 100001294 99 | 100001149 100 | 100000836 101 | 100001333 102 | 100001075 103 | 100001011 104 | 100001528 105 | 100001315 106 | 100001662 107 | 100001109 108 | 100001410 109 | 100001406 110 | 100001412 111 | 100001087 112 | 100000735 113 | 100000892 114 | 100001616 115 | 100000740 116 | 100001531 117 | 100001596 118 | 100001509 119 | 100001369 120 | 100001171 121 | 100000817 122 | 100000952 123 | 100001538 124 | 100000643 125 | 100001461 126 | 100001384 127 | 100001280 128 | 100001568 129 | 100001452 130 | 100000865 131 | 100000700 132 | 100001108 133 | 100001117 134 | 100001423 135 | 100000857 136 
| 100001147 137 | 100001257 138 | 100001652 139 | 100000919 140 | 100001065 141 | 100000725 142 | 100001123 143 | 100001574 144 | 100000739 145 | 100000834 146 | 100000647 147 | 100000630 148 | 100001178 149 | 100000697 150 | 100000713 151 | 100000776 152 | 100001416 153 | 100001356 154 | 100001537 155 | 100001248 156 | 100001392 157 | 100001505 158 | 100001179 159 | 100001076 160 | 100001158 161 | 100001381 162 | 100001443 163 | 100001672 164 | 100001589 165 | 100000653 166 | 100000731 167 | 100001342 168 | 100001309 169 | 100000773 170 | 100001584 171 | 100001326 172 | 100001255 173 | 100001615 174 | 100001573 175 | 100000654 176 | 100001455 177 | 100000807 178 | 100001072 179 | 100001160 180 | 100001567 181 | 100000645 182 | 100001560 183 | 100001565 184 | 100000946 185 | 100001272 186 | 100001389 187 | 100001164 188 | 100001468 189 | 100001487 190 | 100001010 191 | 100000831 192 | 100001048 193 | 100001644 194 | 100001385 195 | 100000894 196 | 100001153 197 | 100000656 198 | 100001667 199 | 100001444 200 | 100000942 201 | 100000842 202 | 100001133 203 | 100001091 204 | 100000751 205 | 100001431 206 | 100001471 207 | 100001322 208 | 100001249 209 | 100000824 210 | 100001542 211 | 100000925 212 | 100000844 213 | 100000659 214 | 100001628 215 | 100001120 216 | 100001512 217 | 100000873 218 | 100000720 219 | 100000830 220 | 100001532 221 | 100001467 222 | 100001288 223 | 100000763 224 | 100000789 225 | 100001185 226 | 100000835 227 | 100000931 228 | 100000957 229 | 100001297 230 | 100000718 231 | 100000793 232 | 100000690 233 | 100001646 234 | 100000778 235 | 100000661 236 | 100001220 237 | 100001258 238 | 100000984 239 | 100001417 240 | 100001004 241 | 100000812 242 | 100001609 243 | 100001527 244 | 100001186 245 | 100001642 246 | 100001442 247 | 100001458 248 | 100001554 249 | 100001060 250 | 100001045 251 | 100001572 252 | 100000674 253 | 100001043 254 | 100001102 255 | 100000889 256 | 100001221 257 | 100000698 258 | 100001077 259 | 100000854 260 | 100001658 261 
| 100000950 262 | 100001308 263 | 100000779 264 | 100000803 265 | 100001491 266 | 100001176 267 | 100001387 268 | 100001377 269 | 100001540 270 | 100001503 271 | 100001449 272 | 100000867 273 | 100000832 274 | 100001544 275 | 100001602 276 | 100001633 277 | 100001582 278 | 100000732 279 | 100001515 280 | 100001006 281 | 100000003 282 | 100001232 283 | 100001099 284 | 100001046 285 | 100000959 286 | 100000005 287 | 100001457 288 | 100000929 289 | 100001188 290 | 100001166 291 | 100001399 292 | 100000989 293 | 100001592 294 | 100001472 295 | 100001236 296 | 100001543 297 | 100000945 298 | 100001570 299 | 100001237 300 | 100001127 301 | 100001085 302 | 100001529 303 | 100001591 304 | 100001092 305 | 100000954 306 | 100001066 307 | 100001073 308 | 100001301 309 | 100001469 310 | 100001552 311 | 100001453 312 | 100000785 313 | 100001599 314 | 100000909 315 | 100001198 316 | 100001215 317 | 100000679 318 | 100001513 319 | 100000729 320 | 100001180 321 | 100000686 322 | 100001049 323 | 100001561 324 | 100001103 325 | 100001427 326 | 100001022 327 | 100000666 328 | 100000736 329 | 100000939 330 | 100001040 331 | 100000855 332 | 100001313 333 | 100000871 334 | 100001659 335 | 100001483 336 | 100001140 337 | 100001545 338 | 100001481 339 | 100001126 340 | 100001112 341 | 100000967 342 | 100001029 343 | 100001638 344 | 100001269 345 | 100000677 346 | 100000898 347 | 100001143 348 | 100001151 349 | 100001177 350 | 100000913 351 | 100000625 352 | 100001440 353 | 100001414 354 | 100000975 355 | 100000706 356 | 100000658 357 | 100000707 358 | 100000802 359 | 100000912 360 | 100001017 361 | 100001161 362 | 100000716 363 | 100000890 364 | 100001394 365 | 100001608 366 | 100001000 367 | 100001340 368 | 100001316 369 | 100001439 370 | 100001054 371 | 100001282 372 | 100000774 373 | 100001119 374 | 100001445 375 | 100001156 376 | 100001244 377 | 100001035 378 | 100001041 379 | 100001639 380 | 100001144 381 | 100000708 382 | 100001495 383 | 100000876 384 | 100000833 385 | 100000920 386 
| 100001675 387 | 100000673 388 | 100001465 389 | 100001058 390 | 100001104 391 | 100001502 392 | 100000983 393 | 100001610 394 | 100001018 395 | 100001482 396 | 100000800 397 | 100000811 398 | 100000635 399 | 100001145 400 | 100000999 401 | 100000728 402 | 100000914 403 | 100001664 404 | 100000688 405 | 100000648 406 | 100000710 407 | 100000623 408 | 100000881 409 | 100001507 410 | 100001636 411 | 100000971 412 | 100001181 413 | 100000680 414 | 100001039 415 | 100001343 416 | 100000848 417 | 100001583 418 | 100000846 419 | 100001152 420 | 100001499 421 | 100001541 422 | 100001106 423 | 100001673 424 | 100000849 425 | 100001286 426 | 100001252 427 | 100001660 428 | 100000746 429 | 100001669 430 | 100001349 431 | 100001005 432 | 100001649 433 | 100001380 434 | 100000951 435 | 100001067 436 | 100000745 437 | 100001375 438 | 100001305 439 | 100001613 440 | 100001433 441 | 100001480 442 | 100000904 443 | 100000845 444 | 100001107 445 | -------------------------------------------------------------------------------- /result/testID.txt: -------------------------------------------------------------------------------- 1 | P0747 2 | P1093 3 | P0797 4 | P1197 5 | P2400 6 | P2675 7 | P1820 8 | P0423 9 | P0429 10 | P1626 11 | P0006 12 | P1885 13 | P1826 14 | P1886 15 | P1202 16 | P1385 17 | P2424 18 | P2715 19 | P0511 20 | P2661 21 | P1324 22 | P1271 23 | P0924 24 | P1956 25 | P1681 26 | P2796 27 | P1723 28 | P2023 29 | P1477 30 | P1865 31 | P2688 32 | P1192 33 | P1760 34 | P2025 35 | P0500 36 | P0092 37 | P2784 38 | P0820 39 | P2711 40 | P0240 41 | P0752 42 | P1175 43 | P0574 44 | P1235 45 | P1041 46 | P1312 47 | P1858 48 | P1677 49 | P1485 50 | P1592 51 | P1011 52 | P2146 53 | P1855 54 | P2174 55 | P0253 56 | P0284 57 | P1129 58 | P1068 59 | P0287 60 | P2398 61 | P0312 62 | P2369 63 | P2391 64 | P1064 65 | P2785 66 | P2238 67 | P1781 68 | P1811 69 | P2265 70 | P2723 71 | P0694 72 | P1761 73 | P0585 74 | P1503 75 | P2762 76 | P1963 77 | P2147 78 | P1953 79 | P2521 80 | P1304 
81 | P0680 82 | P0490 83 | P0311 84 | P1244 85 | P1424 86 | P0529 87 | P2493 88 | P0592 89 | P2537 90 | P0105 91 | P1777 92 | P0902 93 | P0243 94 | P0917 95 | P0231 96 | P2652 97 | P1690 98 | P0971 99 | P0180 100 | P0958 101 | P2312 102 | P1737 103 | P2314 104 | P2596 105 | P1624 106 | P0318 107 | P0863 108 | P0935 109 | P0742 110 | P2046 111 | P1379 112 | P0106 113 | P1085 114 | P0532 115 | P2137 116 | P2333 117 | P1974 118 | P1229 119 | P1024 120 | P2474 121 | P1796 122 | P2511 123 | P2069 124 | P2441 125 | P2451 126 | P0598 127 | P1815 128 | P0035 129 | P2233 130 | P0121 131 | P1160 132 | P1538 133 | P0600 134 | P1169 135 | P1254 136 | P1145 137 | P0576 138 | P0628 139 | P1196 140 | P2414 141 | P0824 142 | P1033 143 | P1436 144 | P0616 145 | P2358 146 | P2707 147 | P0758 148 | P0717 149 | P2113 150 | P1028 151 | P1415 152 | P2263 153 | P0449 154 | P0177 155 | P1628 156 | P1381 157 | P0793 158 | P1630 159 | P2623 160 | P1111 161 | P2567 162 | P0412 163 | P1511 164 | P0077 165 | P2347 166 | P1590 167 | P2749 168 | P2684 169 | P0214 170 | P2500 171 | P0976 172 | P1694 173 | P0536 174 | P1971 175 | P1226 176 | P2297 177 | P2161 178 | P2589 179 | P2386 180 | P1814 181 | P1423 182 | P0765 183 | P0808 184 | P2267 185 | P1611 186 | P0862 187 | P0798 188 | P2636 189 | P2643 190 | P1516 191 | P0599 192 | P2104 193 | P1975 194 | P0267 195 | P1146 196 | P0771 197 | P2772 198 | P0501 199 | P1729 200 | P0640 201 | P0836 202 | P2633 203 | P2425 204 | P1013 205 | P1532 206 | P2499 207 | P1637 208 | P2300 209 | P1644 210 | P1873 211 | P0912 212 | P0397 213 | P0114 214 | P2434 215 | P1355 216 | P0024 217 | P1654 218 | P1162 219 | P1802 220 | P0191 221 | P1280 222 | P0474 223 | P1921 224 | P0698 225 | P0277 226 | P0037 227 | P1296 228 | P1263 229 | P1183 230 | P0608 231 | P1454 232 | P1603 233 | P0211 234 | P1678 235 | P0672 236 | P1253 237 | P0090 238 | P0655 239 | P2562 240 | P1999 241 | P0709 242 | P1642 243 | P1803 244 | P2316 245 | P0247 246 | P1083 247 | P1792 248 | P0959 
249 | P0157 250 | P1543 251 | P2171 252 | P1286 253 | P2323 254 | P1123 255 | P0679 256 | P1302 257 | P2078 258 | P0705 259 | P2799 260 | P0671 261 | P1400 262 | P1233 263 | P0636 264 | P2774 265 | P1248 266 | P0942 267 | P1684 268 | P0639 269 | P1048 270 | P0496 271 | P1733 272 | P1782 273 | P0877 274 | P1667 275 | P2051 276 | P1920 277 | P0268 278 | P1050 279 | P2208 280 | P1262 281 | P0403 282 | P1461 283 | P1718 284 | P1310 285 | P2170 286 | P0245 287 | P1034 288 | P0072 289 | P1218 290 | P0701 291 | P1112 292 | P1629 293 | P1567 294 | P0811 295 | P0112 296 | P1774 297 | P2176 298 | P2318 299 | P1172 300 | P0304 301 | P2357 302 | P2666 303 | P0181 304 | P1282 305 | P1664 306 | P2178 307 | P2640 308 | P2581 309 | P2341 310 | P0736 311 | P2367 312 | P2216 313 | P1576 314 | P0447 315 | P2767 316 | P0470 317 | P2559 318 | P1203 319 | P2525 320 | P1074 321 | P1762 322 | P0291 323 | P2140 324 | P0575 325 | P1941 326 | P1668 327 | P1722 328 | P0847 329 | P1520 330 | P1625 331 | P0854 332 | P1943 333 | P1494 334 | P2407 335 | P2195 336 | P1970 337 | P2524 338 | P0298 339 | P1402 340 | P2083 341 | P1025 342 | P0670 343 | P2299 344 | P0776 345 | P2484 346 | P0080 347 | P0472 348 | P1613 349 | P0774 350 | P1058 351 | P0182 352 | P2246 353 | P2473 354 | P0326 355 | P1405 356 | P2295 357 | P1362 358 | P0034 359 | P0508 360 | P1490 361 | P0756 362 | P0088 363 | P0026 364 | P2336 365 | P1831 366 | P2540 367 | P0137 368 | P0690 369 | P1946 370 | P1817 371 | P0009 372 | P0155 373 | P2193 374 | P1716 375 | P1441 376 | P1853 377 | P1648 378 | P2461 379 | P1710 380 | P2544 381 | P2573 382 | P1882 383 | P2031 384 | P2786 385 | P0307 386 | P2737 387 | P2165 388 | P1004 389 | P2081 390 | P2340 391 | P0107 392 | P0302 393 | P0566 394 | P0834 395 | P0048 396 | P1491 397 | P1755 398 | P2798 399 | P1985 400 | P0561 401 | P2284 402 | P0473 403 | P2635 404 | P1769 405 | P1993 406 | P1372 407 | P1901 408 | P0184 409 | P0710 410 | P0062 411 | P2698 412 | P1001 413 | P2405 414 | P2308 415 | 
P1510 416 | P2649 417 | P0523 418 | P1575 419 | P2360 420 | P1135 421 | P1190 422 | P0876 423 | P1527 424 | P0051 425 | P0033 426 | P0661 427 | P2530 428 | P2717 429 | P1883 430 | P0480 431 | P1284 432 | P2752 433 | P1367 434 | P2291 435 | P0937 436 | P1546 437 | P1767 438 | P2683 439 | P1862 440 | P1290 441 | P0675 442 | P2526 443 | P1766 444 | P0132 445 | P0208 446 | P0667 447 | P2040 448 | P1578 449 | P1363 450 | P1657 451 | P0633 452 | P1040 453 | P0153 454 | P0274 455 | P1345 456 | P2320 457 | P0739 458 | P1813 459 | P2268 460 | P2765 461 | P0333 462 | P1810 463 | P2200 464 | P1383 465 | P1417 466 | P1227 467 | P0055 468 | P2186 469 | P2209 470 | P1056 471 | P0108 472 | P1621 473 | P2009 474 | P1163 475 | P2588 476 | P1378 477 | P2699 478 | P0735 479 | P0988 480 | P2660 481 | P2273 482 | P1215 483 | P1330 484 | P1237 485 | P0645 486 | P0192 487 | P2298 488 | P1206 489 | P1711 490 | P1335 491 | P2119 492 | P2487 493 | P2325 494 | P0394 495 | P1695 496 | P0901 497 | P0271 498 | P0852 499 | P0297 500 | P0663 501 | P1435 502 | P1553 503 | P0788 504 | P1126 505 | P2795 506 | P1828 507 | P2782 508 | P1348 509 | P2624 510 | P2713 511 | P1622 512 | P0773 513 | P2105 514 | P0328 515 | P0545 516 | P1045 517 | P1568 518 | P1002 519 | P2776 520 | P0515 521 | P2024 522 | P2515 523 | P1313 524 | P0230 525 | P2315 526 | P1082 527 | P1504 528 | P1120 529 | P0996 530 | P0125 531 | P2338 532 | P2574 533 | P2510 534 | P0295 535 | P1655 536 | P0101 537 | P0784 538 | P1890 539 | P0399 540 | P2153 541 | P1764 542 | P0859 543 | P0519 544 | P1536 545 | P1897 546 | P1665 547 | P2317 548 | P0997 549 | P1922 550 | P0509 551 | P0341 552 | P0865 553 | P2037 554 | P2101 555 | P2345 556 | P0462 557 | P2296 558 | P1468 559 | P1472 560 | P2015 561 | P2697 562 | P2480 563 | P0528 564 | P2064 565 | P0812 566 | P0916 567 | P0265 568 | P2041 569 | P0316 570 | P2744 571 | P2508 572 | P2384 573 | P0657 574 | P2188 575 | P0830 576 | P0451 577 | P0985 578 | P0196 579 | P2274 580 | P2602 581 | P1836 
582 | P2556 583 | P0408 584 | P0923 585 | P1318 586 | P0043 587 | P1301 588 | P1734 589 | P2568 590 | P0540 591 | P1889 592 | P1595 593 | P1293 594 | P0530 595 | P0903 596 | P1124 597 | P1847 598 | P2406 599 | P0084 600 | P1289 601 | P0740 602 | P1772 603 | P0751 604 | P2667 605 | P0772 606 | P1008 607 | P1952 608 | P1797 609 | P0720 610 | P1844 611 | P1884 612 | P2410 613 | P2498 614 | P1407 615 | P0319 616 | P1835 617 | P0982 618 | P0514 619 | P0028 620 | P1036 621 | P1588 622 | P0534 623 | P0467 624 | P2679 625 | P1406 626 | P2008 627 | P1771 628 | P1701 629 | P1303 630 | P2229 631 | P2043 632 | P2021 633 | P1100 634 | P2261 635 | P0120 636 | P0678 637 | P1721 638 | P0135 639 | P0669 640 | P1017 641 | P1500 642 | P0057 643 | P1347 644 | P1442 645 | P2463 646 | P0930 647 | P0927 648 | P2383 649 | P2307 650 | P1800 651 | P1968 652 | P1270 653 | P1044 654 | P2112 655 | P2096 656 | P0681 657 | P2252 658 | P0433 659 | P1285 660 | P1039 661 | P1593 662 | P2072 663 | P1937 664 | P0320 665 | P0266 666 | P2607 667 | P2290 668 | P2075 669 | P1448 670 | P2376 671 | P0686 672 | P0040 673 | P0931 674 | P2586 675 | P1236 676 | P0584 677 | P1914 678 | P0886 679 | P2546 680 | P1097 681 | P2564 682 | P0154 683 | P0205 684 | P1346 685 | P1151 686 | P0539 687 | P1898 688 | P2136 689 | P0955 690 | P0741 691 | P1967 692 | P0463 693 | P1252 694 | P1136 695 | P1042 696 | P1000 697 | P1264 698 | P0674 699 | P2117 700 | P2353 701 | P0492 702 | P2254 703 | P1102 704 | P1834 705 | P0166 706 | P1819 707 | P2619 708 | P0138 709 | P0682 710 | P0984 711 | P0983 712 | P0954 713 | P0498 714 | P0148 715 | P1176 716 | P0456 717 | P2128 718 | P1220 719 | P1944 720 | P2620 721 | P0718 722 | P1635 723 | P2187 724 | P1038 725 | P0195 726 | P0372 727 | P0992 728 | P2000 729 | P0185 730 | P1731 731 | P0767 732 | P0588 733 | P1334 734 | P0939 735 | P2402 736 | P2399 737 | P0016 738 | P1349 739 | P2757 740 | P1548 741 | P1824 742 | P2372 743 | P0919 744 | P1031 745 | P2486 746 | P1447 747 | P1422 748 | 
P0279 749 | P0842 750 | P2440 751 | P2223 752 | P2202 753 | P2658 754 | P0880 755 | P0015 756 | P1840 757 | P2139 758 | P1671 759 | P0802 760 | P0278 761 | P0513 762 | P0089 763 | P0889 764 | P2212 765 | P1605 766 | P1559 767 | P0965 768 | P0045 769 | P2120 770 | P1735 771 | P0956 772 | P1195 773 | P2575 774 | P0145 775 | P0708 776 | P1715 777 | P2654 778 | P1988 779 | P0644 780 | P1852 781 | P1692 782 | P1153 783 | P0127 784 | P0507 785 | P1404 786 | P2412 787 | P0410 788 | P1106 789 | P1081 790 | P1032 791 | P0606 792 | P0197 793 | P2292 794 | P1409 795 | P1663 796 | P0891 797 | P0073 798 | P1961 799 | P1411 800 | P0306 801 | P1544 802 | P1180 803 | P1294 804 | P1261 805 | P0390 806 | P1425 807 | P1712 808 | P0017 809 | P1612 810 | P2780 811 | P0807 812 | P0398 813 | P0662 814 | P2535 815 | P2736 816 | P1115 817 | P1177 818 | P1857 819 | P1925 820 | P1157 821 | P2447 822 | P0570 823 | P0234 824 | P0594 825 | P2350 826 | P2439 827 | P1104 828 | P2361 829 | P1127 830 | P1018 831 | P2603 832 | P1870 833 | P0323 834 | P1528 835 | P2677 836 | P0269 837 | P2806 838 | P0813 839 | P0356 840 | P1627 841 | P2329 842 | P1327 843 | P2366 844 | P0239 845 | P2647 846 | P0059 847 | P1204 848 | P1320 849 | P2108 850 | P2094 851 | P0427 852 | P1564 853 | P0134 854 | P1728 855 | P1488 856 | P2773 857 | P0216 858 | P2738 859 | P2102 860 | P0866 861 | P1125 862 | P1396 863 | P0502 864 | P0097 865 | P2445 866 | P2035 867 | P0165 868 | P0290 869 | P2448 870 | P2489 871 | P1530 872 | P0046 873 | P2763 874 | P1572 875 | P1830 876 | P2703 877 | P2183 878 | P0351 879 | P1316 880 | P0325 881 | P0115 882 | P2459 883 | P0994 884 | P0621 885 | P1072 886 | P0497 887 | P2803 888 | P1194 889 | P0031 890 | P2092 891 | P1706 892 | P0193 893 | P1687 894 | P0354 895 | P0369 896 | P1936 897 | P1326 898 | P2443 899 | P1188 900 | P0485 901 | P0635 902 | P2648 903 | P0943 904 | P0444 905 | P0560 906 | P1331 907 | P2694 908 | P2583 909 | P2276 910 | P2293 911 | P0069 912 | P1949 913 | P2277 914 | P1035 
915 | P2084 916 | P0014 917 | P0748 918 | P0409 919 | P1798 920 | P1430 921 | P1916 922 | P1496 923 | P2760 924 | P1525 925 | P1549 926 | P0102 927 | P2431 928 | P1979 929 | P2555 930 | P2172 931 | P0118 932 | P2272 933 | P0156 934 | P0506 935 | P1523 936 | P0721 937 | P0552 938 | -------------------------------------------------------------------------------- /scripts/evaluate_dota.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=7 python evaluate.py --operation DOTA_test \ 2 | --heads 15 \ 3 | --model 50 \ 4 | --coder none \ 5 | --coder_cfg 1 \ 6 | --coder_mode loss \ 7 | --box_loss riou \ 8 | --weight_path checkpoint/dota/KLD_140.pth \ 9 | --test_image_dir /shared/datasets/DOTA/test4000/images \ 10 | --output_id 1 11 | -------------------------------------------------------------------------------- /scripts/evaluate_hrsc_ms.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=7 python evaluate.py --operation HRSC_MS_test \ 2 | --heads 1 \ 3 | --model 50 \ 4 | --coder acm \ 5 | --coder_cfg -1 \ 6 | --coder_mode model \ 7 | --box_loss riou \ 8 | --weight_path checkpoint/hrsc/RIoU_Our_140.pth \ 9 | --hrsc_test_size 640 \ 10 | --use_07_metric \ 11 | --ap_thres 0.75 12 | -------------------------------------------------------------------------------- /scripts/evaluate_hrsc_ss.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=2 python evaluate.py --operation HRSC_test \ 2 | --heads 1 \ 3 | --model 50 \ 4 | --coder acm \ 5 | --coder_cfg -1 \ 6 | --coder_mode model \ 7 | --box_loss riou \ 8 | --weight_path checkpoint/riou_acm_-1_model_140.pth \ 9 | --hrsc_test_size 640 \ 10 | --use_07_metric \ 11 | --ap_thres 0.50 12 | -------------------------------------------------------------------------------- /scripts/train_dota.sh: 
-------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dota.py \ 2 | --datadir /shared/datasets/DOTA/split_ss_dota/trainval \ 3 | --heads 1 \ 4 | --model 50 \ 5 | --coder acm \ 6 | --coder_cfg 1 \ 7 | --coder_mode model \ 8 | --box_loss riou \ 9 | --input_size 1024 \ 10 | --batch_size 24 \ 11 | --dist-url tcp://127.0.0.1:2543 12 | -------------------------------------------------------------------------------- /scripts/train_hrsc.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=4,5,6,7 python train_hrsc.py \ 2 | --datadir /data/shared/datasets/HRSC2016/train \ 3 | --heads 1 \ 4 | --model 50 \ 5 | --coder acm \ 6 | --coder_cfg 1 \ 7 | --coder_mode model \ 8 | --box_loss riou \ 9 | --input_size 640 \ 10 | --batch_size 16 \ 11 | --dist-url tcp://127.0.0.1:2543 -------------------------------------------------------------------------------- /scripts/visualize.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=7 python evaluate.py --operation visualize \ 2 | --heads 1 \ 3 | --model 50 \ 4 | --coder acm \ 5 | --coder_cfg -1 \ 6 | --coder_mode model \ 7 | --box_loss riou \ 8 | --weight_path checkpoint/riou_acm_-1_model_140.pth \ 9 | --img_path image/result/172.png 10 | -------------------------------------------------------------------------------- /train_dota.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | from __future__ import absolute_import 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | import torch.nn.functional as F 9 | import torch.backends.cudnn as cudnn 10 | import torchvision.transforms as transforms 11 | import torchvision.datasets as datasets 12 | import numpy as np 13 | import cv2 14 | import random 15 | import os 16 
import yaml
import collections
import argparse
import sys
sys.path.append('DOTA_devkit')
from tqdm import tqdm

import torch.distributed as dist
import torch.utils.data.distributed
import torch.multiprocessing as mp


from nets.resnet_dcn_DFPN_model import ResNet
from datasets.DotaDataset import DotaSetv1, collater


parser = argparse.ArgumentParser()

# Data / model configuration.
parser.add_argument("--input_size", default=1024, type=int)
parser.add_argument("--datadir", type=str, default='../DOTA/datasets/trainvalsplit-1024-256')
parser.add_argument("--heads", type=int, default=15)   # number of classes (heatmap channels)
parser.add_argument("--coder", default='acm')          # angle coder: acm / psc / csl / none
parser.add_argument("--coder_cfg", default=1, type=int)
parser.add_argument("--coder_mode", default='model')   # 'model' or 'loss'
parser.add_argument("--box_loss", default='kfiou')
parser.add_argument("--model", type=int, default=50)   # ResNet depth

parser.add_argument("--num_workers", type=int, default=8)
parser.add_argument('--seed', default=2021, type=int, help='random seed')

# Optimisation schedule.
parser.add_argument("--epochs", type=int, default=140)
parser.add_argument("--start_epoch", type=int, default=0)
parser.add_argument("--batch_size", type=int, default=12, help="size of each image batch")
# type=float so a CLI override does not arrive as a string.
parser.add_argument("--lr", default=1.25e-4, type=float)
# nargs/type so the decay milestones can actually be overridden from the CLI.
parser.add_argument("--lr_decay", default=[100, 130], nargs='+', type=int)
parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
parser.add_argument('--resume_weight_path', default="")
parser.add_argument("--save_interval", type=int, default=10)
parser.add_argument('--log_path', default="./result/debug.txt")

# Distributed-training configuration.
parser.add_argument('--dist-url', default='tcp://127.0.0.1:2500', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--nodes', default=1, type=int,
                    help='total number of nodes for distributed training')
parser.add_argument('--rank', default=0, type=int,
                    help='node rank for distributed training')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--world-size', default=-1, type=int,
                    help='total number of process for distributed training')
parser.add_argument('--local_rank', default=0, type=int)


def main():
    """Parse arguments, seed all RNGs, and spawn one training worker per GPU."""
    args = parser.parse_args()
    print(args)

    # Seed every RNG for reproducibility (workers re-seed themselves too).
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True

    ngpus_per_node = torch.cuda.device_count()
    args.world_size = ngpus_per_node * args.nodes

    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))


def main_worker(gpu, ngpus_per_node, args):
    """Per-process training loop (one process per GPU, DDP over NCCL)."""
    random.seed(args.seed)
    np.random.seed(args.seed + gpu)  # distinct numpy stream per worker (data aug)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True

    args.gpu = gpu
    print("Use GPU: {} for training".format(args.gpu))
    args.rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)

    torch.cuda.set_device(args.gpu)

    # Running mean over the most recent 1000 iterations.
    train_loss = collections.deque(maxlen=1000)

    print("=> creating model.")
    model = ResNet(num_layers=args.model, num_cls=args.heads, coder=args.coder,
                   coder_cfg=args.coder_cfg, coder_mode=args.coder_mode,
                   box_loss=args.box_loss).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu],
                                                      find_unused_parameters=False)

    if args.resume:
        model.load_state_dict(torch.load(args.resume_weight_path,
                                         map_location={"cuda:0": "cuda:{}".format(args.gpu)}))
        print("==>finished loading weight from: ", args.resume_weight_path)

    cudnn.benchmark = True

    print("=> preparing data")
    # Per-process share of the global batch size and worker count.
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.num_workers = int((args.num_workers + ngpus_per_node - 1) / ngpus_per_node)
    trainset = DotaSetv1(root_dir=args.datadir, img_size=args.input_size)
    print("training images: {}".format(len(trainset)))
    train_sampler = torch.utils.data.distributed.DistributedSampler(trainset)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               collate_fn=collater,
                                               num_workers=args.num_workers,
                                               pin_memory=True, sampler=train_sampler)

    # Only rank 0 writes the text log.
    f = open(args.log_path, 'w') if args.rank == 0 else None

    for epoch in range(args.start_epoch, args.epochs):

        model.train()
        train_sampler.set_epoch(epoch)  # reshuffle shards every epoch

        for batch_idx, data in enumerate(train_loader):
            img = data['img'].cuda(args.gpu, non_blocking=True)
            label = data['label']
            heatmap_t = data['heatmap_t'].cuda(args.gpu, non_blocking=True)
            theta = data['theta']

            center_loss, scale_loss, offset_loss, theta_loss = model(
                {'img': img, 'label': label, 'heatmap_t': heatmap_t, 'theta': theta})
            total_loss = center_loss + scale_loss + offset_loss + 1.0 * theta_loss

            optimizer.zero_grad()
            total_loss.backward()
            # Clip the global gradient norm once; the old per-parameter loop
            # clipped each tensor's norm independently, which is not the
            # intended use of clip_grad_norm_.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
            optimizer.step()

            train_loss.append(float(total_loss))
            if args.rank == 0:
                # '/' separator ('\{' printed a literal backslash before).
                print(
                    '{}/{} | Center loss: {:1.5f} | scale loss: {:1.5f} | offset loss: {:1.5f}| theta loss:{:1.5f} | running loss: {:1.5f}'.format(
                        epoch, batch_idx, float(center_loss), float(scale_loss),
                        float(offset_loss), float(theta_loss), np.mean(train_loss))
                )
                f.write('{} {} {} {} {}\n'.format(
                    float(center_loss), float(scale_loss), float(offset_loss),
                    float(theta_loss), float(np.mean(train_loss))))

        if (epoch + 1) in args.lr_decay:
            args.lr = args.lr / 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        if args.rank == 0 and (epoch + 1) % args.save_interval == 0:
            os.makedirs('checkpoint/dota', exist_ok=True)
            print("Saving...")
            # '_' separates batch size from epoch (they were fused before).
            torch.save(model.state_dict(),
                       f"checkpoint/dota/{args.box_loss}_{args.coder}_{args.coder_cfg}_{args.coder_mode}_{args.batch_size}_{epoch + 1}.pth")

        torch.cuda.empty_cache()

    if args.rank == 0:
        f.close()


if __name__ == "__main__":
    main()
import torch.distributed as dist
import torch.utils.data.distributed
import torch.multiprocessing as mp


from nets.resnet_dcn_DFPN_model import ResNet
from datasets.HRSCDataset import HRSCSetv1, collater


parser = argparse.ArgumentParser()

parser.add_argument("--input_size", default=640, type=int)
parser.add_argument("--datadir", type=str,
                    default='/home/data/xuhang/datasets/HRSC2016/train')
parser.add_argument("--heads", type=int, default=1)  # number of classes (HRSC: ship only)
parser.add_argument("--coder", default='acm')        # angle coder type
parser.add_argument("--coder_cfg", default=1, type=int)
parser.add_argument("--coder_mode", default='model')
parser.add_argument("--box_loss", default='kfiou')
parser.add_argument("--model", type=int, default=50)  # ResNet depth

parser.add_argument("--num_workers", type=int, default=16)
parser.add_argument('--seed', default=2021, type=int, help='random seed')

parser.add_argument("--epochs", type=int, default=140)
parser.add_argument("--start_epoch", type=int, default=0)
parser.add_argument("--batch_size", type=int, default=32,
                    help="size of each image batch")
parser.add_argument("--lr", type=float, default=2e-4)
parser.add_argument("--lr_decay", default=[100, 130])
parser.add_argument('--resume', '-r', action='store_true',
                    help='resume from checkpoint')
parser.add_argument('--resume_weight_path', default="")
parser.add_argument("--save_interval", type=int, default=70)
parser.add_argument('--log_path', default="./result/debug.txt")

parser.add_argument('--dist-url', default='tcp://127.0.0.1:2556', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--nodes', default=1, type=int,
                    help='total number of nodes for distributed training')
parser.add_argument('--rank', default=0, type=int,
                    help='node rank for distributed training')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--world-size', default=-1, type=int,
                    help='total number of process for distributed training')
parser.add_argument('--local_rank', default=0, type=int)


def cal(epoch):
    """Warm-up gate: 0.0 during the first 10 epochs, 1.0 afterwards.

    NOTE(review): not referenced by the training loop below.
    """
    if epoch < 10:
        return 0.0
    else:
        return 1.0


def main():
    """Parse args, seed all RNG sources, and spawn one worker per GPU."""
    args = parser.parse_args()
    print(args)

    # random seed (translated from the original Chinese comment)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True

    ngpus_per_node = torch.cuda.device_count()
    args.world_size = ngpus_per_node * args.nodes

    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))


def main_worker(gpu, ngpus_per_node, args):
    """Per-process DDP training loop for HRSC2016 (one process per GPU)."""
    # Re-seed in the spawned process; numpy gets a per-GPU offset so the data
    # augmentation differs between workers.
    random.seed(args.seed)
    np.random.seed(args.seed + gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True

    args.gpu = gpu
    print("Use GPU: {} for training".format(args.gpu))
    args.rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)

    torch.cuda.set_device(args.gpu)

    train_loss = collections.deque(maxlen=10)  # short running-mean window

    print("=> creating model.")
    model = ResNet(num_layers=args.model, num_cls=args.heads, coder=args.coder,
                   coder_cfg=args.coder_cfg, coder_mode=args.coder_mode,
                   box_loss=args.box_loss).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.gpu], find_unused_parameters=True)

    if args.resume:
        model.load_state_dict(torch.load(
            args.resume_weight_path,
            map_location={"cuda:0": "cuda:{}".format(args.gpu)}))
        print("==>finished loading weight")

    cudnn.benchmark = True

    print("=> preparing data")
    # Split the global batch size / worker count across the node's processes.
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.num_workers = int((args.num_workers + ngpus_per_node - 1) / ngpus_per_node)
    trainset = HRSCSetv1(root_dir=args.datadir, img_size=args.input_size)
    print("training images: {}".format(len(trainset)))
    train_sampler = torch.utils.data.distributed.DistributedSampler(trainset)
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        collate_fn=collater, num_workers=args.num_workers, pin_memory=True,
        sampler=train_sampler)

    if args.rank == 0:
        f = open(args.log_path, 'w')

    for epoch in range(args.start_epoch, args.epochs):

        model.train()
        train_sampler.set_epoch(epoch)  # reshuffle the shards every epoch

        for batch_idx, data in enumerate(train_loader):
            img = data['img'].cuda(args.gpu, non_blocking=True)
            label = data['label']
            heatmap_t = data['heatmap_t'].cuda(args.gpu, non_blocking=True)
            theta = data['theta']

            center_loss, scale_loss, offset_loss, theta_loss = model(
                {'img': img, 'label': label, 'heatmap_t': heatmap_t, 'theta': theta})
            total_loss = center_loss + scale_loss + offset_loss + theta_loss

            optimizer.zero_grad()
            total_loss.backward()
            # BUGFIX: clip the global gradient norm once over all parameters.
            # The old per-parameter loop clipped every tensor to norm 10
            # independently, which is not global-norm clipping.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
            optimizer.step()

            train_loss.append(float(total_loss))
            if args.rank == 0:
                # BUGFIX: epoch/batch separator was '{}\{}' (literal backslash typo).
                print('{}/{} | Center loss: {:1.5f} | scale loss: {:1.5f} | '
                      'offset loss: {:1.5f}| theta loss:{:1.5f} | '
                      'running loss: {:1.5f}'.format(
                          epoch, batch_idx, float(center_loss), float(scale_loss),
                          float(offset_loss), float(theta_loss),
                          np.mean(train_loss)))

                # Same space-separated record as before, in a single write.
                f.write('{} {} {} {} {}\n'.format(
                    float(center_loss), float(scale_loss), float(offset_loss),
                    float(theta_loss), float(np.mean(train_loss))))

        # Step-decay the learning rate at the configured epochs.
        if (epoch + 1) in args.lr_decay:
            args.lr = args.lr / 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        if args.rank == 0 and (epoch + 1) % args.save_interval == 0:
            os.makedirs('checkpoint/hrsc', exist_ok=True)
            print("Saving...")
            torch.save(model.state_dict(),
                       f"checkpoint/hrsc/{args.box_loss}_{args.coder}_"
                       f"{args.coder_cfg}_{args.coder_mode}_mon_%d.pth" % (epoch + 1))

    if args.rank == 0:
        f.close()


if __name__ == "__main__":
    main()


# --------------------------------------------------------------------------
# utils/aug.py
# --------------------------------------------------------------------------
import numpy as np
import cv2
import math


def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate `image` by `angle` degrees, expanding the canvas so nothing is cut.

    The background revealed by the rotation is filled with `color` (default
    black).  Each entry of `label_box_list` is (cls_type, box) with
    box = [x0, y0, x1, y1, x2, y2, x3, y3]; the boxes are rotated with the
    image and clamped into the new canvas.  Returns [rotated_image, new_boxes].

    BUGFIX: the mutable default `label_box_list=[]` was replaced with the
    None-sentinel idiom (behaviour unchanged for every caller).
    """
    if label_box_list is None:
        label_box_list = []

    # angle < 0 -> counter-clockwise, angle > 0 -> clockwise rotation;
    # img_scale < 1.0 shrinks the content (0.75 suggested by the author).
    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)

    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # New bounding canvas that fully contains the rotated image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift so the old image centre maps onto the new canvas centre.
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    # borderValue fills the uncovered background with `color`.
    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new),
                               borderValue=color)

    # Rotate every labelled corner point through the same transform.
    angle = angle / 180 * math.pi
    box_rot_list = cal_rotate_box(label_box_list, angle, (x_center_ori, y_center_ori),
                                  (width_new // 2, height_new // 2))
    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        for index in range(len(box_rot) // 2):
            # Round to int and clamp each corner into the new canvas bounds.
            box_rot[index * 2] = max(min(int(box_rot[index * 2]), width_new), 0)
            box_rot[index * 2 + 1] = max(min(int(box_rot[index * 2 + 1]), height_new), 0)
        box_new_list.append((cls_type, box_rot))

    return [image_new, box_new_list]
def cal_rotate_box(box_list, angle, ori_center, new_center):
    """Rotate every labelled quad box by `angle` (radians) about the image centre.

    box = [x0, y0, x1, y1, x2, y2, x3, y3]; each element of `box_list` is
    (cls_type, box).  Returns a new list of (cls_type, rotated_box).
    """
    rotated = []
    for cls_type, box in box_list:
        new_box = []
        # Walk the flat [x, y, x, y, ...] list as coordinate pairs.
        for px, py in zip(box[0::2], box[1::2]):
            new_box.extend(cal_rotate_coordinate(px, py, angle, ori_center, new_center))
        rotated.append((cls_type, new_box))
    return rotated


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center):
    """Map one point through a rotation about `ori_center` into the new frame.

    The maths works in a y-up system (image rows grow downwards), hence the
    subtractions against the centre y coordinates on the way in and out.
    """
    dx = x_ori - ori_center[0]
    dy = ori_center[1] - y_ori
    rot_x = dx * math.cos(angle) - dy * math.sin(angle) + new_center[0]
    rot_y = new_center[1] - (dy * math.cos(angle) + dx * math.sin(angle))
    return (rot_x, rot_y)


# --------------------------------------------------------------------------
# utils/smooth_label.py
# --------------------------------------------------------------------------
# (the original `from __future__ import absolute_import, division,
# print_function` boilerplate is a no-op on Python 3 and cannot legally sit
# mid-module, so it is not repeated here)
import numpy as np
import math


def gaussian_label(label, num_class, u=0, sig=4.0):
    """Circularly shifted Gaussian window over `num_class` bins, peak at `label`."""
    xs = np.arange(math.floor(-num_class / 2), math.ceil(num_class / 2))
    window = np.exp(-(xs - u) ** 2 / (2 * sig ** 2))
    split = math.ceil(num_class / 2) - label
    return np.concatenate([window[split:], window[:split]], axis=0)


def rectangular_label(label, num_class, raduius=4):
    """Rectangular (top-hat) window of half-width `raduius`, rolled to `label`."""
    window = np.zeros([num_class])
    window[:raduius + 1] = 1
    window[-raduius:] = 1
    return np.concatenate([window[-label:], window[:-label]], axis=0)


def pulse_label(label, num_class):
    """One-hot vector of length `num_class` with the pulse at index `label`."""
    onehot = np.zeros([num_class])
    onehot[label] = 1
    return onehot


def triangle_label(label, num_class, raduius=4):
    """Symmetric triangle window of half-width `raduius`, rolled to `label`."""
    window = np.zeros([num_class])
    ramp = -1 / (raduius + 1) * np.arange(raduius + 1) + 1
    window[:raduius + 1] = ramp
    window[-raduius:] = ramp[-1:0:-1]  # mirrored falling edge
    return np.concatenate([window[-label:], window[:-label]], axis=0)


def get_all_smooth_label(num_label, label_type=0, raduius=4):
    """Stack one smooth label per class into a (num_label, num_label) array.

    label_type: 0=gaussian, 1=rectangular, 2=pulse, 3=triangle window.
    """
    if label_type == 0:
        rows = [gaussian_label(i, num_label, sig=raduius) for i in range(num_label)]
    elif label_type == 1:
        rows = [rectangular_label(i, num_label, raduius=raduius) for i in range(num_label)]
    elif label_type == 2:
        rows = [pulse_label(i, num_label) for i in range(num_label)]
    elif label_type == 3:
        rows = [triangle_label(i, num_label, raduius=raduius) for i in range(num_label)]
    else:
        raise Exception('Only support gaussian, rectangular, triangle and pulse label')
    return np.array(rows)
def angle_smooth_label(angle_label, angle_range=90, label_type=0, raduius=4, omega=1):
    """Convert continuous angles into smooth circular classification targets.

    :param angle_label: numpy array of angles in [-90, 0) (or [-180, 0) when
        angle_range is 180) — the original docstring repeated "[-90, 0)" twice
    :param angle_range: 90 or 180
    :param label_type: 0=gaussian, 1=rectangular, 2=pulse, 3=triangle window
    :param raduius: window half-width forwarded to the window builders
    :param omega: angular granularity; angle_range must be divisible by it
    :return: float32 array of shape (len(angle_label), angle_range // omega)
    """

    assert angle_range % omega == 0, 'wrong omega'

    angle_range = angle_range / omega
    # BUGFIX: out-of-place division.  `angle_label /= omega` modified the
    # caller's numpy array in place as a side effect.
    angle_label = angle_label / omega

    angle_label = np.array(-np.round(angle_label), np.int32)
    all_smooth_label = get_all_smooth_label(int(angle_range), label_type, raduius)
    # Fold the (exclusive) upper bound back into the last valid bin.
    inx = angle_label == angle_range
    angle_label[inx] = angle_range - 1
    smooth_label = all_smooth_label[angle_label]
    return np.array(smooth_label, np.float32)


if __name__ == '__main__':
    import matplotlib.pyplot as plt

    # Plot a single smoothing window as a visual sanity check.
    y_sig = gaussian_label(30, 180, sig=6)
    x = np.array(range(0, 180, 1))
    plt.plot(x, y_sig, "r-", linewidth=2)
    plt.grid(True)
    plt.show()
    print(y_sig)


# --------------------------------------------------------------------------
# utils/utils.py
# --------------------------------------------------------------------------
import numpy as np
import math
import cv2
import os
import datetime
import torch
import random
from contextlib import contextmanager
# NOTE(review): dropped `from easydict import EasyDict` — EasyDict is never
# referenced anywhere in this module, and it is a third-party dependency.


def gaussian_radius(det_size, min_overlap):
    """Gaussian radius keeping box IoU >= min_overlap (CenterNet heuristic).

    det_size: (height, width) of the box on the output feature map.
    Evaluates the three corner-shift cases and returns the smallest radius.
    NOTE(review): this is the widely copied CenterNet formula, including its
    use of (b + sqrt(disc)) / 2 in every case; kept byte-for-byte so the
    training targets stay identical.
    """
    height, width = det_size

    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
    r1 = (b1 + sq1) / 2

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
    r2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)


def gaussian2D(shape, sigma=1):
    """Unnormalised 2-D Gaussian patch of the given (h, w) shape, peak value 1."""
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m+1, -n:n+1]

    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    # Zero the numerically negligible tail.
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h


def draw_umich_gaussian(heatmap, center, radius, k=1):
    """Splat a Gaussian peak of `radius` onto `heatmap` at `center`, in place.

    Overlapping peaks merge through an element-wise max so existing responses
    are never reduced.  Returns the same heatmap array.
    """
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)

    x, y = int(center[0]), int(center[1])

    height, width = heatmap.shape[0:2]

    # Clip the stamp so it stays inside the heatmap borders.
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:  # TODO debug
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap


def creat_label_heatmap(img, label, num_classes=15, min_overlap=0.5):
    """Build per-class centre heatmaps at 1/4 resolution of the input.

    img: (N, C, H, W) tensor (only its shape is read).
    label: list of length N; each element is a (num_obj, 6) numpy array of
        [cx, cy, h, w, theta, class] in input-image coordinates
        (translated from the original Chinese docstring).
    Returns a (N, num_classes, H/4, W/4) float32 tensor.
    """
    N = img.size(0)
    heatmap_t = np.zeros((N, num_classes, int(img.size(2) / 4),
                          int(img.size(3) / 4))).astype(np.float32)
    for i in range(N):
        for j in range(len(label[i])):
            cx, cy, h, w, theta, c = label[i][j]
            radius = gaussian_radius((math.ceil(h / 4.0), math.ceil(w / 4.0)),
                                     min_overlap=min_overlap)
            radius = max(0, int(radius))

            ct = np.array([cx / 4.0, cy / 4.0], dtype=np.float32)
            ct_int = ct.astype(np.int32)

            heatmap_t[i, int(c), :, :] = draw_umich_gaussian(
                heatmap_t[i, int(c), :, :], ct_int, radius)
    return torch.from_numpy(heatmap_t)  # (N, num_classes, H/4, W/4) tensor


# postprocess
# NOTE(review): a large commented-out post-processing prototype
# (`decode` / `decode_per_img`, kept here as a module-level string literal)
# was dead code and has been removed; it remains available in version control.


if __name__ == "__main__":
    x = torch.randn(1, 3, 512, 512)
    # BUGFIX: creat_label_heatmap unpacks 6 values per object
    # [cx, cy, h, w, theta, class]; the old demo rows only had 5 values and
    # raised a ValueError on unpacking.
    label = [np.array([[24, 44, 80, 40, 0, 0], [192, 192, 128, 128, 0, 0]])]
    ht = creat_label_heatmap(x, label)

    cv2.imshow('ht', (ht.numpy()[0, 0, :, :] * 255).astype(np.uint8))
    cv2.waitKey(0)
    cv2.destroyAllWindows()