├── DataAugmentForObejctDetection.py ├── README.md ├── densenet.py ├── merge_box ├── csv_2_txt_for_merge.py └── merge_res.py ├── nms.py ├── resnext.py └── show_boundingbox_on_pic.py /DataAugmentForObejctDetection.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | ############################################################## 3 | # description: 4 | # data augmentation for obeject detection 5 | # author: 6 | # maozezhong 2018-6-27 7 | ############################################################## 8 | 9 | # 包括: 10 | # 1. 裁剪(需改变bbox) 11 | # 2. 平移(需改变bbox) 12 | # 3. 改变亮度 13 | # 4. 加噪声 14 | # 5. 旋转角度(需要改变bbox) 15 | # 注意: 16 | # random.seed(),相同的seed,产生的随机数是一样的!! 17 | 18 | import time 19 | import random 20 | import cv2 21 | import os 22 | import math 23 | import numpy as np 24 | from skimage.util import random_noise 25 | from skimage import exposure 26 | 27 | def show_pic(img, bboxes, names): 28 | ''' 29 | 输入: 30 | img:图像array 31 | bboxes:图像的所有boudning box list, 格式为[[x_min, y_min, x_max, y_max]....] 32 | names:每个box对应的名称 33 | ''' 34 | cv2.imwrite('./1.jpg', img) 35 | img = cv2.imread('./1.jpg') 36 | for i in range(len(bboxes)): 37 | bbox = bboxes[i] 38 | name = names[i] 39 | x_min = bbox[0] 40 | y_min = bbox[1] 41 | x_max = bbox[2] 42 | y_max = bbox[3] 43 | cv2.rectangle(img,(int(x_min),int(y_min)),(int(x_max),int(y_max)),(0,255,0),3) 44 | cv2.putText(img,name,(int(x_min),int(y_min+20)),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) 45 | cv2.imshow('pic', img) 46 | cv2.waitKey(0) 47 | cv2.destroyAllWindows() 48 | os.remove('./1.jpg') 49 | 50 | # 图像均为cv2读取 51 | class DataAugmentForObjectDetection(): 52 | def __init__(self, rotation_rate=0.5, max_rotation_angle=5, 53 | crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5, 54 | add_noise_rate=0.5): 55 | self.rotation_rate = rotation_rate 56 | self.max_rotation_angle = max_rotation_angle 57 | self.crop_rate = crop_rate 58 | self.shift_rate = shift_rate 59 | self.change_light_rate = change_light_rate 60 | self.add_noise_rate = add_noise_rate 61 | 62 | # 加噪声 63 | def _addNoise(self, img): 64 | ''' 65 | 输入: 66 | img:图像array 67 | 输出: 68 | 加噪声后的图像array,由于输出的像素是在[0,1]之间,所以得乘以255 69 | ''' 70 | # random.seed(int(time.time())) 71 | # return random_noise(img, mode='gaussian', seed=int(time.time()), clip=True)*255 72 | return random_noise(img, mode='gaussian', clip=True)*255 73 | 74 | 75 | # 调整亮度 76 | def _changeLight(self, img): 77 | # random.seed(int(time.time())) 78 | flag = random.uniform(0.5, 1.5) #flag>1为调暗,小于1为调亮 79 | return exposure.adjust_gamma(img, flag) 80 | 81 | # 旋转 82 | def _rotate_img_bbox(self, img, bboxes, angle=5, scale=1.): 83 | ''' 84 | 参考:https://blog.csdn.net/u014540717/article/details/53301195 85 | 输入: 86 | img:图像array,(h,w,c) 87 | bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值 88 | angle:旋转角度 89 | scale:默认1 90 | 输出: 91 | rot_img:旋转后的图像array 92 | rot_bboxes:旋转后的boundingbox坐标list 93 | ''' 94 | #---------------------- 旋转图像 ---------------------- 95 | w = img.shape[1] 96 | h = img.shape[0] 97 | # 角度变弧度 98 | rangle = np.deg2rad(angle) # angle in radians 99 | # now calculate new image width and height 100 | nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale 101 | nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale 102 | # ask OpenCV for the rotation matrix 103 | rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale) 104 | # calculate the move from the old center to the new center combined 105 | # with the rotation 106 | rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5,0])) 107 | # the move only affects the translation, so update the translation 108 | # part of the transform 109 | rot_mat[0,2] += rot_move[0] 110 | rot_mat[1,2] += rot_move[1] 111 | # 仿射变换 112 | rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4) 113 | 114 | #---------------------- 矫正bbox坐标 ---------------------- 115 | # rot_mat是最终的旋转矩阵 116 | # 获取原始bbox的四个中点,然后将这四个点转换到旋转后的坐标系下 117 | rot_bboxes = list() 118 | for bbox in bboxes: 119 | xmin = bbox[0] 120 | ymin = bbox[1] 121 | xmax = bbox[2] 122 | ymax = bbox[3] 123 | point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1])) 124 | point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1])) 125 | point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1])) 126 | point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1])) 127 | # 合并np.array 128 | concat = np.vstack((point1, point2, point3, point4)) 129 | # 改变array类型 130 | concat = concat.astype(np.int32) 131 | # 得到旋转后的坐标 132 | rx, ry, rw, rh = cv2.boundingRect(concat) 133 | rx_min = rx 134 | ry_min = ry 135 | rx_max = rx+rw 136 | ry_max = ry+rh 137 | # 加入list中 138 | rot_bboxes.append([rx_min, ry_min, rx_max, ry_max]) 139 | 140 | return rot_img, rot_bboxes 141 | 142 | # 裁剪 143 | def _crop_img_bboxes(self, img, bboxes): 144 | ''' 145 | 裁剪后的图片要包含所有的框 146 | 输入: 147 | img:图像array 148 | bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值 149 | 输出: 150 | crop_img:裁剪后的图像array 151 | crop_bboxes:裁剪后的bounding box的坐标list 152 | ''' 153 | #---------------------- 裁剪图像 ---------------------- 154 | w = img.shape[1] 155 | h = img.shape[0] 156 | x_min = w #裁剪后的包含所有目标框的最小的框 157 | x_max = 0 158 | y_min = h 159 | y_max = 0 160 | for bbox in bboxes: 161 | x_min = min(x_min, bbox[0]) 162 | y_min = min(y_min, bbox[1]) 163 | x_max = max(x_max, bbox[2]) 164 | y_max = max(y_max, bbox[3]) 165 | 166 | d_to_left = x_min #包含所有目标框的最小框到左边的距离 167 | d_to_right = w - x_max #包含所有目标框的最小框到右边的距离 168 | d_to_top = y_min #包含所有目标框的最小框到顶端的距离 169 | d_to_bottom = h - y_max #包含所有目标框的最小框到底部的距离 170 | 171 | #随机扩展这个最小框 172 | crop_x_min = int(x_min - random.uniform(0, d_to_left)) 173 | crop_y_min = int(y_min - random.uniform(0, d_to_top)) 174 | crop_x_max = int(x_max + random.uniform(0, d_to_right)) 175 | crop_y_max = int(y_max + random.uniform(0, d_to_bottom)) 176 | 177 | #确保不要越界 178 | crop_x_min = max(0, crop_x_min) 179 | crop_y_min = max(0, crop_y_min) 180 | crop_x_max = min(w, crop_x_max) 181 | crop_y_max = min(h, crop_y_max) 182 | 183 | crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max] 184 | 185 | #---------------------- 裁剪boundingbox ---------------------- 186 | #裁剪后的boundingbox坐标计算 187 | crop_bboxes = list() 188 | for bbox in bboxes: 189 | crop_bboxes.append([bbox[0]-crop_x_min, bbox[1]-crop_y_min, bbox[2]-crop_x_min, bbox[3]-crop_y_min]) 190 | 191 | return crop_img, crop_bboxes 192 | 193 | # 平移 194 | def _shift_pic_bboxes(self, img, bboxes): 195 | ''' 196 | 参考:https://blog.csdn.net/sty945/article/details/79387054 197 | 平移后的图片要包含所有的框 198 | 输入: 199 | img:图像array 200 | bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值 201 | 输出: 202 | shift_img:平移后的图像array 203 | shift_bboxes:平移后的bounding box的坐标list 204 | ''' 205 | #---------------------- 平移图像 ---------------------- 206 | w = img.shape[1] 207 | h = img.shape[0] 208 | x_min = w #裁剪后的包含所有目标框的最小的框 209 | x_max = 0 210 | y_min = h 211 | y_max = 0 212 | for bbox in bboxes: 213 | x_min = min(x_min, bbox[0]) 214 | y_min = min(y_min, bbox[1]) 215 | x_max = max(x_max, bbox[2]) 216 | y_max = max(y_max, bbox[3]) 217 | 218 | d_to_left = x_min #包含所有目标框的最大左移动距离 219 | d_to_right = w - x_max #包含所有目标框的最大右移动距离 220 | d_to_top = y_min #包含所有目标框的最大上移动距离 221 | d_to_bottom = h - y_max #包含所有目标框的最大下移动距离 222 | 223 | x = random.uniform(-(d_to_left-1) / 3, (d_to_right-1) / 3) 224 | y = random.uniform(-(d_to_top-1) / 3, (d_to_bottom-1) / 3) 225 | 226 | M = np.float32([[1, 0, x], [0, 1, y]]) #x为向左或右移动的像素值,正为向右负为向左; y为向上或者向下移动的像素值,正为向下负为向上 227 | shift_img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0])) 228 | 229 | #---------------------- 平移boundingbox ---------------------- 230 | shift_bboxes = list() 231 | for bbox in bboxes: 232 | shift_bboxes.append([bbox[0]+x, bbox[1]+y, bbox[2]+x, bbox[3]+y]) 233 | 234 | return shift_img, shift_bboxes 235 | 236 | def dataAugment(self, img, bboxes): 237 | ''' 238 | 图像增强 239 | 输入: 240 | img:图像array 241 | bboxes:该图像的所有框坐标 242 | 输出: 243 | img:增强后的图像 244 | bboxes:增强后图片对应的box 245 | ''' 246 | change_num = 0 #改变的次数 247 | # print('------') 248 | # random.seed(int(time.time())) 249 | if random.random() < self.crop_rate: #裁剪 250 | # print('裁剪') 251 | change_num += 1 252 | img, bboxes = self._crop_img_bboxes(img, bboxes) 253 | 254 | if random.random() > self.rotation_rate: #旋转 255 | # print('旋转') 256 | change_num += 1 257 | angle = random.uniform(-self.max_rotation_angle, self.max_rotation_angle) 258 | scale = random.uniform(0.7, 0.8) 259 | img, bboxes = self._rotate_img_bbox(img, bboxes, angle, scale) 260 | 261 | if random.random() < self.shift_rate: #平移 262 | # print('平移') 263 | change_num += 1 264 | img, bboxes = self._shift_pic_bboxes(img, bboxes) 265 | 266 | if random.random() > self.change_light_rate: #改变亮度 267 | # print('亮度') 268 | change_num += 1 269 | img = self._changeLight(img) 270 | 271 | if random.random() < self.add_noise_rate: #加噪声 272 | # print('加噪声') 273 | change_num += 1 274 | img = self._addNoise(img) 275 | # print('------') 276 | return img, bboxes, change_num 277 | 278 | 279 | if __name__ == '__main__': 280 | import shutil 281 | # test 282 | dataAug = DataAugmentForObjectDetection() 283 | agument_num = 1 284 | source_txt_root_path = '/home/maozezhong/Desktop/baidu_fusai/data/datasets/txt' 285 | source_pic_root_path = '/home/maozezhong/Desktop/baidu_fusai/data/datasets/train' 286 | target_pic_root_path = './data_augment/JPEGImages' 287 | target_txt_root_path = './data_augment/txt' 288 | if os.path.exists(target_pic_root_path): 289 | shutil.rmtree(target_pic_root_path) 290 | if os.path.exists(target_txt_root_path): 291 | shutil.rmtree(target_txt_root_path) 292 | os.mkdir(target_pic_root_path) 293 | os.mkdir(target_txt_root_path) 294 | cnt = 0 295 | for parent, _, files in os.walk(source_txt_root_path): 296 | for file in files: 297 | cnt += 1 298 | pic_path = os.path.join(source_pic_root_path, file.split('.')[0]+'.jpg') 299 | txt_path = os.path.join(parent, file) 300 | txt_file = open(txt_path, 'r') 301 | contents = txt_file.readlines() 302 | bboxes = list() 303 | names = list() 304 | for content in contents: 305 | content = content.strip() 306 | x_min = int(float(content.split(' ')[1])) 307 | y_min = int(float(content.split(' ')[2])) 308 | x_max = int(float(content.split(' ')[3])) 309 | y_max = int(float(content.split(' ')[4])) 310 | bboxes.append([x_min, y_min, x_max, y_max]) 311 | names.append(content.split(' ')[0]) 312 | img = cv2.imread(pic_path) 313 | # 原图可视化 314 | # show_pic(img, bboxes, names) 315 | 316 | i = 0 317 | while i < agument_num: 318 | 319 | # 数据增强后的图 320 | changed_img, changed_bboxes, change_num = dataAug.dataAugment(img, bboxes) 321 | # show_pic(changed_img, changed_bboxes, names) 322 | 323 | #必须得有一个改变 324 | if change_num == 0: 325 | continue 326 | i += 1 327 | 328 | # 写入txt 329 | target_txt_path = os.path.join(target_txt_root_path, file.split('.')[0]+'_'+str(i)+'.txt') 330 | target_txt_file = open(target_txt_path, 'w') 331 | for ii in range(len(changed_bboxes)): 332 | bbox = changed_bboxes[ii] 333 | content = names[ii] + ' ' + str(int(bbox[0]))+' ' + str(int(bbox[1]))+' '+str(int(bbox[2]))+' '+str(int(bbox[3]))+'\n' 334 | target_txt_file.write(content) 335 | # 写入pic 336 | target_pic_path = os.path.join(target_pic_root_path, file.split('.')[0]+'_'+str(i)+'.jpg') 337 | cv2.imwrite(target_pic_path, changed_img) 338 | 339 | print(str(cnt)+'/'+str(len(files))) 340 | print('done!') 341 | 342 | 343 | 344 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 比赛介绍 2 | - 针对检测+分类任务,我们提供9000张带有位置信息和类别信息的图像数据用于训练,4351张图像用于评估测试。该数据集全部来源于百度地图淘金,选取了60类常见品牌类别。比如,肯德基,星巴克,耐克等。 3 | 4 | ## 比赛历程 5 | - 7月13号结束,7月初开始做.中间尝试了 6 | - [yolo](https://github.com/pjreddie/darknet), 7 | - [faster-rcnn pytorch版本](https://github.com/jwyang/faster-rcnn.pytorch) 8 | - [dcn](https://github.com/msracver/Deformable-ConvNets) 9 | - [retinanet keras版本](https://github.com/fizyr/keras-retinanet) 10 | - fpn等 11 | - 最后还是tow-stage的faster-rcnn正确度高(至少在我的实验中是这样,当然时间有限,设备有限单卡1080,实验结论不完备) 12 | - 数据处理方面 : 使用了针对检测的数据增强,包括旋转,平移,加噪,改亮度,具体实现见DataAugmentForObejctDetection.py这个脚本 13 | - trick方面 : 1)softnms, 2)模型融合(具体见merge_box中的脚本) 14 | - batchsize基本上是1,设备受限上不去了; lr初始一般设的0.001, 每5轮降为原来的十分之一; 输入尺度试过600和800 15 | - 最后线上为0.8576,排名23,没苟进决赛,哎... 16 | 17 | ## 脚本说明: 18 | - merge_box: 19 | - csv_2_txt_for_merge.py : 根据结果csv产生中间txt文件 20 | - merge_res.py : 融合并产生最终csv结果文件 21 | - show_boundingbox_on_pic.py : 可视化脚本 22 | - DataAugmentForObejctDetection.py : 数据增强脚本 23 | - densenet.py : pytorch, 基于densenet backbone的faster rcnn模型结构(未实验), 参考[vision/torchvision/models/densenet.py](https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py) 24 | - resnext.py : pytorch, 基于resnext backbone的faster rcnn模型结构, 参考[ResNeXt-PyTorch/resnext.py](https://github.com/miraclewkf/ResNeXt-PyTorch/blob/master/resnext.py), 其实就是在resnet的基础上加了多通道并行. 25 | - nms.py : 常规nms,以及softnms 26 | 27 | ## 学习姿势 28 | - faster**多尺度训练,多尺度预测**,**tesnsorlayer数据增强**,结果ensemble可以到89 29 | - 基于fpn的faster-rcnn 30 | - detectron单模型可以到89 31 | - predict的时候augment, detectron中的config文件下有例子 32 | - 调参方法,何凯明论文,detctron有论文链接 33 | - **增强后的数据作为验证集**,把60类验证集AP保存,取每一类最好的ap的模型进行集成 34 | - 数据增强库 : [imgaug](https://blog.csdn.net/u012897374/article/details/80142744), emmm...应该比我自己整的靠谱点 35 | - 调整分类和bbox的loss权重 36 | - 使用sniper模型 37 | - ssd上89!!!但是没说用了啥技巧.... 38 | 39 | ## 所有代码链接 40 | - [baiduyun(part1:models部分)](https://pan.baidu.com/s/1BaXyPzJkpRCMlsC2saDnnA) 41 | - [baiduyun(part2:data)](https://pan.baidu.com/s/1k9E_KsEtz5f0lbzX_2OjBg) 42 | 43 | ## to do list 44 | - [x] 使用detectron 45 | - [X] [transfer to coco data](https://blog.csdn.net/qq_15969343/article/details/80848175) 46 | - [ ] test aug 47 | - [ ] 数据增强,[sampleParing and mixup](https://kexue.fm/) 48 | 49 | -------------------------------------------------------------------------------- /densenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from model.utils.config import cfg 6 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 7 | 8 | from torch.autograd import Variable 9 | import math 10 | import pdb 11 | 12 | import re 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | import torch.utils.model_zoo as model_zoo 17 | from collections import OrderedDict 18 | import os 19 | 20 | __all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161'] 21 | 22 | 23 | model_urls = { 24 | 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', 25 | 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', 26 | 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', 27 | 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', 28 | } 29 | 30 | 31 | def densenet121(pretrained=False, **kwargs): 32 | r"""Densenet-121 model from 33 | `"Densely Connected Convolutional Networks" `_ 34 | Args: 35 | pretrained (bool): If True, returns a model pre-trained on ImageNet 36 | """ 37 | model_path = 'data/pretrained_model/densenet121.pth' 38 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), 39 | **kwargs) 40 | if pretrained: 41 | # '.'s are no longer allowed in module names, but pervious _DenseLayer 42 | # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. 43 | # They are also in the checkpoints in model_urls. This pattern is used 44 | # to find such keys. 45 | pattern = re.compile( 46 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') 47 | if os.path.exists(model_path): 48 | state_dict = torch.load(model_path) 49 | else: 50 | state_dict = model_zoo.load_url(model_urls['densenet121']) 51 | for key in list(state_dict.keys()): 52 | res = pattern.match(key) 53 | if res: 54 | new_key = res.group(1) + res.group(2) 55 | state_dict[new_key] = state_dict[key] 56 | del state_dict[key] 57 | model.load_state_dict(state_dict) 58 | return model 59 | 60 | 61 | def densenet169(pretrained=False, **kwargs): 62 | r"""Densenet-169 model from 63 | `"Densely Connected Convolutional Networks" `_ 64 | Args: 65 | pretrained (bool): If True, returns a model pre-trained on ImageNet 66 | """ 67 | model_path = 'data/pretrained_model/densenet169.pth' 68 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32), 69 | **kwargs) 70 | if pretrained: 71 | # '.'s are no longer allowed in module names, but pervious _DenseLayer 72 | # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. 73 | # They are also in the checkpoints in model_urls. This pattern is used 74 | # to find such keys. 75 | pattern = re.compile( 76 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') 77 | if os.path.exists(model_path): 78 | state_dict = torch.load(model_path) 79 | else: 80 | state_dict = model_zoo.load_url(model_urls['densenet169']) 81 | for key in list(state_dict.keys()): 82 | res = pattern.match(key) 83 | if res: 84 | new_key = res.group(1) + res.group(2) 85 | state_dict[new_key] = state_dict[key] 86 | del state_dict[key] 87 | model.load_state_dict(state_dict) 88 | return model 89 | 90 | 91 | def densenet201(pretrained=False, **kwargs): 92 | r"""Densenet-201 model from 93 | `"Densely Connected Convolutional Networks" `_ 94 | Args: 95 | pretrained (bool): If True, returns a model pre-trained on ImageNet 96 | """ 97 | model_path = 'data/pretrained_model/densenet201.pth' 98 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32), 99 | **kwargs) 100 | if pretrained: 101 | # '.'s are no longer allowed in module names, but pervious _DenseLayer 102 | # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. 103 | # They are also in the checkpoints in model_urls. This pattern is used 104 | # to find such keys. 105 | pattern = re.compile( 106 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') 107 | if os.path.exists(model_path): 108 | state_dict = torch.load(model_path) 109 | else: 110 | state_dict = model_zoo.load_url(model_urls['densenet201']) 111 | for key in list(state_dict.keys()): 112 | res = pattern.match(key) 113 | if res: 114 | new_key = res.group(1) + res.group(2) 115 | state_dict[new_key] = state_dict[key] 116 | del state_dict[key] 117 | model.load_state_dict(state_dict) 118 | return model 119 | 120 | 121 | def densenet161(pretrained=False, **kwargs): 122 | r"""Densenet-161 model from 123 | `"Densely Connected Convolutional Networks" `_ 124 | Args: 125 | pretrained (bool): If True, returns a model pre-trained on ImageNet 126 | """ 127 | model_path = 'data/pretrained_model/densenet161.pth' 128 | model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24), 129 | **kwargs) 130 | if pretrained: 131 | # '.'s are no longer allowed in module names, but pervious _DenseLayer 132 | # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. 133 | # They are also in the checkpoints in model_urls. This pattern is used 134 | # to find such keys. 135 | pattern = re.compile( 136 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') 137 | if os.path.exists(model_path): 138 | state_dict = torch.load(model_path) 139 | else: 140 | state_dict = model_zoo.load_url(model_urls['densenet161']) 141 | for key in list(state_dict.keys()): 142 | res = pattern.match(key) 143 | if res: 144 | new_key = res.group(1) + res.group(2) 145 | state_dict[new_key] = state_dict[key] 146 | del state_dict[key] 147 | model.load_state_dict(state_dict) 148 | return model 149 | 150 | 151 | class _DenseLayer(nn.Sequential): 152 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): 153 | super(_DenseLayer, self).__init__() 154 | self.add_module('norm1', nn.BatchNorm2d(num_input_features)), 155 | self.add_module('relu1', nn.ReLU(inplace=True)), 156 | self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * 157 | growth_rate, kernel_size=1, stride=1, bias=False)), 158 | self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)), 159 | self.add_module('relu2', nn.ReLU(inplace=True)), 160 | self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, 161 | kernel_size=3, stride=1, padding=1, bias=False)), 162 | self.drop_rate = drop_rate 163 | 164 | def forward(self, x): 165 | new_features = super(_DenseLayer, self).forward(x) 166 | if self.drop_rate > 0: 167 | new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) 168 | return torch.cat([x, new_features], 1) 169 | 170 | 171 | class _DenseBlock(nn.Sequential): 172 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): 173 | super(_DenseBlock, self).__init__() 174 | for i in range(num_layers): 175 | layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate) 176 | self.add_module('denselayer%d' % (i + 1), layer) 177 | 178 | 179 | class _Transition(nn.Sequential): 180 | def __init__(self, num_input_features, num_output_features): 181 | super(_Transition, self).__init__() 182 | self.add_module('norm', nn.BatchNorm2d(num_input_features)) 183 | self.add_module('relu', nn.ReLU(inplace=True)) 184 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, 185 | kernel_size=1, stride=1, bias=False)) 186 | self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) 187 | 188 | 189 | class DenseNet(nn.Module): 190 | r"""Densenet-BC model class, based on 191 | `"Densely Connected Convolutional Networks" `_ 192 | Args: 193 | growth_rate (int) - how many filters to add each layer (`k` in paper) 194 | block_config (list of 4 ints) - how many layers in each pooling block 195 | num_init_features (int) - the number of filters to learn in the first convolution layer 196 | bn_size (int) - multiplicative factor for number of bottle neck layers 197 | (i.e. bn_size * k features in the bottleneck layer) 198 | drop_rate (float) - dropout rate after each dense layer 199 | num_classes (int) - number of classification classes 200 | """ 201 | 202 | def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), 203 | num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000): 204 | 205 | super(DenseNet, self).__init__() 206 | 207 | # First convolution 208 | self.features = nn.Sequential(OrderedDict([ 209 | ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), 210 | ('norm0', nn.BatchNorm2d(num_init_features)), 211 | ('relu0', nn.ReLU(inplace=True)), 212 | ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 213 | ])) 214 | 215 | # Each denseblock 216 | num_features = num_init_features 217 | for i, num_layers in enumerate(block_config): 218 | block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, 219 | bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate) 220 | self.features.add_module('denseblock%d' % (i + 1), block) 221 | num_features = num_features + num_layers * growth_rate 222 | if i != len(block_config) - 1: 223 | trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2) 224 | self.features.add_module('transition%d' % (i + 1), trans) 225 | num_features = num_features // 2 226 | 227 | # Final batch norm 228 | self.features.add_module('norm5', nn.BatchNorm2d(num_features)) 229 | 230 | # Linear layer 231 | self.classifier = nn.Linear(num_features, num_classes) 232 | 233 | # Official init from torch repo. 234 | for m in self.modules(): 235 | if isinstance(m, nn.Conv2d): 236 | nn.init.kaiming_normal_(m.weight) 237 | elif isinstance(m, nn.BatchNorm2d): 238 | nn.init.constant_(m.weight, 1) 239 | nn.init.constant_(m.bias, 0) 240 | elif isinstance(m, nn.Linear): 241 | nn.init.constant_(m.bias, 0) 242 | 243 | def forward(self, x): 244 | features = self.features(x) 245 | out = F.relu(features, inplace=True) 246 | out = F.avg_pool2d(out, kernel_size=7, stride=1).view(features.size(0), -1) 247 | out = self.classifier(out) 248 | return out 249 | 250 | class densenet(_fasterRCNN): 251 | def __init__(self, classes, num_layers=101, pretrained=False, class_agnostic=False): 252 | self.model_path = 'data/pretrained_model/densenet121.pth' 253 | self.dout_base_model = 1024 254 | self.pretrained = pretrained 255 | self.class_agnostic = class_agnostic 256 | 257 | _fasterRCNN.__init__(self, classes, class_agnostic) 258 | 259 | def _init_modules(self): 260 | densenet = densenet121(pretrained=True) 261 | 262 | # if self.pretrained == True: 263 | # print("Loading pretrained weights from %s" %(self.model_path)) 264 | # state_dict = torch.load(self.model_path) 265 | # densenet.load_state_dict({k:v for k,v in state_dict.items() if k in densenet.state_dict()}) 266 | 267 | # Build densenet. 268 | ''' 269 | feature = [init, block1, trans1, block2, trans2, block3, tran3, block4, trans4, norm] 270 | ''' 271 | self.RCNN_base = nn.Sequential(densenet.features[:-2]) 272 | 273 | self.RCNN_top = nn.Sequential(densenet.features[-2:]) 274 | 275 | self.RCNN_cls_score = nn.Linear(2048, self.n_classes) 276 | if self.class_agnostic: 277 | self.RCNN_bbox_pred = nn.Linear(2048, 4) 278 | else: 279 | self.RCNN_bbox_pred = nn.Linear(2048, 4 * self.n_classes) 280 | 281 | # Fix blocks 282 | for p in self.RCNN_base[0].parameters(): p.requires_grad=False 283 | 284 | assert (0 <= cfg.DENSENET.FIXED_LAYERS < 8) 285 | if cfg.DENSENET.FIXED_LAYERS: 286 | for i in range(1,1+cfg.DENSENET.FIXED_LAYERS): 287 | for p in self.RCNN_base[i].parameters(): p.requires_grad=False 288 | 289 | def set_bn_fix(m): 290 | classname = m.__class__.__name__ 291 | if classname.find('BatchNorm') != -1: 292 | for p in m.parameters(): p.requires_grad=False 293 | 294 | self.RCNN_base.apply(set_bn_fix) 295 | self.RCNN_top.apply(set_bn_fix) 296 | 297 | def train(self, mode=True): 298 | # Override train so that the training mode is set as we want 299 | nn.Module.train(self, mode) 300 | if mode: 301 | # Set fixed blocks to be in eval mode 302 | self.RCNN_base.eval() 303 | # Set unfixed blocks to be in train mode 304 | i = 9 305 | while i > cfg.DENSENET.FIXED_LAYERS: 306 | self.RCNN_base[i].train() 307 | 308 | def set_bn_eval(m): 309 | classname = m.__class__.__name__ 310 | if classname.find('BatchNorm') != -1: 311 | m.eval() 312 | 313 | self.RCNN_base.apply(set_bn_eval) 314 | self.RCNN_top.apply(set_bn_eval) 315 | 316 | # def _head_to_tail(self, pool5): 317 | # fc7 = self.RCNN_top(pool5).mean(3).mean(2) 318 | # return fc7 -------------------------------------------------------------------------------- /merge_box/csv_2_txt_for_merge.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import shutil 4 | 5 | target_root_path = './txt_for_merge' 6 | if os.path.exists(target_root_path): 7 | shutil.rmtree(target_root_path) 8 | os.mkdir(target_root_path) 9 | 10 | len_list = list() 11 | for i in range(1,5): 12 | csv_path = './res'+str(i)+'.csv' 13 | if i==1: 14 | data = pd.read_csv(csv_path) 15 | else: 16 | data_temp = pd.read_csv(csv_path) 17 | data = pd.concat([data, data_temp]) 18 | len_list.append(len(data['filename'])) 19 | 20 | # print(len_list) 21 | # print(len(data['filename'])) 22 | data.to_csv('./noIndex.csv', index=False) 23 | data = pd.read_csv('./noIndex.csv') 24 | 25 | index = -1 26 | indexx = 0 27 | xishu = [1, 1, 1, 1] #加权系数 28 | for i in range(len(data['filename'])): 29 | pic_name = data['filename'][i] 30 | label = data['label'][i] 31 | if i%len_list[indexx]==0: 32 | index += 1 33 | indexx += 1 34 | score = float(data['score'][i]) * xishu[index] 35 | x_min = data['x_min'][i] 36 | y_min = data['y_min'][i] 37 | x_max = data['x_max'][i] 38 | y_max = data['y_max'][i] 39 | txt_path = os.path.join(target_root_path, pic_name.split('.')[0]+'.txt') 40 | with open(txt_path, 'a+') as f: 41 | content = str(label)+' '+str(score)+ ' '+str(x_min)+' '+str(y_min)+' '+str(x_max)+' '+str(y_max)+'\n' 42 | f.write(content) 43 | print('write to txt '+str(i+1)+'/'+str(len(data['filename']))) 44 | 45 | print('done') 46 | -------------------------------------------------------------------------------- /merge_box/merge_res.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from nms import nms 4 | import numpy as np 5 | import cv2 6 | 7 | csv_path = '/home/maozezhong/Desktop/baidu_fusai/data/class_name.csv' 8 | data = pd.read_csv(csv_path) 9 | label2name = dict() 10 | for i in range(len(data['label'])): 11 | label2name[int(data['label'][i])] = data['prefix'][i] 12 | 13 | def showPicResult(image, coords): 14 | img = cv2.imread(image) 15 | for i in range(len(coords)): 16 | x1=coords[i][0] 17 | y1=coords[i][1] 18 | x2=coords[i][2] 19 | y2=coords[i][3] 20 | score = coords[i][4] 21 | score = round(score,2) 22 | label = coords[i][5] 23 | name = label2name[label] + ' ' + str(score) 24 | cv2.rectangle(img,(int(x1),int(y1)),(int(x2),int(y2)),(0,255,0),3) 25 | cv2.putText(img,name,(int(x1),int(y1+20)),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) 26 | cv2.namedWindow("retinanet_image_detector", 0) #1的时候是原图 27 | cv2.moveWindow("retinanet_image_detector",0,0) 28 | cv2.resizeWindow("retinanet_image_detector", 640, 960); 29 | cv2.imshow('retinanet_image_detector', img) 30 | cv2.waitKey(0) #表示等待500ms,0表示一直等待直到按键 31 | cv2.destroyAllWindows() 32 | 33 | file_names = list() 34 | labels_ = list() 35 | scores_ = list() 36 | x_mins = list() 37 | y_mins = list() 38 | x_maxs = list() 39 | y_maxs = list() 40 | 41 | txt_path = './txt_for_merge' 42 | test_txt_path = '/home/maozezhong/Desktop/baidu_fusai/data/datasets/test.txt' 43 | with open(test_txt_path, 'r') as f: 44 | for line in f.readlines(): 45 | pic_name = line.strip() 46 | img_path = os.path.join('/home/maozezhong/Desktop/baidu_fusai/data/datasets/test',pic_name) 47 | txt_pic_path = os.path.join(txt_path, pic_name.split('.')[0]+'.txt') 48 | dets = list() 49 | with open(txt_pic_path, 'r') as ff: 50 | for ll in ff.readlines(): 51 | ll = ll.strip() 52 | label = int(ll.split(' ')[0]) 53 | score = float(ll.split(' ')[1]) 54 | x_min = int(ll.split(' ')[2]) 55 | y_min = int(ll.split(' ')[3]) 56 | x_max = int(ll.split(' ')[4]) 57 | y_max = int(ll.split(' ')[5]) 58 | dets.append([x_min,y_min,x_max,y_max,score,label]) 59 | dets = np.array(dets) 60 | dets = nms(dets) 61 | for det in dets: 62 | file_names.append(pic_name) 63 | labels_.append(int(det[5])) 64 | scores_.append(det[4]) 65 | x_mins.append(int(det[0])) 66 | y_mins.append(int(det[1])) 67 | x_maxs.append(int(det[2])) 68 | y_maxs.append(int(det[3])) 69 | 70 | # 可视化 71 | # showPicResult(img_path, dets) 72 | 73 | column = ['filename', 'label', 'score', 'x_min', 'y_min', 'x_max', 'y_max'] 74 | dataframe = pd.DataFrame({'filename': file_names, 'label': labels_, 'score' : scores_, 'x_min' : x_mins, 'y_min' : y_mins, 'x_max' : x_maxs, 'y_max' : y_maxs}) 75 | dataframe.to_csv('./res_merge.csv', index=False, header=False, columns=column, sep=' ') -------------------------------------------------------------------------------- /nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import numpy as np 3 | 4 | def rescore(overlap, scores, thresh, type='gaussian'): 5 | assert overlap.shape[0] == scores.shape[0] 6 | if type == 'linear': 7 | inds = np.where(overlap >= thresh)[0] 8 | scores[inds] = scores[inds] * (1 - overlap[inds]) 9 | else: 10 | scores = scores * np.exp(- overlap**2 / thresh) 11 | 12 | return scores 13 | 14 | # dets = [x_min, y_min, x_max, y_max, score, label],n行6列的array 15 | def soft_nms(dets, thresh=.3, max_dets=300): 16 | # print('soft') 17 | if dets.shape[0] == 0: 18 | return np.zeros((0, 5)) 19 | 20 | x1 = dets[:, 0] 21 | y1 = dets[:, 1] 22 | x2 = dets[:, 2] 23 | y2 = dets[:, 3] 24 | scores = dets[:, 4] 25 | 26 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 27 | order = scores.argsort()[::-1] 28 | scores = scores[order] 29 | 30 | if max_dets == -1: 31 | max_dets = order.size 32 | 33 | keep = np.zeros(max_dets, dtype=np.intp) 34 | keep_cnt = 0 35 | 36 | while order.size > 0 and keep_cnt < max_dets: 37 | i = order[0] 38 | dets[i, 4] = scores[0] 39 | xx1 = np.maximum(x1[i], x1[order[1:]]) 40 | yy1 = np.maximum(y1[i], y1[order[1:]]) 41 | xx2 = np.minimum(x2[i], x2[order[1:]]) 42 | yy2 = np.minimum(y2[i], y2[order[1:]]) 43 | 44 | w = np.maximum(0.0, xx2 - xx1 + 1) 45 | h = np.maximum(0.0, yy2 - yy1 + 1) 46 | inter = w * h 47 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 48 | 49 | order = order[1:] 50 | scores = rescore(ovr, scores[1:], thresh) 51 | 52 | tmp = scores.argsort()[::-1] 53 | order = order[tmp] 54 | scores = scores[tmp] 55 | 56 | keep[keep_cnt] = i 57 | keep_cnt += 1 58 | 59 | keep = keep[:keep_cnt] 60 | dets = dets[keep, :] 61 | return dets 62 | 63 | def nms(dets, thresh=.3): 64 | """ 65 | greedily select boxes with high confidence and overlap with current maximum <= thresh 66 | rule out overlap >= thresh 67 | :param dets: [[x1, y1, x2, y2 score]] 68 | :param thresh: retain overlap < thresh 69 | :return: indexes to keep 70 | """ 71 | # print('none soft') 72 | if dets.shape[0] == 0: 73 | return [] 74 | 75 | x1 = dets[:, 0] 76 | y1 = dets[:, 1] 77 | x2 = dets[:, 2] 78 | y2 = dets[:, 3] 79 | scores = dets[:, 4] 80 | 81 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 82 | order = scores.argsort()[::-1] 83 | 84 | keep = [] 85 | while order.size > 0: 86 | i = order[0] 87 | keep.append(i) 88 | xx1 = np.maximum(x1[i], x1[order[1:]]) 89 | yy1 = np.maximum(y1[i], y1[order[1:]]) 90 | xx2 = np.minimum(x2[i], x2[order[1:]]) 91 | yy2 = np.minimum(y2[i], y2[order[1:]]) 92 | 93 | w = np.maximum(0.0, xx2 - xx1 + 1) 94 | h = np.maximum(0.0, yy2 - yy1 + 1) 95 | inter = w * h 96 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 97 | 98 | inds = np.where(ovr <= thresh)[0] 99 | order = order[inds + 1] 100 | 101 | dets = dets[keep, :] 102 | return dets -------------------------------------------------------------------------------- /resnext.py: -------------------------------------------------------------------------------- 1 | ''' 2 | New for ResNeXt: 3 | 1. Wider bottleneck 4 | 2. Add group for conv2 5 | ''' 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from model.utils.config import cfg 11 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.autograd import Variable 17 | import math 18 | import torch.utils.model_zoo as model_zoo 19 | import pdb 20 | 21 | __all__ = ['ResNeXt', 'resnext18', 'resnext34', 'resnext50', 'resnext101', 22 | 'resnext152'] 23 | 24 | # model_urls = { 25 | # 'resnext101_32x4d': 'https://data.lip6.fr/cadene/pretrainedmodels/resnext101_32x4d-29e315fa.pth', 26 | # 'resnext101_64x4d': 'https://data.lip6.fr/cadene/pretrainedmodels/resnext101_64x4d-e77a0586.pth', 27 | # } 28 | 29 | def conv3x3(in_planes, out_planes, stride=1): 30 | """3x3 convolution with padding""" 31 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 32 | padding=1, bias=False) 33 | 34 | class BasicBlock(nn.Module): 35 | expansion = 1 36 | 37 | def __init__(self, inplanes, planes, stride=1, downsample=None, num_group=32): 38 | super(BasicBlock, self).__init__() 39 | self.conv1 = conv3x3(inplanes, planes*2, stride) 40 | self.bn1 = nn.BatchNorm2d(planes*2) 41 | self.relu = nn.ReLU(inplace=True) 42 | self.conv2 = conv3x3(planes*2, planes*2, groups=num_group) 43 | self.bn2 = nn.BatchNorm2d(planes*2) 44 | self.downsample = downsample 45 | self.stride = stride 46 | 47 | def forward(self, x): 48 | residual = x 49 | 50 | out = self.conv1(x) 51 | out = self.bn1(out) 52 | out = self.relu(out) 53 | 54 | out = self.conv2(out) 55 | out = self.bn2(out) 56 | 57 | if self.downsample is not None: 58 | residual = self.downsample(x) 59 | 60 | out += residual 61 | out = self.relu(out) 62 | 63 | return out 64 | 65 | 66 | class Bottleneck(nn.Module): 67 | expansion = 4 68 | 69 | def __init__(self, inplanes, planes, stride=1, downsample=None, num_group=32): 70 | super(Bottleneck, self).__init__() 71 | self.conv1 = nn.Conv2d(inplanes, planes*2, kernel_size=1, bias=False) 72 | self.bn1 = nn.BatchNorm2d(planes*2) 73 | self.conv2 = nn.Conv2d(planes*2, planes*2, kernel_size=3, stride=stride, 74 | padding=1, bias=False, groups=num_group) 75 | self.bn2 = nn.BatchNorm2d(planes*2) 76 | self.conv3 = nn.Conv2d(planes*2, planes * 4, kernel_size=1, bias=False) 77 | self.bn3 = nn.BatchNorm2d(planes * 4) 78 | self.relu = nn.ReLU(inplace=True) 79 | self.downsample = downsample 80 | self.stride = stride 81 | 82 | def forward(self, x): 83 | residual = x 84 | 85 | out = self.conv1(x) 86 | out = self.bn1(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv2(out) 90 | out = self.bn2(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv3(out) 94 | out = self.bn3(out) 95 | 96 | if self.downsample is not None: 97 | residual = self.downsample(x) 98 | 99 | out += residual 100 | out = self.relu(out) 101 | 102 | return out 103 | 104 | 105 | class ResNeXt(nn.Module): 106 | 107 | def __init__(self, block, layers, num_classes=1000, num_group=32): 108 | self.inplanes = 64 109 | super(ResNeXt, self).__init__() 110 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 111 | bias=False) 112 | self.bn1 = nn.BatchNorm2d(64) 113 | self.relu = nn.ReLU(inplace=True) 114 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 115 | self.layer1 = self._make_layer(block, 64, layers[0], num_group) 116 | self.layer2 = self._make_layer(block, 128, layers[1], num_group, stride=2) 117 | self.layer3 = self._make_layer(block, 256, layers[2], num_group, stride=2) 118 | self.layer4 = self._make_layer(block, 512, layers[3], num_group, stride=2) 119 | self.avgpool = nn.AvgPool2d(7, stride=1) 120 | self.fc = nn.Linear(512 * block.expansion, num_classes) 121 | 122 | for m in self.modules(): 123 | if isinstance(m, nn.Conv2d): 124 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 125 | m.weight.data.normal_(0, math.sqrt(2. / n)) 126 | elif isinstance(m, nn.BatchNorm2d): 127 | m.weight.data.fill_(1) 128 | m.bias.data.zero_() 129 | 130 | def _make_layer(self, block, planes, blocks, num_group, stride=1): 131 | downsample = None 132 | if stride != 1 or self.inplanes != planes * block.expansion: 133 | downsample = nn.Sequential( 134 | nn.Conv2d(self.inplanes, planes * block.expansion, 135 | kernel_size=1, stride=stride, bias=False), 136 | nn.BatchNorm2d(planes * block.expansion), 137 | ) 138 | 139 | layers = [] 140 | layers.append(block(self.inplanes, planes, stride, downsample, num_group=num_group)) 141 | self.inplanes = planes * block.expansion 142 | for i in range(1, blocks): 143 | layers.append(block(self.inplanes, planes, num_group=num_group)) 144 | 145 | return nn.Sequential(*layers) 146 | 147 | def forward(self, x): 148 | x = self.conv1(x) 149 | x = self.bn1(x) 150 | x = self.relu(x) 151 | x = self.maxpool(x) 152 | 153 | x = self.layer1(x) 154 | x = self.layer2(x) 155 | x = self.layer3(x) 156 | x = self.layer4(x) 157 | 158 | x = self.avgpool(x) 159 | x = x.view(x.size(0), -1) 160 | x = self.fc(x) 161 | 162 | return x 163 | 164 | 165 | def resnext18(**kwargs): 166 | """Constructs a ResNeXt-18 model. 167 | """ 168 | model = ResNeXt(BasicBlock, [2, 2, 2, 2], **kwargs) 169 | return model 170 | 171 | 172 | def resnext34(**kwargs): 173 | """Constructs a ResNeXt-34 model. 174 | """ 175 | model = ResNeXt(BasicBlock, [3, 4, 6, 3], **kwargs) 176 | return model 177 | 178 | 179 | def resnext50(**kwargs): 180 | """Constructs a ResNeXt-50 model. 181 | """ 182 | model = ResNeXt(Bottleneck, [3, 4, 6, 3], **kwargs) 183 | return model 184 | 185 | 186 | def resnext101_32x4d(**kwargs): 187 | """Constructs a ResNeXt-101 model. 188 | """ 189 | model = ResNeXt(Bottleneck, [3, 4, 23, 3], num_group=32, **kwargs) 190 | return model 191 | 192 | def resnext101_64x4d(**kwargs): 193 | """Constructs a ResNeXt-101 model. 194 | """ 195 | model = ResNeXt(Bottleneck, [3, 4, 23, 3], num_group=64, **kwargs) 196 | return model 197 | 198 | 199 | def resnext152(**kwargs): 200 | """Constructs a ResNeXt-152 model. 201 | """ 202 | model = ResNeXt(Bottleneck, [3, 8, 36, 3], **kwargs) 203 | return model 204 | 205 | class resnext(_fasterRCNN): 206 | def __init__(self, classes, num_layers=101, pretrained=False, class_agnostic=False): 207 | #self.model_path = 'data/pretrained_model/resnext101_64x4d-e77a0586.pth' 208 | self.model_path = 'data/pretrained_model/resnext101_32x4d-29e315fa.pth' 209 | self.dout_base_model = 1024 210 | self.pretrained = pretrained 211 | self.class_agnostic = class_agnostic 212 | 213 | _fasterRCNN.__init__(self, classes, class_agnostic) 214 | 215 | def _init_modules(self): 216 | #resnext = resnext101_64x4d() 217 | resnext = resnext101_32x4d() 218 | 219 | if self.pretrained == True: 220 | print("Loading pretrained weights from %s" %(self.model_path)) 221 | #state_dict = torch.load(self.model_path) 222 | #resnext.load_state_dict({k:v for k,v in state_dict.items() if k in resnext.state_dict()}) 223 | pretrained_dict = torch.load(self.model_path) 224 | new = list(pretrained_dict.items()) 225 | my_model_kvpair = resnext.state_dict() 226 | cnt = 0 227 | for key, value in my_model_kvpair.items(): 228 | layer_name, weights = new[cnt] 229 | my_model_kvpair[key] = weights 230 | cnt += 1 231 | resnext.load_state_dict(my_model_kvpair) 232 | 233 | # Build resnext. 234 | self.RCNN_base = nn.Sequential(resnext.conv1, resnext.bn1,resnext.relu, 235 | resnext.maxpool,resnext.layer1,resnext.layer2,resnext.layer3) 236 | 237 | self.RCNN_top = nn.Sequential(resnext.layer4) 238 | 239 | self.RCNN_cls_score = nn.Linear(2048, self.n_classes) 240 | if self.class_agnostic: 241 | self.RCNN_bbox_pred = nn.Linear(2048, 4) 242 | else: 243 | self.RCNN_bbox_pred = nn.Linear(2048, 4 * self.n_classes) 244 | 245 | # Fix blocks 246 | for p in self.RCNN_base[0].parameters(): p.requires_grad=False 247 | for p in self.RCNN_base[1].parameters(): p.requires_grad=False 248 | 249 | assert (0 <= cfg.RESNEXT.FIXED_BLOCKS < 4) 250 | if cfg.RESNEXT.FIXED_BLOCKS >= 3: 251 | for p in self.RCNN_base[6].parameters(): p.requires_grad=False 252 | if cfg.RESNEXT.FIXED_BLOCKS >= 2: 253 | for p in self.RCNN_base[5].parameters(): p.requires_grad=False 254 | if cfg.RESNEXT.FIXED_BLOCKS >= 1: 255 | for p in self.RCNN_base[4].parameters(): p.requires_grad=False 256 | 257 | def set_bn_fix(m): 258 | classname = m.__class__.__name__ 259 | if classname.find('BatchNorm') != -1: 260 | for p in m.parameters(): p.requires_grad=False 261 | 262 | self.RCNN_base.apply(set_bn_fix) 263 | self.RCNN_top.apply(set_bn_fix) 264 | 265 | def train(self, mode=True): 266 | # Override train so that the training mode is set as we want 267 | nn.Module.train(self, mode) 268 | if mode: 269 | # Set fixed blocks to be in eval mode 270 | self.RCNN_base.eval() 271 | self.RCNN_base[4].train() 272 | self.RCNN_base[5].train() 273 | self.RCNN_base[6].train() 274 | 275 | def set_bn_eval(m): 276 | classname = m.__class__.__name__ 277 | if classname.find('BatchNorm') != -1: 278 | m.eval() 279 | 280 | self.RCNN_base.apply(set_bn_eval) 281 | self.RCNN_top.apply(set_bn_eval) 282 | 283 | def _head_to_tail(self, pool5): 284 | fc7 = self.RCNN_top(pool5).mean(3).mean(2) 285 | return fc7 286 | -------------------------------------------------------------------------------- /show_boundingbox_on_pic.py: -------------------------------------------------------------------------------- 1 | #!coding=utf-8 2 | ##################################### 3 | # 对训练集合进行可视化 4 | ##################################### 5 | import cv2 6 | import pandas as pd 7 | 8 | #在图上画框 9 | def showPicResult(image, coords, name_set): 10 | img = cv2.imread(image) 11 | for i in range(len(coords)): 12 | x1=coords[i][0] - coords[i][2]/2 13 | y1=coords[i][1] - coords[i][3]/2 14 | x2=coords[i][0] + coords[i][2]/2 15 | y2=coords[i][1] + coords[i][3]/2 16 | name = name_set[i] 17 | cv2.rectangle(img,(int(x1),int(y1)),(int(x2),int(y2)),(0,255,0),3) 18 | cv2.putText(img,name,(int(x1),int(y1+20)),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) 19 | cv2.namedWindow("image_detector", 1) #1表示原图 20 | cv2.moveWindow("image_detector",0,0) 21 | cv2.resizeWindow("image_detector", 256,192) #可视化的图片大小 22 | cv2.imshow('image_detector', img) 23 | cv2.waitKey(0) #表示等待500ms,0表示一直等待直到按键 24 | cv2.destroyAllWindows() 25 | 26 | #转化为预测的框格式 27 | def transfer(coords): 28 | ''' 29 | 输入: 30 | coords:坐标,形式为"x_min_y_min_x_max_y_max" 31 | 输出: 32 | 转换后的坐标:(x,y,w,h) x和y分别为中心横纵坐标,w为框的宽度,h为高度 33 | ''' 34 | coords = coords.split('_') 35 | x_min = float(coords[0]) 36 | y_min = float(coords[1]) 37 | x_max = float(coords[2]) 38 | y_max = float(coords[3]) 39 | 40 | transfered_x = (x_min + x_max)/2 41 | transfered_y = (y_min + y_max)/2 42 | transfered_w = x_max - x_min 43 | transfered_h = y_max - y_min 44 | 45 | return (transfered_x,transfered_y,transfered_w,transfered_h) 46 | 47 | if __name__ == "__main__": 48 | import os 49 | txt_path = './txt' 50 | pic_root_path = './train' 51 | for parent, _, files in os.walk(txt_path): 52 | for file in files: 53 | pic_file = open(os.path.join(parent, file), 'r') 54 | ori_img = pic_root_path+'/'+file.split('.')[0]+'.jpg' 55 | coords_set = [] 56 | name_set = [] 57 | for line in pic_file.readlines(): 58 | name = line.split(' ')[0] 59 | coord = line.split(' ')[1]+'_'+line.split(' ')[2]+'_'+line.split(' ')[3]+'_'+line.split(' ')[4] 60 | coords_set.append(transfer(coord)) 61 | name_set.append(name) 62 | print(file) 63 | showPicResult(ori_img, coords_set, name_set) 64 | 65 | 66 | --------------------------------------------------------------------------------