├── DataAugmentForObejctDetection.py
├── README.md
├── densenet.py
├── merge_box
    ├── csv_2_txt_for_merge.py
    └── merge_res.py
├── nms.py
├── resnext.py
└── show_boundingbox_on_pic.py


/DataAugmentForObejctDetection.py:
--------------------------------------------------------------------------------
  1 | # -*- coding=utf-8 -*-
  2 | ##############################################################
  3 | # description:
  4 | #     data augmentation for object detection
  5 | # author:
  6 | #     maozezhong 2018-6-27
  7 | ##############################################################
  8 | 
  9 | # 包括:
 10 | #     1. 裁剪(需改变bbox)
 11 | #     2. 平移(需改变bbox)
 12 | #     3. 改变亮度
 13 | #     4. 加噪声
 14 | #     5. 旋转角度(需要改变bbox)
 15 | # 注意:   
 16 | #     random.seed(),相同的seed,产生的随机数是一样的!!
 17 | 
 18 | import time
 19 | import random
 20 | import cv2
 21 | import os
 22 | import math
 23 | import numpy as np
 24 | from skimage.util import random_noise
 25 | from skimage import exposure
 26 | 
 27 | def show_pic(img, bboxes, names):
 28 |     '''
 29 |     输入:
 30 |         img:图像array
 31 |         bboxes:图像的所有boudning box list, 格式为[[x_min, y_min, x_max, y_max]....]
 32 |         names:每个box对应的名称
 33 |     '''
 34 |     cv2.imwrite('./1.jpg', img)
 35 |     img = cv2.imread('./1.jpg')
 36 |     for i in range(len(bboxes)):
 37 |         bbox = bboxes[i]
 38 |         name = names[i]
 39 |         x_min = bbox[0]
 40 |         y_min = bbox[1]
 41 |         x_max = bbox[2]
 42 |         y_max = bbox[3]
 43 |         cv2.rectangle(img,(int(x_min),int(y_min)),(int(x_max),int(y_max)),(0,255,0),3) 
 44 |         cv2.putText(img,name,(int(x_min),int(y_min+20)),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
 45 |     cv2.imshow('pic', img)
 46 |     cv2.waitKey(0)
 47 |     cv2.destroyAllWindows() 
 48 |     os.remove('./1.jpg')
 49 | 
 50 | # 图像均为cv2读取
 51 | class DataAugmentForObjectDetection():
 52 |     def __init__(self, rotation_rate=0.5, max_rotation_angle=5, 
 53 |                 crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5,
 54 |                 add_noise_rate=0.5):
 55 |         self.rotation_rate = rotation_rate
 56 |         self.max_rotation_angle = max_rotation_angle
 57 |         self.crop_rate = crop_rate
 58 |         self.shift_rate = shift_rate
 59 |         self.change_light_rate = change_light_rate
 60 |         self.add_noise_rate = add_noise_rate
 61 |     
 62 |     # 加噪声
 63 |     def _addNoise(self, img):
 64 |         '''
 65 |         输入:
 66 |             img:图像array
 67 |         输出:
 68 |             加噪声后的图像array,由于输出的像素是在[0,1]之间,所以得乘以255
 69 |         '''
 70 |         # random.seed(int(time.time())) 
 71 |         # return random_noise(img, mode='gaussian', seed=int(time.time()), clip=True)*255
 72 |         return random_noise(img, mode='gaussian', clip=True)*255
 73 | 
 74 |     
 75 |     # 调整亮度
 76 |     def _changeLight(self, img):
 77 |         # random.seed(int(time.time()))
 78 |         flag = random.uniform(0.5, 1.5) #flag>1为调暗,小于1为调亮
 79 |         return exposure.adjust_gamma(img, flag)
 80 |     
 81 |     # 旋转
 82 |     def _rotate_img_bbox(self, img, bboxes, angle=5, scale=1.):
 83 |         '''
 84 |         参考:https://blog.csdn.net/u014540717/article/details/53301195
 85 |         输入:
 86 |             img:图像array,(h,w,c)
 87 |             bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
 88 |             angle:旋转角度
 89 |             scale:默认1
 90 |         输出:
 91 |             rot_img:旋转后的图像array
 92 |             rot_bboxes:旋转后的boundingbox坐标list
 93 |         '''
 94 |         #---------------------- 旋转图像 ----------------------
 95 |         w = img.shape[1]
 96 |         h = img.shape[0]
 97 |         # 角度变弧度
 98 |         rangle = np.deg2rad(angle)  # angle in radians
 99 |         # now calculate new image width and height
100 |         nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
101 |         nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
102 |         # ask OpenCV for the rotation matrix
103 |         rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
104 |         # calculate the move from the old center to the new center combined
105 |         # with the rotation
106 |         rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5,0]))
107 |         # the move only affects the translation, so update the translation
108 |         # part of the transform
109 |         rot_mat[0,2] += rot_move[0]
110 |         rot_mat[1,2] += rot_move[1]
111 |         # 仿射变换
112 |         rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4)
113 | 
114 |         #---------------------- 矫正bbox坐标 ----------------------
115 |         # rot_mat是最终的旋转矩阵
116 |         # 获取原始bbox的四个中点，然后将这四个点转换到旋转后的坐标系下
117 |         rot_bboxes = list()
118 |         for bbox in bboxes:
119 |             xmin = bbox[0]
120 |             ymin = bbox[1]
121 |             xmax = bbox[2]
122 |             ymax = bbox[3]
123 |             point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
124 |             point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
125 |             point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
126 |             point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
127 |             # 合并np.array
128 |             concat = np.vstack((point1, point2, point3, point4))
129 |             # 改变array类型
130 |             concat = concat.astype(np.int32)
131 |             # 得到旋转后的坐标
132 |             rx, ry, rw, rh = cv2.boundingRect(concat)
133 |             rx_min = rx
134 |             ry_min = ry
135 |             rx_max = rx+rw
136 |             ry_max = ry+rh
137 |             # 加入list中
138 |             rot_bboxes.append([rx_min, ry_min, rx_max, ry_max])
139 |         
140 |         return rot_img, rot_bboxes
141 | 
142 |     # 裁剪
143 |     def _crop_img_bboxes(self, img, bboxes):
144 |         '''
145 |         裁剪后的图片要包含所有的框
146 |         输入:
147 |             img:图像array
148 |             bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
149 |         输出:
150 |             crop_img:裁剪后的图像array
151 |             crop_bboxes:裁剪后的bounding box的坐标list
152 |         '''
153 |         #---------------------- 裁剪图像 ----------------------
154 |         w = img.shape[1]
155 |         h = img.shape[0]
156 |         x_min = w   #裁剪后的包含所有目标框的最小的框
157 |         x_max = 0
158 |         y_min = h
159 |         y_max = 0
160 |         for bbox in bboxes:
161 |             x_min = min(x_min, bbox[0])
162 |             y_min = min(y_min, bbox[1])
163 |             x_max = max(x_max, bbox[2])
164 |             y_max = max(y_max, bbox[3])
165 |         
166 |         d_to_left = x_min           #包含所有目标框的最小框到左边的距离
167 |         d_to_right = w - x_max      #包含所有目标框的最小框到右边的距离
168 |         d_to_top = y_min            #包含所有目标框的最小框到顶端的距离
169 |         d_to_bottom = h - y_max     #包含所有目标框的最小框到底部的距离
170 | 
171 |         #随机扩展这个最小框
172 |         crop_x_min = int(x_min - random.uniform(0, d_to_left))
173 |         crop_y_min = int(y_min - random.uniform(0, d_to_top))
174 |         crop_x_max = int(x_max + random.uniform(0, d_to_right))
175 |         crop_y_max = int(y_max + random.uniform(0, d_to_bottom))
176 | 
177 |         #确保不要越界
178 |         crop_x_min = max(0, crop_x_min)
179 |         crop_y_min = max(0, crop_y_min)
180 |         crop_x_max = min(w, crop_x_max)
181 |         crop_y_max = min(h, crop_y_max)
182 | 
183 |         crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
184 |         
185 |         #---------------------- 裁剪boundingbox ----------------------
186 |         #裁剪后的boundingbox坐标计算
187 |         crop_bboxes = list()
188 |         for bbox in bboxes:
189 |             crop_bboxes.append([bbox[0]-crop_x_min, bbox[1]-crop_y_min, bbox[2]-crop_x_min, bbox[3]-crop_y_min])
190 |         
191 |         return crop_img, crop_bboxes
192 |   
193 |     # 平移
194 |     def _shift_pic_bboxes(self, img, bboxes):
195 |         '''
196 |         参考:https://blog.csdn.net/sty945/article/details/79387054
197 |         平移后的图片要包含所有的框
198 |         输入:
199 |             img:图像array
200 |             bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
201 |         输出:
202 |             shift_img:平移后的图像array
203 |             shift_bboxes:平移后的bounding box的坐标list
204 |         '''
205 |         #---------------------- 平移图像 ----------------------
206 |         w = img.shape[1]
207 |         h = img.shape[0]
208 |         x_min = w   #裁剪后的包含所有目标框的最小的框
209 |         x_max = 0
210 |         y_min = h
211 |         y_max = 0
212 |         for bbox in bboxes:
213 |             x_min = min(x_min, bbox[0])
214 |             y_min = min(y_min, bbox[1])
215 |             x_max = max(x_max, bbox[2])
216 |             y_max = max(y_max, bbox[3])
217 |         
218 |         d_to_left = x_min           #包含所有目标框的最大左移动距离
219 |         d_to_right = w - x_max      #包含所有目标框的最大右移动距离
220 |         d_to_top = y_min            #包含所有目标框的最大上移动距离
221 |         d_to_bottom = h - y_max     #包含所有目标框的最大下移动距离
222 | 
223 |         x = random.uniform(-(d_to_left-1) / 3, (d_to_right-1) / 3)
224 |         y = random.uniform(-(d_to_top-1) / 3, (d_to_bottom-1) / 3)
225 |         
226 |         M = np.float32([[1, 0, x], [0, 1, y]])  #x为向左或右移动的像素值,正为向右负为向左; y为向上或者向下移动的像素值,正为向下负为向上
227 |         shift_img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
228 | 
229 |         #---------------------- 平移boundingbox ----------------------
230 |         shift_bboxes = list()
231 |         for bbox in bboxes:
232 |             shift_bboxes.append([bbox[0]+x, bbox[1]+y, bbox[2]+x, bbox[3]+y])
233 | 
234 |         return shift_img, shift_bboxes
235 | 
236 |     def dataAugment(self, img, bboxes):
237 |         '''
238 |         图像增强
239 |         输入:
240 |             img:图像array
241 |             bboxes:该图像的所有框坐标
242 |         输出:
243 |             img:增强后的图像
244 |             bboxes:增强后图片对应的box
245 |         '''
246 |         change_num = 0  #改变的次数
247 |         # print('------')
248 |         # random.seed(int(time.time()))
249 |         if random.random() < self.crop_rate:        #裁剪
250 |             # print('裁剪')
251 |             change_num += 1
252 |             img, bboxes = self._crop_img_bboxes(img, bboxes)
253 | 
254 |         if random.random() > self.rotation_rate:    #旋转
255 |             # print('旋转')
256 |             change_num += 1
257 |             angle = random.uniform(-self.max_rotation_angle, self.max_rotation_angle)
258 |             scale = random.uniform(0.7, 0.8)
259 |             img, bboxes = self._rotate_img_bbox(img, bboxes, angle, scale)
260 |         
261 |         if random.random() < self.shift_rate:        #平移
262 |             # print('平移')
263 |             change_num += 1
264 |             img, bboxes = self._shift_pic_bboxes(img, bboxes)
265 |         
266 |         if random.random() > self.change_light_rate: #改变亮度
267 |             # print('亮度')
268 |             change_num += 1
269 |             img = self._changeLight(img)
270 | 
271 |         if random.random() < self.add_noise_rate:    #加噪声
272 |             # print('加噪声')
273 |             change_num += 1
274 |             img = self._addNoise(img)
275 |         # print('------')
276 |         return img, bboxes, change_num
277 |             
278 | 
if __name__ == '__main__':
    import shutil
    # Driver: read every annotation txt under source_txt_root_path, augment the
    # matching image agument_num times and write the new image/annotation pairs.
    dataAug = DataAugmentForObjectDetection()
    agument_num = 1
    source_txt_root_path = '/home/maozezhong/Desktop/baidu_fusai/data/datasets/txt'
    source_pic_root_path = '/home/maozezhong/Desktop/baidu_fusai/data/datasets/train'
    target_pic_root_path = './data_augment/JPEGImages'
    target_txt_root_path = './data_augment/txt'
    # Start from empty output directories. makedirs (not mkdir) is required
    # because the parent './data_augment' may not exist yet.
    for target_root in (target_pic_root_path, target_txt_root_path):
        if os.path.exists(target_root):
            shutil.rmtree(target_root)
        os.makedirs(target_root)

    cnt = 0
    for parent, _, files in os.walk(source_txt_root_path):
        for file in files:
            cnt += 1
            base_name = file.split('.')[0]
            pic_path = os.path.join(source_pic_root_path, base_name+'.jpg')
            txt_path = os.path.join(parent, file)

            # Each annotation line is: name x_min y_min x_max y_max
            bboxes = list()
            names = list()
            with open(txt_path, 'r') as txt_file:
                for content in txt_file:
                    content = content.strip()
                    if not content:
                        continue    # tolerate blank lines
                    fields = content.split(' ')
                    bboxes.append([int(float(value)) for value in fields[1:5]])
                    names.append(fields[0])

            img = cv2.imread(pic_path)
            if img is None:
                # cv2.imread returns None instead of raising when it cannot read
                print('skip unreadable image: ' + pic_path)
                continue
            # visualise the original image
            # show_pic(img, bboxes, names)

            i = 0
            while i < agument_num:
                # augmented image
                changed_img, changed_bboxes, change_num = dataAug.dataAugment(img, bboxes)
                # show_pic(changed_img, changed_bboxes, names)

                # at least one operation must have been applied; otherwise retry
                if change_num == 0:
                    continue
                i += 1

                # write the annotation file
                target_txt_path = os.path.join(target_txt_root_path, base_name+'_'+str(i)+'.txt')
                with open(target_txt_path, 'w') as target_txt_file:
                    for ii in range(len(changed_bboxes)):
                        bbox = changed_bboxes[ii]
                        content = names[ii] + ' ' + str(int(bbox[0]))+' ' + str(int(bbox[1]))+' '+str(int(bbox[2]))+' '+str(int(bbox[3]))+'\n'
                        target_txt_file.write(content)
                # write the image
                target_pic_path = os.path.join(target_pic_root_path, base_name+'_'+str(i)+'.jpg')
                cv2.imwrite(target_pic_path, changed_img)

            print(str(cnt)+'/'+str(len(files)))
    # Reported once after the whole tree has been processed
    print('done!')
341 | 
342 | 
343 | 
344 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## 比赛介绍
 2 | - 针对检测+分类任务，我们提供9000张带有位置信息和类别信息的图像数据用于训练，4351张图像用于评估测试。该数据集全部来源于百度地图淘金，选取了60类常见品牌类别。比如，肯德基，星巴克，耐克等。
 3 | 
 4 | ## 比赛历程
 5 | - 7月13号结束,7月初开始做.中间尝试了
 6 |     - [yolo](https://github.com/pjreddie/darknet),
 7 |     - [faster-rcnn pytorch版本](https://github.com/jwyang/faster-rcnn.pytorch)
 8 |     - [dcn](https://github.com/msracver/Deformable-ConvNets)
 9 |     - [retinanet keras版本](https://github.com/fizyr/keras-retinanet)
10 |     - fpn等
11 | - 最后还是two-stage的faster-rcnn正确度高(至少在我的实验中是这样,当然时间有限,设备有限单卡1080,实验结论不完备)
12 | - 数据处理方面 : 使用了针对检测的数据增强,包括旋转,平移,加噪,改亮度,具体实现见DataAugmentForObejctDetection.py这个脚本
13 | - trick方面 : 1)softnms, 2)模型融合(具体见merge_box中的脚本)
14 | - batchsize基本上是1,设备受限上不去了; lr初始一般设的0.001, 每5轮降为原来的十分之一; 输入尺度试过600和800
15 | - 最后线上为0.8576,排名23,没苟进决赛,哎...
16 | 
17 | ## 脚本说明:
18 | - merge_box:
19 |     - csv_2_txt_for_merge.py : 根据结果csv产生中间txt文件
20 |     - merge_res.py : 融合并产生最终csv结果文件
21 | - show_boundingbox_on_pic.py : 可视化脚本
22 | - DataAugmentForObejctDetection.py : 数据增强脚本
23 | - densenet.py : pytorch, 基于densenet backbone的faster rcnn模型结构(未实验), 参考[vision/torchvision/models/densenet.py](https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py)
24 | - resnext.py : pytorch, 基于resnext backbone的faster rcnn模型结构, 参考[ResNeXt-PyTorch/resnext.py](https://github.com/miraclewkf/ResNeXt-PyTorch/blob/master/resnext.py), 其实就是在resnet的基础上加了多通道并行.
25 | - nms.py : 常规nms,以及softnms
26 | 
27 | ## 学习姿势
28 | - faster**多尺度训练,多尺度预测**,**tensorlayer数据增强**,结果ensemble可以到89
29 | - 基于fpn的faster-rcnn
30 | - detectron单模型可以到89
31 | - predict的时候augment, detectron中的config文件下有例子
32 | - 调参方法,何恺明论文,detectron有论文链接
33 | - **增强后的数据作为验证集**,把60类验证集AP保存,取每一类最好的ap的模型进行集成
34 | - 数据增强库 : [imgaug](https://blog.csdn.net/u012897374/article/details/80142744), emmm...应该比我自己整的靠谱点
35 | - 调整分类和bbox的loss权重
36 | - 使用sniper模型
37 | - ssd上89!!!但是没说用了啥技巧....
38 | 
39 | ## 所有代码链接
40 | - [baiduyun(part1:models部分)](https://pan.baidu.com/s/1BaXyPzJkpRCMlsC2saDnnA)
41 | - [baiduyun(part2:data)](https://pan.baidu.com/s/1k9E_KsEtz5f0lbzX_2OjBg)
42 | 
43 | ## to do list
44 | - [x] 使用detectron
45 | - [X] [transfer to coco data](https://blog.csdn.net/qq_15969343/article/details/80848175)
46 | - [ ] test aug
47 | - [ ] 数据增强,[SamplePairing and mixup](https://kexue.fm/)
48 | 
49 | 


--------------------------------------------------------------------------------
/densenet.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | from model.utils.config import cfg
  6 | from model.faster_rcnn.faster_rcnn import _fasterRCNN
  7 | 
  8 | from torch.autograd import Variable
  9 | import math
 10 | import pdb
 11 | 
 12 | import re
 13 | import torch
 14 | import torch.nn as nn
 15 | import torch.nn.functional as F
 16 | import torch.utils.model_zoo as model_zoo
 17 | from collections import OrderedDict
 18 | import os
 19 | 
 20 | __all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
 21 | 
 22 | 
 23 | model_urls = {
 24 |     'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
 25 |     'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
 26 |     'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
 27 |     'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
 28 | }
 29 | 
 30 | 
 31 | def densenet121(pretrained=False, **kwargs):
 32 |     r"""Densenet-121 model from
 33 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
 34 |     Args:
 35 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
 36 |     """
 37 |     model_path = 'data/pretrained_model/densenet121.pth'
 38 |     model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
 39 |                      **kwargs)
 40 |     if pretrained:
 41 |         # '.'s are no longer allowed in module names, but pervious _DenseLayer
 42 |         # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
 43 |         # They are also in the checkpoints in model_urls. This pattern is used
 44 |         # to find such keys.
 45 |         pattern = re.compile(
 46 |             r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
 47 |         if os.path.exists(model_path):
 48 |             state_dict = torch.load(model_path)
 49 |         else:
 50 |             state_dict = model_zoo.load_url(model_urls['densenet121'])
 51 |         for key in list(state_dict.keys()):
 52 |             res = pattern.match(key)
 53 |             if res:
 54 |                 new_key = res.group(1) + res.group(2)
 55 |                 state_dict[new_key] = state_dict[key]
 56 |                 del state_dict[key]
 57 |         model.load_state_dict(state_dict)
 58 |     return model
 59 | 
 60 | 
 61 | def densenet169(pretrained=False, **kwargs):
 62 |     r"""Densenet-169 model from
 63 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
 64 |     Args:
 65 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
 66 |     """
 67 |     model_path = 'data/pretrained_model/densenet169.pth'
 68 |     model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32),
 69 |                      **kwargs)
 70 |     if pretrained:
 71 |         # '.'s are no longer allowed in module names, but pervious _DenseLayer
 72 |         # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
 73 |         # They are also in the checkpoints in model_urls. This pattern is used
 74 |         # to find such keys.
 75 |         pattern = re.compile(
 76 |             r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
 77 |         if os.path.exists(model_path):
 78 |             state_dict = torch.load(model_path)
 79 |         else:
 80 |             state_dict = model_zoo.load_url(model_urls['densenet169'])
 81 |         for key in list(state_dict.keys()):
 82 |             res = pattern.match(key)
 83 |             if res:
 84 |                 new_key = res.group(1) + res.group(2)
 85 |                 state_dict[new_key] = state_dict[key]
 86 |                 del state_dict[key]
 87 |         model.load_state_dict(state_dict)
 88 |     return model
 89 | 
 90 | 
 91 | def densenet201(pretrained=False, **kwargs):
 92 |     r"""Densenet-201 model from
 93 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
 94 |     Args:
 95 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
 96 |     """
 97 |     model_path = 'data/pretrained_model/densenet201.pth'
 98 |     model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32),
 99 |                      **kwargs)
100 |     if pretrained:
101 |         # '.'s are no longer allowed in module names, but pervious _DenseLayer
102 |         # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
103 |         # They are also in the checkpoints in model_urls. This pattern is used
104 |         # to find such keys.
105 |         pattern = re.compile(
106 |             r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
107 |         if os.path.exists(model_path):
108 |             state_dict = torch.load(model_path)
109 |         else:
110 |             state_dict = model_zoo.load_url(model_urls['densenet201'])
111 |         for key in list(state_dict.keys()):
112 |             res = pattern.match(key)
113 |             if res:
114 |                 new_key = res.group(1) + res.group(2)
115 |                 state_dict[new_key] = state_dict[key]
116 |                 del state_dict[key]
117 |         model.load_state_dict(state_dict)
118 |     return model
119 | 
120 | 
def densenet161(pretrained=False, **kwargs):
    r"""Build a DenseNet-161 (96 initial features, growth rate 48, blocks 6/12/36/24).

    Architecture from `"Densely Connected Convolutional Networks"
    <https://arxiv.org/pdf/1608.06993.pdf>`_.

    Args:
        pretrained (bool): when truthy, load weights from
            ``data/pretrained_model/densenet161.pth`` if that file exists,
            otherwise from ``model_urls['densenet161']``.
        **kwargs: forwarded to the ``DenseNet`` constructor.
    """
    net = DenseNet(num_init_features=96, growth_rate=48,
                   block_config=(6, 12, 36, 24), **kwargs)
    if not pretrained:
        return net

    local_weights = 'data/pretrained_model/densenet161.pth'
    # Checkpoints written by the previous _DenseLayer use sub-module names such
    # as 'norm.1' / 'conv.2'. Dots are no longer allowed in module names, so
    # matching keys are rewritten to 'norm1' / 'conv2' before loading.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    if os.path.exists(local_weights):
        weights = torch.load(local_weights)
    else:
        weights = model_zoo.load_url(model_urls['densenet161'])
    for old_name in list(weights.keys()):
        hit = legacy_key.match(old_name)
        if hit is None:
            continue
        weights[hit.group(1) + hit.group(2)] = weights.pop(old_name)
    net.load_state_dict(weights)
    return net
149 | 
150 | 
class _DenseLayer(nn.Sequential):
    """BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3).

    The 1x1 convolution produces ``bn_size * growth_rate`` channels and the
    3x3 convolution produces ``growth_rate`` channels. ``forward`` returns the
    input concatenated with those new channels along dimension 1, optionally
    applying dropout (``drop_rate``) to the new channels first.
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        bottleneck_width = bn_size * growth_rate
        stages = (
            ('norm1', nn.BatchNorm2d(num_input_features)),
            ('relu1', nn.ReLU(inplace=True)),
            ('conv1', nn.Conv2d(num_input_features, bottleneck_width,
                                kernel_size=1, stride=1, bias=False)),
            ('norm2', nn.BatchNorm2d(bottleneck_width)),
            ('relu2', nn.ReLU(inplace=True)),
            ('conv2', nn.Conv2d(bottleneck_width, growth_rate,
                                kernel_size=3, stride=1, padding=1, bias=False)),
        )
        for label, module in stages:
            self.add_module(label, module)
        self.drop_rate = drop_rate

    def forward(self, x):
        grown = nn.Sequential.forward(self, x)
        if self.drop_rate > 0:
            grown = F.dropout(grown, p=self.drop_rate, training=self.training)
        # Dense connectivity: keep the input and append the new feature maps
        return torch.cat([x, grown], 1)
169 | 
170 | 
class _DenseBlock(nn.Sequential):
    """Stack of ``num_layers`` dense layers named ``denselayer1..N``.

    Every layer receives ``growth_rate`` more input channels than the one
    before it, starting from ``num_input_features``.
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        in_channels = num_input_features
        for position in range(1, num_layers + 1):
            self.add_module(
                'denselayer%d' % position,
                _DenseLayer(in_channels, growth_rate, bn_size, drop_rate))
            in_channels += growth_rate
177 | 
178 | 
class _Transition(nn.Sequential):
    """BN-ReLU-Conv(1x1) followed by 2x2 average pooling with stride 2.

    The convolution maps ``num_input_features`` channels to
    ``num_output_features`` channels.
    """

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        stages = (
            ('norm', nn.BatchNorm2d(num_input_features)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_input_features, num_output_features,
                               kernel_size=1, stride=1, bias=False)),
            ('pool', nn.AvgPool2d(kernel_size=2, stride=2)),
        )
        for label, module in stages:
            self.add_module(label, module)
187 | 
188 | 
class DenseNet(nn.Module):
    r"""Densenet-BC model class, based on
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
    Args:
        growth_rate (int) - how many filters to add each layer (`k` in paper)
        block_config (list of 4 ints) - how many layers in each pooling block
        num_init_features (int) - the number of filters to learn in the first convolution layer
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout rate after each dense layer
        num_classes (int) - number of classification classes

    ``self.features`` holds, in order: conv0, norm0, relu0, pool0, then
    denseblockN / transitionN pairs (no transition after the last block) and
    finally norm5. ``self.classifier`` is the final linear layer.
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):

        super(DenseNet, self).__init__()

        # First convolution
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))

        # Each denseblock; every block adds num_layers * growth_rate channels
        # and every transition (all blocks but the last) halves the count.
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for image batch ``x``."""
        features = self.features(x)
        out = F.relu(features, inplace=True)
        # Global average pooling. The former fixed 7x7 window only matched the
        # classifier for inputs whose final feature map is exactly 7x7 (e.g.
        # 224x224 images); the adaptive form gives the same result there and
        # also accepts other input sizes.
        out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1)
        out = self.classifier(out)
        return out
249 | 
class densenet(_fasterRCNN):
  """Faster R-CNN built on the feature extractor of a DenseNet-121.

  ``RCNN_base`` is every ``features`` module except the last two and
  ``RCNN_top`` is those last two (denseblock4 and norm5).
  """

  def __init__(self, classes, num_layers=101, pretrained=False, class_agnostic=False):
    # num_layers is accepted but not read anywhere in this class.
    self.model_path = 'data/pretrained_model/densenet121.pth'
    # NOTE(review): features[:-2] of DenseNet-121 ends with transition3, whose
    # output has 512 channels according to DenseNet.__init__; confirm that 1024
    # is what _fasterRCNN expects here.
    self.dout_base_model = 1024
    self.pretrained = pretrained
    self.class_agnostic = class_agnostic

    _fasterRCNN.__init__(self, classes, class_agnostic)

  def _init_modules(self):
    # NOTE(review): ImageNet weights are always loaded here; self.pretrained
    # is stored but not consulted. Confirm this is intended.
    backbone = densenet121(pretrained=True)

    # if self.pretrained == True:
    #   print("Loading pretrained weights from %s" %(self.model_path))
    #   state_dict = torch.load(self.model_path)
    #   densenet.load_state_dict({k:v for k,v in state_dict.items() if k in densenet.state_dict()})

    # Build densenet.
    # Slicing an nn.Sequential already yields an nn.Sequential. Wrapping the
    # slice in another nn.Sequential (as was done before) nests it as a single
    # child, so RCNN_base[0] was the whole backbone and RCNN_base[i] for i >= 1
    # raised IndexError. Using the slice directly keeps the children
    # individually addressable:
    #   RCNN_base = [conv0, norm0, relu0, pool0, denseblock1, transition1,
    #                denseblock2, transition2, denseblock3, transition3]
    #   RCNN_top  = [denseblock4, norm5]
    self.RCNN_base = backbone.features[:-2]

    self.RCNN_top = backbone.features[-2:]

    # NOTE(review): norm5 of DenseNet-121 has 1024 channels; the 2048 input
    # size below depends on a _head_to_tail that is not defined in this class.
    self.RCNN_cls_score = nn.Linear(2048, self.n_classes)
    if self.class_agnostic:
      self.RCNN_bbox_pred = nn.Linear(2048, 4)
    else:
      self.RCNN_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

    # Fix blocks: the first module is always frozen, followed by the next
    # cfg.DENSENET.FIXED_LAYERS modules.
    for p in self.RCNN_base[0].parameters(): p.requires_grad=False

    assert (0 <= cfg.DENSENET.FIXED_LAYERS < 8)
    if cfg.DENSENET.FIXED_LAYERS:
        for i in range(1,1+cfg.DENSENET.FIXED_LAYERS):
            for p in self.RCNN_base[i].parameters(): p.requires_grad=False

    def set_bn_fix(m):
      # Freeze the affine parameters of every batch-norm layer
      classname = m.__class__.__name__
      if classname.find('BatchNorm') != -1:
        for p in m.parameters(): p.requires_grad=False

    self.RCNN_base.apply(set_bn_fix)
    self.RCNN_top.apply(set_bn_fix)

  def train(self, mode=True):
    # Override train so that the training mode is set as we want
    nn.Module.train(self, mode)
    if mode:
      # Set fixed blocks to be in eval mode
      self.RCNN_base.eval()
      # Set unfixed blocks to be in train mode. The previous
      # `while i > FIXED_LAYERS` loop never decremented i and therefore
      # never terminated.
      for i in range(cfg.DENSENET.FIXED_LAYERS + 1, len(self.RCNN_base)):
        self.RCNN_base[i].train()

      def set_bn_eval(m):
        # Batch-norm layers always stay in eval mode
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
          m.eval()

      self.RCNN_base.apply(set_bn_eval)
      self.RCNN_top.apply(set_bn_eval)
315 | 
316 | #   def _head_to_tail(self, pool5):
317 | #     fc7 = self.RCNN_top(pool5).mean(3).mean(2)
318 | #     return fc7


--------------------------------------------------------------------------------
/merge_box/csv_2_txt_for_merge.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import os
 3 | import shutil
 4 | 
 5 | target_root_path = './txt_for_merge'
 6 | if os.path.exists(target_root_path):
 7 |     shutil.rmtree(target_root_path)
 8 | os.mkdir(target_root_path)
 9 | 
10 | len_list = list()
11 | for i in range(1,5):
12 |     csv_path = './res'+str(i)+'.csv'
13 |     if i==1:
14 |         data = pd.read_csv(csv_path)
15 |     else:
16 |         data_temp = pd.read_csv(csv_path)
17 |         data = pd.concat([data, data_temp])
18 |     len_list.append(len(data['filename']))
19 | 
20 | # print(len_list)
21 | # print(len(data['filename']))
22 | data.to_csv('./noIndex.csv', index=False)
23 | data = pd.read_csv('./noIndex.csv')
24 | 
25 | index = -1
26 | indexx = 0
27 | xishu = [1, 1, 1, 1]    #加权系数
28 | for i in range(len(data['filename'])):
29 |     pic_name = data['filename'][i]
30 |     label = data['label'][i]
31 |     if i%len_list[indexx]==0:
32 |         index += 1
33 |         indexx += 1
34 |     score = float(data['score'][i]) * xishu[index]
35 |     x_min = data['x_min'][i]
36 |     y_min = data['y_min'][i]
37 |     x_max = data['x_max'][i]
38 |     y_max = data['y_max'][i]
39 |     txt_path = os.path.join(target_root_path, pic_name.split('.')[0]+'.txt')
40 |     with open(txt_path, 'a+') as f:
41 |         content = str(label)+' '+str(score)+ ' '+str(x_min)+' '+str(y_min)+' '+str(x_max)+' '+str(y_max)+'\n'
42 |         f.write(content)
43 |     print('write to txt '+str(i+1)+'/'+str(len(data['filename'])))
44 | 
45 | print('done')
46 | 


--------------------------------------------------------------------------------
/merge_box/merge_res.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | from nms import nms
 4 | import numpy as np
 5 | import cv2
 6 | 
 7 | csv_path = '/home/maozezhong/Desktop/baidu_fusai/data/class_name.csv'
 8 | data = pd.read_csv(csv_path)
 9 | label2name = dict()
10 | for i in range(len(data['label'])):
11 |     label2name[int(data['label'][i])] = data['prefix'][i]
12 | 
def showPicResult(image, coords):
    """Draw detections on an image and show it until a key is pressed.

    Args:
        image: path of the image file to load with cv2.
        coords: sequence of detections, each indexable as
            [x1, y1, x2, y2, score, label]; the label is looked up in the
            module-level ``label2name`` table.
    """
    window = "retinanet_image_detector"
    img = cv2.imread(image)
    for det in coords:
        left, top, right, bottom = det[0], det[1], det[2], det[3]
        caption = label2name[det[5]] + ' ' + str(round(det[4], 2))
        top_left = (int(left), int(top))
        # Green box around the detection, red caption just inside its top edge.
        cv2.rectangle(img, top_left, (int(right), int(bottom)), (0, 255, 0), 3)
        cv2.putText(img, caption, (int(left), int(top + 20)),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)
    cv2.namedWindow(window, 0)  # flag 0: resizable window (1 would keep the original image size)
    cv2.moveWindow(window, 0, 0)
    cv2.resizeWindow(window, 640, 960)
    cv2.imshow(window, img)
    cv2.waitKey(0)  # 0 waits indefinitely for a key press
    cv2.destroyAllWindows()
32 | 
# Column-wise accumulators for the merged result table.
file_names = []
labels_ = []
scores_ = []
x_mins = []
y_mins = []
x_maxs = []
y_maxs = []

txt_path = './txt_for_merge'
test_txt_path = '/home/maozezhong/Desktop/baidu_fusai/data/datasets/test.txt'
with open(test_txt_path, 'r') as f:
    for line in f:
        pic_name = line.strip()
        if not pic_name:
            # Tolerate blank lines (e.g. a trailing newline) in the image list.
            continue
        img_path = os.path.join('/home/maozezhong/Desktop/baidu_fusai/data/datasets/test',pic_name)
        txt_pic_path = os.path.join(txt_path, pic_name.split('.')[0]+'.txt')
        if not os.path.exists(txt_pic_path):
            # No model produced a detection for this image; nothing to merge.
            print('no detections for ' + pic_name + ', skipped')
            continue
        dets = []
        with open(txt_pic_path, 'r') as ff:
            for ll in ff:
                # Each line: label score x_min y_min x_max y_max
                fields = ll.split()
                if len(fields) < 6:
                    continue
                label = int(fields[0])
                score = float(fields[1])
                # Go through float so values written as "12.0" are accepted too.
                x_min, y_min, x_max, y_max = (int(float(v)) for v in fields[2:6])
                dets.append([x_min,y_min,x_max,y_max,score,label])
        # Suppress overlapping boxes coming from the different models.
        dets = nms(np.array(dets))
        for det in dets:
            file_names.append(pic_name)
            labels_.append(int(det[5]))
            scores_.append(det[4])
            x_mins.append(int(det[0]))
            y_mins.append(int(det[1]))
            x_maxs.append(int(det[2]))
            y_maxs.append(int(det[3]))

        # Visualization (enable for debugging)
        # showPicResult(img_path, dets)

# Write the merged detections as a space-separated file without a header row.
column = ['filename', 'label', 'score', 'x_min', 'y_min', 'x_max', 'y_max']
dataframe = pd.DataFrame({'filename': file_names, 'label': labels_, 'score' : scores_, 'x_min' : x_mins, 'y_min' : y_mins, 'x_max' : x_maxs, 'y_max' : y_maxs})
dataframe.to_csv('./res_merge.csv', index=False, header=False, columns=column, sep=' ')


--------------------------------------------------------------------------------
/nms.py:
--------------------------------------------------------------------------------
  1 | # -*- coding=utf-8 -*-
  2 | import numpy as np
  3 | 
  4 | def rescore(overlap, scores, thresh, type='gaussian'):
  5 |     assert overlap.shape[0] == scores.shape[0]
  6 |     if type == 'linear':
  7 |         inds = np.where(overlap >= thresh)[0]
  8 |         scores[inds] = scores[inds] * (1 - overlap[inds])
  9 |     else:
 10 |         scores = scores * np.exp(- overlap**2 / thresh)
 11 | 
 12 |     return scores
 13 | 
 14 | # dets = [x_min, y_min, x_max, y_max, score, label],n行6列的array
 15 | def soft_nms(dets, thresh=.3, max_dets=300):
 16 |     # print('soft')
 17 |     if dets.shape[0] == 0:
 18 |         return np.zeros((0, 5))
 19 | 
 20 |     x1 = dets[:, 0]
 21 |     y1 = dets[:, 1]
 22 |     x2 = dets[:, 2]
 23 |     y2 = dets[:, 3]
 24 |     scores = dets[:, 4]
 25 | 
 26 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 27 |     order = scores.argsort()[::-1]
 28 |     scores = scores[order]
 29 | 
 30 |     if max_dets == -1:
 31 |         max_dets = order.size
 32 | 
 33 |     keep = np.zeros(max_dets, dtype=np.intp)
 34 |     keep_cnt = 0
 35 | 
 36 |     while order.size > 0 and keep_cnt < max_dets:
 37 |         i = order[0]
 38 |         dets[i, 4] = scores[0]
 39 |         xx1 = np.maximum(x1[i], x1[order[1:]])
 40 |         yy1 = np.maximum(y1[i], y1[order[1:]])
 41 |         xx2 = np.minimum(x2[i], x2[order[1:]])
 42 |         yy2 = np.minimum(y2[i], y2[order[1:]])
 43 | 
 44 |         w = np.maximum(0.0, xx2 - xx1 + 1)
 45 |         h = np.maximum(0.0, yy2 - yy1 + 1)
 46 |         inter = w * h
 47 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
 48 | 
 49 |         order = order[1:]
 50 |         scores = rescore(ovr, scores[1:], thresh)
 51 | 
 52 |         tmp = scores.argsort()[::-1]
 53 |         order = order[tmp]
 54 |         scores = scores[tmp]
 55 | 
 56 |         keep[keep_cnt] = i
 57 |         keep_cnt += 1
 58 | 
 59 |     keep = keep[:keep_cnt]
 60 |     dets = dets[keep, :]
 61 |     return dets
 62 | 
 63 | def nms(dets, thresh=.3):
 64 |     """
 65 |     greedily select boxes with high confidence and overlap with current maximum <= thresh
 66 |     rule out overlap >= thresh
 67 |     :param dets: [[x1, y1, x2, y2 score]]
 68 |     :param thresh: retain overlap < thresh
 69 |     :return: indexes to keep
 70 |     """
 71 |     # print('none soft')
 72 |     if dets.shape[0] == 0:
 73 |         return []
 74 | 
 75 |     x1 = dets[:, 0]
 76 |     y1 = dets[:, 1]
 77 |     x2 = dets[:, 2]
 78 |     y2 = dets[:, 3]
 79 |     scores = dets[:, 4]
 80 | 
 81 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 82 |     order = scores.argsort()[::-1]
 83 | 
 84 |     keep = []
 85 |     while order.size > 0:
 86 |         i = order[0]
 87 |         keep.append(i)
 88 |         xx1 = np.maximum(x1[i], x1[order[1:]])
 89 |         yy1 = np.maximum(y1[i], y1[order[1:]])
 90 |         xx2 = np.minimum(x2[i], x2[order[1:]])
 91 |         yy2 = np.minimum(y2[i], y2[order[1:]])
 92 | 
 93 |         w = np.maximum(0.0, xx2 - xx1 + 1)
 94 |         h = np.maximum(0.0, yy2 - yy1 + 1)
 95 |         inter = w * h
 96 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
 97 | 
 98 |         inds = np.where(ovr <= thresh)[0]
 99 |         order = order[inds + 1]
100 | 
101 |     dets = dets[keep, :]
102 |     return dets


--------------------------------------------------------------------------------
/resnext.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | New for ResNeXt:
  3 | 1. Wider bottleneck
  4 | 2. Add group for conv2
  5 | '''
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | from model.utils.config import cfg
 11 | from model.faster_rcnn.faster_rcnn import _fasterRCNN
 12 | 
 13 | import torch
 14 | import torch.nn as nn
 15 | import torch.nn.functional as F
 16 | from torch.autograd import Variable
 17 | import math
 18 | import torch.utils.model_zoo as model_zoo
 19 | import pdb
 20 | 
 21 | __all__ = ['ResNeXt', 'resnext18', 'resnext34', 'resnext50', 'resnext101',
 22 |            'resnext152']
 23 | 
 24 | # model_urls = {
 25 | #   'resnext101_32x4d': 'https://data.lip6.fr/cadene/pretrainedmodels/resnext101_32x4d-29e315fa.pth',
 26 | #   'resnext101_64x4d': 'https://data.lip6.fr/cadene/pretrainedmodels/resnext101_64x4d-e77a0586.pth',
 27 | # }
 28 | 
 29 | def conv3x3(in_planes, out_planes, stride=1):
 30 |     """3x3 convolution with padding"""
 31 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 32 |                      padding=1, bias=False)
 33 | 
 34 | class BasicBlock(nn.Module):
 35 |     expansion = 1
 36 | 
 37 |     def __init__(self, inplanes, planes, stride=1, downsample=None, num_group=32):
 38 |         super(BasicBlock, self).__init__()
 39 |         self.conv1 = conv3x3(inplanes, planes*2, stride)
 40 |         self.bn1 = nn.BatchNorm2d(planes*2)
 41 |         self.relu = nn.ReLU(inplace=True)
 42 |         self.conv2 = conv3x3(planes*2, planes*2, groups=num_group)
 43 |         self.bn2 = nn.BatchNorm2d(planes*2)
 44 |         self.downsample = downsample
 45 |         self.stride = stride
 46 | 
 47 |     def forward(self, x):
 48 |         residual = x
 49 | 
 50 |         out = self.conv1(x)
 51 |         out = self.bn1(out)
 52 |         out = self.relu(out)
 53 | 
 54 |         out = self.conv2(out)
 55 |         out = self.bn2(out)
 56 | 
 57 |         if self.downsample is not None:
 58 |             residual = self.downsample(x)
 59 | 
 60 |         out += residual
 61 |         out = self.relu(out)
 62 | 
 63 |         return out
 64 | 
 65 | 
 66 | class Bottleneck(nn.Module):
 67 |     expansion = 4
 68 | 
 69 |     def __init__(self, inplanes, planes, stride=1, downsample=None, num_group=32):
 70 |         super(Bottleneck, self).__init__()
 71 |         self.conv1 = nn.Conv2d(inplanes, planes*2, kernel_size=1, bias=False)
 72 |         self.bn1 = nn.BatchNorm2d(planes*2)
 73 |         self.conv2 = nn.Conv2d(planes*2, planes*2, kernel_size=3, stride=stride,
 74 |                                padding=1, bias=False, groups=num_group)
 75 |         self.bn2 = nn.BatchNorm2d(planes*2)
 76 |         self.conv3 = nn.Conv2d(planes*2, planes * 4, kernel_size=1, bias=False)
 77 |         self.bn3 = nn.BatchNorm2d(planes * 4)
 78 |         self.relu = nn.ReLU(inplace=True)
 79 |         self.downsample = downsample
 80 |         self.stride = stride
 81 | 
 82 |     def forward(self, x):
 83 |         residual = x
 84 | 
 85 |         out = self.conv1(x)
 86 |         out = self.bn1(out)
 87 |         out = self.relu(out)
 88 | 
 89 |         out = self.conv2(out)
 90 |         out = self.bn2(out)
 91 |         out = self.relu(out)
 92 | 
 93 |         out = self.conv3(out)
 94 |         out = self.bn3(out)
 95 | 
 96 |         if self.downsample is not None:
 97 |             residual = self.downsample(x)
 98 | 
 99 |         out += residual
100 |         out = self.relu(out)
101 | 
102 |         return out
103 | 
104 | 
class ResNeXt(nn.Module):
    """ResNeXt classification network assembled from residual blocks.

    Layout: 7x7 stem convolution, max pooling, four residual stages with
    base widths 64/128/256/512, global average pooling and a linear
    classifier.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample,
            num_group=...)``.
        layers: sequence with the number of blocks in each of the 4 stages.
        num_classes: size of the classifier output.
        num_group: number of convolution groups forwarded to every block.
    """

    def __init__(self, block, layers, num_classes=1000, num_group=32):
        super(ResNeXt, self).__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], num_group)
        self.layer2 = self._make_layer(block, 128, layers[1], num_group, stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], num_group, stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], num_group, stride=2)
        # Adaptive pooling always reduces the feature map to 1x1.  For a
        # 224x224 input (7x7 map) this equals the former AvgPool2d(7), and
        # other input resolutions no longer break the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He-style initialisation for convolutions, identity for BatchNorm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, num_group, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block uses ``stride``; it also receives a 1x1
        conv + BatchNorm shortcut whenever the stride or the channel count
        changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, num_group=num_group))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, num_group=num_group))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores for a batch of images."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
163 | 
164 | 
def resnext18(**kwargs):
    """Build a ResNeXt-18: BasicBlock stages of depth [2, 2, 2, 2].

    Keyword arguments are forwarded unchanged to the ResNeXt constructor.
    """
    stage_depths = [2, 2, 2, 2]
    return ResNeXt(BasicBlock, stage_depths, **kwargs)
170 | 
171 | 
def resnext34(**kwargs):
    """Build a ResNeXt-34: BasicBlock stages of depth [3, 4, 6, 3].

    Keyword arguments are forwarded unchanged to the ResNeXt constructor.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNeXt(BasicBlock, stage_depths, **kwargs)
177 | 
178 | 
def resnext50(**kwargs):
    """Build a ResNeXt-50: Bottleneck stages of depth [3, 4, 6, 3].

    Keyword arguments are forwarded unchanged to the ResNeXt constructor.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNeXt(Bottleneck, stage_depths, **kwargs)
184 | 
185 | 
def resnext101_32x4d(**kwargs):
    """Build a ResNeXt-101 with 32 groups: Bottleneck stages [3, 4, 23, 3].

    Remaining keyword arguments are forwarded to the ResNeXt constructor.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNeXt(Bottleneck, stage_depths, num_group=32, **kwargs)
191 | 
def resnext101_64x4d(**kwargs):
    """Build a ResNeXt-101 with 64 groups: Bottleneck stages [3, 4, 23, 3].

    Remaining keyword arguments are forwarded to the ResNeXt constructor.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNeXt(Bottleneck, stage_depths, num_group=64, **kwargs)
197 | 
198 | 
def resnext152(**kwargs):
    """Build a ResNeXt-152: Bottleneck stages of depth [3, 8, 36, 3].

    Keyword arguments are forwarded unchanged to the ResNeXt constructor.
    """
    stage_depths = [3, 8, 36, 3]
    return ResNeXt(Bottleneck, stage_depths, **kwargs)
204 | 
class resnext(_fasterRCNN):
  """Faster R-CNN detector built on the ResNeXt-101 (32x4d) network.

  Stem plus layer1-3 of the network form ``RCNN_base`` and layer4 forms
  ``RCNN_top``; two linear layers on the 2048-d top features produce the
  class scores and the box regression.

  Args:
    classes: passed on to ``_fasterRCNN.__init__``.
    num_layers: accepted for signature compatibility; not used here, the
      101-layer 32x4d network is always built.
    pretrained: when true, weights are loaded from ``self.model_path``.
    class_agnostic: when true, a single 4-value box regressor is shared by
      all classes instead of one per class.
  """

  def __init__(self, classes, num_layers=101, pretrained=False, class_agnostic=False):
    # Alternative checkpoint: 'data/pretrained_model/resnext101_64x4d-e77a0586.pth'
    self.model_path = 'data/pretrained_model/resnext101_32x4d-29e315fa.pth'
    self.dout_base_model = 1024
    self.pretrained = pretrained
    self.class_agnostic = class_agnostic

    _fasterRCNN.__init__(self, classes, class_agnostic)

  def _load_pretrained(self, net):
    """Copy the checkpoint at ``self.model_path`` into ``net`` by position.

    The checkpoint entries are matched to the network's state_dict entries
    purely by order, not by key name.  BatchNorm ``num_batches_tracked``
    buffers are left out of the matching on both sides: a checkpoint that
    lacks them would otherwise shift every following entry by one.
    """
    print("Loading pretrained weights from %s" %(self.model_path))
    # map_location keeps loading independent of the device used for saving.
    pretrained_dict = torch.load(self.model_path, map_location='cpu')
    counter_suffix = 'num_batches_tracked'
    source = [(k, v) for k, v in pretrained_dict.items()
              if not k.endswith(counter_suffix)]
    my_model_kvpair = net.state_dict()
    target_keys = [k for k in my_model_kvpair if not k.endswith(counter_suffix)]
    if len(source) < len(target_keys):
      raise ValueError("checkpoint %s has %d tensors, the network needs %d"
                       % (self.model_path, len(source), len(target_keys)))
    for key, (_, weights) in zip(target_keys, source):
      my_model_kvpair[key] = weights
    net.load_state_dict(my_model_kvpair)

  def _init_modules(self):
    """Create the backbone, the prediction heads and freeze fixed layers."""
    #resnext = resnext101_64x4d()
    resnext = resnext101_32x4d()

    if self.pretrained:
      self._load_pretrained(resnext)

    # Build resnext.
    self.RCNN_base = nn.Sequential(resnext.conv1, resnext.bn1,resnext.relu,
      resnext.maxpool,resnext.layer1,resnext.layer2,resnext.layer3)

    self.RCNN_top = nn.Sequential(resnext.layer4)

    self.RCNN_cls_score = nn.Linear(2048, self.n_classes)
    if self.class_agnostic:
      self.RCNN_bbox_pred = nn.Linear(2048, 4)
    else:
      self.RCNN_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

    # Fix blocks: the stem convolution and its BatchNorm are always frozen.
    for p in self.RCNN_base[0].parameters(): p.requires_grad=False
    for p in self.RCNN_base[1].parameters(): p.requires_grad=False

    # RCNN_base[4], [5], [6] are layer1, layer2, layer3; FIXED_BLOCKS = k
    # freezes the first k of them.
    assert (0 <= cfg.RESNEXT.FIXED_BLOCKS < 4)
    if cfg.RESNEXT.FIXED_BLOCKS >= 3:
      for p in self.RCNN_base[6].parameters(): p.requires_grad=False
    if cfg.RESNEXT.FIXED_BLOCKS >= 2:
      for p in self.RCNN_base[5].parameters(): p.requires_grad=False
    if cfg.RESNEXT.FIXED_BLOCKS >= 1:
      for p in self.RCNN_base[4].parameters(): p.requires_grad=False

    def set_bn_fix(m):
      # BatchNorm affine parameters are never trained.
      classname = m.__class__.__name__
      if classname.find('BatchNorm') != -1:
        for p in m.parameters(): p.requires_grad=False

    self.RCNN_base.apply(set_bn_fix)
    self.RCNN_top.apply(set_bn_fix)

  def train(self, mode=True):
    """Set training mode while keeping frozen parts in eval mode.

    Args:
      mode: True for training mode, False for evaluation mode.
    """
    # Override train so that the training mode is set as we want
    nn.Module.train(self, mode)
    if mode:
      # Set fixed blocks to be in eval mode
      self.RCNN_base.eval()
      # Only the stages that _init_modules leaves trainable go back to
      # train mode: indices 4 + FIXED_BLOCKS .. 6.
      for idx in range(4 + cfg.RESNEXT.FIXED_BLOCKS, 7):
        self.RCNN_base[idx].train()

      def set_bn_eval(m):
        # BatchNorm layers stay in eval mode even during training.
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
          m.eval()

      self.RCNN_base.apply(set_bn_eval)
      self.RCNN_top.apply(set_bn_eval)

  def _head_to_tail(self, pool5):
    """Run the pooled features through RCNN_top and average over dims 3 and 2."""
    fc7 = self.RCNN_top(pool5).mean(3).mean(2)
    return fc7
286 | 


--------------------------------------------------------------------------------
/show_boundingbox_on_pic.py:
--------------------------------------------------------------------------------
 1 | #!coding=utf-8  
 2 | #####################################
 3 | # 对训练集合进行可视化
 4 | #####################################
 5 | import cv2
 6 | import pandas as pd
 7 | 
 8 | #在图上画框 
 9 | def showPicResult(image, coords, name_set):  
10 |     img = cv2.imread(image)  
11 |     for i in range(len(coords)):  
12 |         x1=coords[i][0] - coords[i][2]/2
13 |         y1=coords[i][1] - coords[i][3]/2
14 |         x2=coords[i][0] + coords[i][2]/2
15 |         y2=coords[i][1] + coords[i][3]/2
16 |         name = name_set[i]
17 |         cv2.rectangle(img,(int(x1),int(y1)),(int(x2),int(y2)),(0,255,0),3) 
18 |         cv2.putText(img,name,(int(x1),int(y1+20)),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2)
19 |     cv2.namedWindow("image_detector", 1)    #1表示原图
20 |     cv2.moveWindow("image_detector",0,0)
21 |     cv2.resizeWindow("image_detector", 256,192) #可视化的图片大小
22 |     cv2.imshow('image_detector', img)  
23 |     cv2.waitKey(0)    #表示等待500ms，0表示一直等待直到按键
24 |     cv2.destroyAllWindows()  
25 | 
# Convert a corner-format box into the center/size format
def transfer(coords):
    '''
    Input:
        coords: box corners as the string "x_min_y_min_x_max_y_max"
    Output:
        the converted box (x, y, w, h): x and y are the center coordinates,
        w is the box width and h is the box height
    '''
    fields = coords.split('_')
    x_min, y_min, x_max, y_max = [float(fields[k]) for k in range(4)]

    center = ((x_min + x_max)/2, (y_min + y_max)/2)
    size = (x_max - x_min, y_max - y_min)

    return center + size
46 |       
if __name__ == "__main__":
    # Show every training image with the boxes listed in its annotation txt.
    import os
    txt_path = './txt'
    pic_root_path = './train'
    for parent, _, files in os.walk(txt_path):
        for file in files:
            ori_img = pic_root_path+'/'+file.split('.')[0]+'.jpg'
            coords_set = []
            name_set = []
            # `with` closes the annotation file; it used to stay open.
            with open(os.path.join(parent, file), 'r') as pic_file:
                for line in pic_file:
                    if not line.strip():
                        # Skip blank lines instead of failing on them.
                        continue
                    # Each line: name x_min y_min x_max y_max
                    fields = line.split(' ')
                    name = fields[0]
                    coord = fields[1]+'_'+fields[2]+'_'+fields[3]+'_'+fields[4]
                    coords_set.append(transfer(coord))
                    name_set.append(name)
            print(file)
            showPicResult(ori_img, coords_set, name_set)
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------