├── .gitignore
├── Data_preproces
├── data_expansion.py
├── order_name.py
└── voc_annotation.py
├── LICENSE
├── README.md
├── data_expansion.py
├── frcnn_predict.py
├── get_map.py
├── nets
├── FasterRCNN_train.py
├── Suggestion_box.py
├── __init__.py
├── __pycache__
│ ├── FasterRCNN_train.cpython-37.pyc
│ ├── FasterRCNN_train.cpython-39.pyc
│ ├── Suggestion_box.cpython-37.pyc
│ ├── Suggestion_box.cpython-39.pyc
│ ├── __init__.cpython-37.pyc
│ ├── __init__.cpython-39.pyc
│ ├── classifier.cpython-37.pyc
│ ├── classifier.cpython-39.pyc
│ ├── faster_rcnn_feature_extraction.cpython-39.pyc
│ ├── feature_extraction.cpython-39.pyc
│ ├── feature_pyramid_network.cpython-39.pyc
│ ├── frcnn.cpython-39.pyc
│ ├── frcnn_training.cpython-39.pyc
│ ├── resnet101.cpython-37.pyc
│ ├── resnet101.cpython-39.pyc
│ ├── resnet50.cpython-37.pyc
│ ├── resnet50.cpython-39.pyc
│ ├── resnet50_FPN.cpython-37.pyc
│ ├── resnet50_FPN.cpython-39.pyc
│ ├── rpn.cpython-37.pyc
│ ├── rpn.cpython-39.pyc
│ ├── vgg16.cpython-37.pyc
│ └── vgg16.cpython-39.pyc
├── classifier.py
├── resnet101.py
├── resnet50.py
├── resnet50_ECA_FPN.py
├── resnet50_FPN.py
├── rpn.py
└── vgg16.py
├── order_name.py
├── predict.py
├── qa.md
├── requirements.txt
├── summary.py
├── train.py
├── utils
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-37.pyc
│ ├── __init__.cpython-39.pyc
│ ├── anchors.cpython-37.pyc
│ ├── anchors.cpython-39.pyc
│ ├── callbacks.cpython-37.pyc
│ ├── callbacks.cpython-39.pyc
│ ├── dataloader.cpython-37.pyc
│ ├── dataloader.cpython-39.pyc
│ ├── utils.cpython-37.pyc
│ ├── utils.cpython-39.pyc
│ ├── utils_bbox.cpython-37.pyc
│ ├── utils_bbox.cpython-39.pyc
│ ├── utils_fit.cpython-37.pyc
│ ├── utils_fit.cpython-39.pyc
│ ├── utils_map.cpython-37.pyc
│ └── utils_map.cpython-39.pyc
├── anchors.py
├── callbacks.py
├── dataloader.py
├── kmeans_anchors
│ ├── Bikmeans_anchors.py
│ ├── main.py
│ ├── plot_kmeans.py
│ ├── read_voc.py
│ └── yolo_kmeans.py
├── soft_nms.py
├── utils.py
├── utils_bbox.py
├── utils_fit.py
└── utils_map.py
└── voc_annotation.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore map, miou, datasets
2 | map_out/
3 | miou_out/
4 | VOCdevkit/
5 | datasets/
6 | Medical_Datasets/
7 | lfw/
8 | logs/
9 | model_data/
10 | .temp_map_out/
11 |
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | pip-wheel-metadata/
35 | share/python-wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | MANIFEST
40 |
41 | # PyInstaller
42 | # Usually these files are written by a python script from a template
43 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
44 | *.manifest
45 | *.spec
46 |
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 |
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .nox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *.cover
61 | *.py,cover
62 | .hypothesis/
63 | .pytest_cache/
64 |
65 | # Translations
66 | *.mo
67 | *.pot
68 |
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # IPython
92 | profile_default/
93 | ipython_config.py
94 |
95 | # pyenv
96 | .python-version
97 |
98 | # pipenv
99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
102 | # install all needed dependencies.
103 | #Pipfile.lock
104 |
105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
106 | __pypackages__/
107 |
108 | # Celery stuff
109 | celerybeat-schedule
110 | celerybeat.pid
111 |
112 | # SageMath parsed files
113 | *.sage.py
114 |
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 |
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 |
128 | # Rope project settings
129 | .ropeproject
130 |
131 | # mkdocs documentation
132 | /site
133 |
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 |
139 | # Pyre type checker
140 | .pyre/
141 |
--------------------------------------------------------------------------------
/Data_preproces/data_expansion.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | """数据增强
3 | 1. 翻转变换 flip
4 | 2. 图片裁剪 crop
5 | 3. 色彩抖动 color jittering
6 | 4. 平移变换 shift
7 | 5. 尺度变换 scale
8 | 6. 对比度变换 contrast
9 | 7. 噪声扰动 noise
10 | 8. 旋转变换/反射变换 Rotation/reflection
11 | 9.直方图增强
12 | 10.拉普拉斯算子
13 | 11.对数变换
14 | 12.伽马变换
15 | 13.限制对比度自适应直方图均衡化CLAHE
16 | 14.retinex SSR
17 | 15.retinex MSR
18 | 16.
19 |
20 | """
21 |
22 | import logging
23 | import os
24 | import random
25 | import threading
26 | import time
27 | from dataclasses import dataclass
28 | from distutils.log import error
29 |
30 | import cv2
31 | import numpy as np
32 | from PIL import Image, ImageEnhance, ImageFile
33 |
34 |
35 | # 图片裁剪
def read_path(file_pathname, save_dir=r'jixing\polarity', crop_height=3585, crop_width=3629):
    """Crop every image in a folder to its top-left region and save the result.

    :param file_pathname: folder whose entries are read with ``cv2.imread``
    :param save_dir: folder the cropped images are written to (must already exist)
    :param crop_height: number of rows kept, counted from the top
    :param crop_width: number of columns kept, counted from the left
    :return: None
    """
    for filename in os.listdir(file_pathname):
        # Build the full path of the current entry.
        img_filename = os.path.join(file_pathname, filename)

        img = cv2.imread(img_filename)
        if img is None:
            # cv2.imread signals an unreadable file by returning None; slicing
            # None would raise a TypeError and abort the whole batch.
            logging.getLogger(__name__).warning("skip unreadable image: %s", img_filename)
            continue

        # NumPy slicing order is [y0:y1, x0:x1].
        crop_img = img[0:crop_height, 0:crop_width]

        cv2.imwrite(save_dir + "/" + filename, crop_img)
48 |
49 |
# Module-level logger; imageOps uses it to report unknown operation names.
logger = logging.getLogger(__name__)
# Tell PIL to load image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True
52 |
53 |
class DataAugmentation:
    """Static helpers for image augmentation and enhancement.

    The methods take an image as loaded by ``cv2.imread`` (a NumPy array) and
    return the processed image. No method reads or writes instance state.
    """

    def __init__(self):
        pass

    @staticmethod
    def openImage(image):
        """Load the image stored at path ``image`` with ``cv2.imread``."""
        return cv2.imread(image)

    @staticmethod
    def randomRotation(image, center=None, scale=1.0):
        """Rotate the image by a random whole-degree angle drawn from [-180, 180).

        :param image: image array; its first two dimensions are height and width
        :param center: rotation centre ``(x, y)``; defaults to the image centre
        :param scale: scale factor passed to ``cv2.getRotationMatrix2D``
        :return: rotated image with the same width and height as the input
        """
        random_angle = np.random.randint(-180, 180)
        (h, w) = image.shape[:2]
        # Without an explicit centre, rotate around the middle of the image.
        if center is None:
            center = (w / 2, h / 2)
        m = cv2.getRotationMatrix2D(center, random_angle, scale)
        return cv2.warpAffine(image, m, (w, h))

    @staticmethod
    def transpose(image):
        """Flip the image using a randomly chosen ``cv2.flip`` code.

        The code is drawn uniformly from {-1, 0, 1}: 0 flips around the x-axis,
        1 around the y-axis and -1 around both.

        :return: flipped image
        """
        # np.random.randint excludes the upper bound. The previous (-2, 2)
        # produced -2 as well, which cv2.flip treats like -1 and therefore
        # made the "both axes" flip twice as likely as the others.
        flip_code = np.random.randint(-1, 2)
        return cv2.flip(image, flip_code)

    # ---- colour jitter and noise ----

    @staticmethod
    def randomColor(image):
        """Randomly jitter saturation, brightness, contrast and sharpness.

        Each of the four adjustments is switched on or off by an independent
        coin flip and, when on, applied with a random enhancement factor.

        :param image: BGR image array (OpenCV layout)
        :return: jittered BGR image array
        """
        saturation = random.randint(0, 1)
        brightness = random.randint(0, 1)
        contrast = random.randint(0, 1)
        sharpness = random.randint(0, 1)
        # ImageEnhance works on PIL images, so convert BGR -> RGB -> PIL first.
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if random.random() < saturation:
            random_factor = np.random.randint(0, 31) / 10.
            image = ImageEnhance.Color(image).enhance(random_factor)
        if random.random() < brightness:
            random_factor = np.random.randint(10, 21) / 10.
            image = ImageEnhance.Brightness(image).enhance(random_factor)
        if random.random() < contrast:
            random_factor = np.random.randint(10, 21) / 10.
            image = ImageEnhance.Contrast(image).enhance(random_factor)
        if random.random() < sharpness:
            random_factor = np.random.randint(0, 31) / 10.
            image = ImageEnhance.Sharpness(image).enhance(random_factor)
        # Back to the OpenCV BGR array layout.
        image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        return image

    @staticmethod
    def randomGaussian(image, mean=0.2, sigma=0.04):
        """Add Gaussian noise to the image.

        :param image: image array with values in 0-255
        :param mean: mean of the noise, expressed on the 0-1 intensity scale
        :param sigma: standard deviation of the noise; larger means more noise
        :return: noisy image as a uint8 array
        """
        # Normalise intensities to 0-1.
        img = image / 255
        noise = np.random.normal(mean, sigma, img.shape)
        gaussian_out = img + noise
        # Clamp to the valid range before converting back to 0-255.
        gaussian_out = np.clip(gaussian_out, 0, 1)
        gaussian_out = np.uint8(gaussian_out * 255)
        return gaussian_out

    @staticmethod
    def Pepper_noise(image):
        """Add salt-and-pepper noise and return the noisy copy.

        :param image: image array; it is not modified
        :return: copy of ``image`` with random elements set to 255 or 0
        """
        # Share of the noisy elements that become salt (255).
        s_vs_p = 0.04
        # Share of all array elements that receive noise.
        amount = 0.03
        noisy_img = np.copy(image)

        # Salt. np.random.randint excludes the upper bound, so use the axis
        # length itself: the previous ``i - 1`` never reached the last index
        # of any axis and raised ValueError for an axis of length 1.
        num_salt = np.ceil(amount * image.size * s_vs_p)
        coords = [np.random.randint(0, i, int(num_salt)) for i in image.shape]
        noisy_img[tuple(coords)] = 255

        # Pepper.
        num_pepper = np.ceil(amount * image.size * (1. - s_vs_p))
        coords = [np.random.randint(0, i, int(num_pepper)) for i in image.shape]
        noisy_img[tuple(coords)] = 0
        return noisy_img

    @staticmethod
    def Poisson_noise(image):
        """Add Poisson noise to the image.

        :param image: image array with values in 0-255
        :return: noisy image as a uint8 array of the same shape
        """
        # Scale factor: number of distinct values rounded up to a power of two.
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        noisy_img = np.random.poisson(image * vals) / float(vals)
        # The raw result is float and may exceed 255; clamp and convert so the
        # output is a regular 8-bit image like the other operations return.
        return np.clip(noisy_img, 0, 255).astype(np.uint8)

    # ---- enhancement ----

    @staticmethod
    def hist(image):
        """Equalise the histogram of each of the three channels independently."""
        first, second, third = cv2.split(image)
        equalised = [cv2.equalizeHist(channel) for channel in (first, second, third)]
        return cv2.merge(equalised)

    @staticmethod
    def laplacian(image):
        """Sharpen the image with a 3x3 Laplacian-based kernel."""
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        image_lap = cv2.filter2D(image, cv2.CV_8UC3, kernel)
        return image_lap

    @staticmethod
    def log(image):
        """Apply a logarithmic transform and rescale the result to 0-255.

        :return: transformed image as a uint8 array
        """
        # Work in float: adding 1 to a uint8 pixel of 255 wraps around to 0,
        # and truncating the logarithm to uint8 before rescaling would leave
        # only a handful of grey levels.
        image_log = np.log1p(np.asarray(image, dtype=np.float32))
        image_log = cv2.normalize(image_log, None, 0, 255, cv2.NORM_MINMAX)
        # Convert to an 8-bit image.
        return cv2.convertScaleAbs(image_log)

    @staticmethod
    def gamma(image, fgamma=0.5):
        """Apply a gamma (power-law) transform.

        :param image: image array with values in 0-255
        :param fgamma: exponent; larger values give a darker image
        :return: transformed image as a uint8 array
        """
        image_gamma = np.uint8(np.power((np.array(image) / 255.0), fgamma) * 255.0)
        cv2.normalize(image_gamma, image_gamma, 0, 255, cv2.NORM_MINMAX)
        cv2.convertScaleAbs(image_gamma, image_gamma)
        return image_gamma

    @staticmethod
    def clahe(image):
        """Apply contrast-limited adaptive histogram equalisation per channel."""
        b, g, r = cv2.split(image)
        equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        b = equaliser.apply(b)
        g = equaliser.apply(g)
        r = equaliser.apply(r)
        image_clahe = cv2.merge([b, g, r])
        return image_clahe

    @staticmethod
    def __replaceZeroes(data):
        """Replace zeros in ``data`` (in place) with its smallest non-zero value.

        This keeps the later logarithms finite. When every element is zero
        there is no non-zero value to take, so 1 is used instead.

        :return: the same array object, modified
        """
        nonzero_values = data[data != 0]
        fill_value = nonzero_values.min() if nonzero_values.size else 1
        data[data == 0] = fill_value
        return data

    @staticmethod
    def __SSR(src_img, size):
        """Single-scale Retinex on one channel.

        :param src_img: single-channel image; its zeros are replaced in place
        :param size: side length of the Gaussian blur kernel
        :return: result rescaled to 0-255 as a uint8 array
        """
        L_blur = cv2.GaussianBlur(src_img, (size, size), 0)
        img = DataAugmentation.__replaceZeroes(src_img)
        L_blur = DataAugmentation.__replaceZeroes(L_blur)

        dst_Img = cv2.log(img / 255.0)
        dst_Lblur = cv2.log(L_blur / 255.0)
        dst_IxL = cv2.multiply(dst_Img, dst_Lblur)
        log_R = cv2.subtract(dst_Img, dst_IxL)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        log_uint8 = cv2.convertScaleAbs(dst_R)
        return log_uint8

    @staticmethod
    def SSR_image(image):
        """Apply single-scale Retinex (kernel size 3) to each channel."""
        size = 3
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__SSR(b_gray, size)
        g_gray = DataAugmentation.__SSR(g_gray, size)
        r_gray = DataAugmentation.__SSR(r_gray, size)
        result = cv2.merge([b_gray, g_gray, r_gray])
        return result

    @staticmethod
    def __MSR(img, scales):
        """Multi-scale Retinex on one channel.

        :param img: single-channel image; its zeros are replaced in place
        :param scales: Gaussian kernel side lengths, one per scale
        :return: result rescaled to 0-255 as a uint8 array
        """
        weight = 2 / 3.0
        h, w = img.shape[:2]
        log_R = np.zeros((h, w), dtype=np.float32)

        # The zero replacement and the log of the image do not depend on the
        # scale, so compute them once instead of on every iteration.
        img = DataAugmentation.__replaceZeroes(img)
        dst_Img = cv2.log(img / 255.0)

        for scale in scales:
            L_blur = cv2.GaussianBlur(img, (scale, scale), 0)
            L_blur = DataAugmentation.__replaceZeroes(L_blur)
            dst_Lblur = cv2.log(L_blur / 255.0)
            dst_Ixl = cv2.multiply(dst_Img, dst_Lblur)
            log_R += weight * cv2.subtract(dst_Img, dst_Ixl)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        log_uint8 = cv2.convertScaleAbs(dst_R)
        return log_uint8

    @staticmethod
    def MSR_image(image):
        """Apply multi-scale Retinex (kernel sizes 15, 101, 301) to each channel."""
        scales = [15, 101, 301]  # [3,5,9]
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__MSR(b_gray, scales)
        g_gray = DataAugmentation.__MSR(g_gray, scales)
        r_gray = DataAugmentation.__MSR(r_gray, scales)
        result = cv2.merge([b_gray, g_gray, r_gray])
        return result
288 |
289 |
def imageOps(func_name, image1, img_des_path, img_file_name, times=1):
    """Run one named augmentation on an image and write the result to disk.

    :param func_name: key selecting the operation (see ``funcMap`` below)
    :param image1: source image array as returned by ``cv2.imread``
    :param img_des_path: output folder; created when it does not exist
    :param img_file_name: file name used for the output image
    :param times: how many times the operation is run; every run writes to
        the same output path, so only the last result remains on disk
    :return: -1 when the operation name is unknown or the image is missing,
        otherwise None
    """
    funcMap = {
        # "randomRotation": DataAugmentation.randomRotation,
        "randomcolor": DataAugmentation.randomColor,
        "transpose": DataAugmentation.transpose,
        "randomGaussian": DataAugmentation.randomGaussian,
        "pepper_noise": DataAugmentation.Pepper_noise,
        "Poisson_noise": DataAugmentation.Poisson_noise,
        "hist": DataAugmentation.hist,
        "laplacian": DataAugmentation.laplacian,
        "log": DataAugmentation.log,
        "gamma": DataAugmentation.gamma,
        "clahe": DataAugmentation.clahe,
        "SSR_image": DataAugmentation.SSR_image,
        "MSR_image": DataAugmentation.MSR_image,
    }
    operation = funcMap.get(func_name)
    if operation is None:
        logger.error("%s is not exist", func_name)
        return -1
    if image1 is None:
        # cv2.imread returns None for unreadable files; failing here keeps the
        # worker thread from dying with an opaque error inside the operation.
        logger.error("no image data for %s", img_file_name)
        return -1

    # cv2.imwrite does not create folders and only reports failure through its
    # return value, so make sure the target folder exists first.
    os.makedirs(img_des_path, exist_ok=True)
    path = os.path.join(img_des_path, img_file_name)

    for _ in range(times):
        new_image = operation(image1)
        if not cv2.imwrite(path, new_image):
            logger.error("failed to write %s", path)
310 |
311 |
# Names of the operations threadOPS applies to every image. The commented-out
# set below lists the other operation names that imageOps accepts.
# opsList = {"transpose",'randomcolor',"gamma","MSR_image","pepper_noise","hist","log","clahe",'randomGaussian',
# 'Poisson_noise','laplacian','SSR_image'}
opsList = {"clahe"}  # CLAHE gave the better enhancement result
315 |
def threadOPS(img_path, new_img_path):
    """Apply every operation in ``opsList`` to each image, one thread per operation.

    :param img_path: source image folder, or the path of a single image file
    :param new_img_path: folder the processed images are written to
    :return: None
    :raises SystemExit: when the source folder contains a sub-folder
    """
    if os.path.isdir(img_path):
        img_dir = img_path
        img_names = os.listdir(img_path)
    else:
        # Single file: split it into folder and name so the join below rebuilds
        # the same path and the output keeps only the base file name.
        img_dir, single_name = os.path.split(img_path)
        img_names = [single_name]

    # Refuse to run on a folder that contains sub-folders.
    for img_name in img_names:
        if os.path.isdir(os.path.join(img_dir, img_name)):
            print('contain file folder')
            raise SystemExit

    for img_name in img_names:
        tmp_img_name = os.path.join(img_dir, img_name)
        image1 = DataAugmentation.openImage(tmp_img_name)
        if image1 is None:
            logger.warning("skip unreadable image: %s", tmp_img_name)
            continue

        # Start one worker per operation, then wait for all of them so every
        # output file is written before moving on to the next image.
        workers = []
        for ops_name in opsList:
            worker = threading.Thread(target=imageOps,
                                      args=(ops_name, image1, new_img_path, img_name))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
365 |
366 |
if __name__ == '__main__':
    # Arguments: source image folder, then output folder for the processed images.
    threadOPS(#r"F:\Desktop\PCB_code\date_set\1shujuchuli",
              #r"F:\Desktop\PCB_code\date_set\2shujucunfang"
              r'F:\Desktop\PCB_code\data_set1\data_shiyan',
              r'F:\Desktop\PCB_code\data_set1\data_shiyan_kuochong')

    # read_path(r'F:\Desktop\PCB_code\data_set1\data_shiyan')  # crop the images
374 |
375 |
376 | '''
377 | 路径问题:
378 | 关于上述路径中,\table\name\rain中的\t,\n,\r都易被识别为转义字符。
379 | 解决的办法主要由以下三种:
380 | #1
381 | path=r"C:\data\table\name\rain"
382 | #前面加r表示不转义
383 |
384 | #2
385 | path="C:\\data\\table\\name\\rain"
386 | #用\\代替\
387 |
388 | #3
389 | path="C:/data/table/name/rain"
390 | #用/代替\
391 |
392 | '''
393 |
394 |
--------------------------------------------------------------------------------
/Data_preproces/order_name.py:
--------------------------------------------------------------------------------
1 | #...........................#
2 | #对文件夹中的文件进行重命名
3 | #...........................#
4 | import os
5 | import xml
6 | from xml.dom import minidom
7 | import xml.etree.cElementTree as ET
8 |
def myrename(file_path):
    """Rename every entry in a folder, replacing spaces in its name with underscores.

    An entry is left untouched (and reported as failed) when another entry
    already carries the new name, because ``os.rename`` silently overwrites an
    existing file on POSIX systems.

    :param file_path: folder whose entries are renamed
    :return: None
    """
    for fi in os.listdir(file_path):
        old_dir = os.path.join(file_path, fi)
        print('wenjianmingzi :', old_dir)
        # Remove the spaces from the name.
        new_name = fi.replace(" ", "_")
        print("新名字为:", new_name)

        # Sequential naming alternative:
        # new_name=str(i+1)+"."+str(fi.split(".")[-1])
        new_dir = os.path.join(file_path, new_name)
        if new_name != fi and os.path.exists(new_dir):
            # Renaming would destroy the existing entry; keep both files.
            print("Failed!")
            continue
        try:
            os.rename(old_dir, new_dir)
        except OSError as e:
            print(e)
            print("Failed!")
        else:
            print("SUcess!")
28 |
29 |
30 | #...........................#
31 | #对xml文件内的filename和path名进行重命名
32 | #...........................#
33 |
def xml_name(xmlpath):
    """Rewrite the ``filename`` and ``path`` tags of every XML file in a folder.

    For ``<stem>.xml`` the first ``filename`` tag is set to ``<stem>.jpg`` and
    the first ``path`` tag to ``xmlpath`` + ``\\`` + ``<stem>.jpg``. Sub-folders
    and files that do not end in ``.xml`` are skipped, and a tag that is absent
    from a file is left out.

    :param xmlpath: folder containing the annotation XML files
    :return: None
    """
    def set_first_tag_text(dom, tag, text):
        # Set the text of the first <tag> element; tolerate a missing tag or
        # an empty element that has no text node yet.
        nodes = dom.documentElement.getElementsByTagName(tag)
        if not nodes:
            return None
        if nodes[0].firstChild is None:
            nodes[0].appendChild(dom.createTextNode(text))
        else:
            nodes[0].firstChild.data = text
        return text

    for xmlFile in os.listdir(xmlpath):
        xml_file_path = os.path.join(xmlpath, xmlFile)
        # Test the full path: a bare name would be resolved against the
        # current working directory instead of xmlpath.
        if os.path.isdir(xml_file_path) or not xmlFile.lower().endswith('.xml'):
            continue

        # splitext keeps dots inside the stem (e.g. "img.v2.xml" -> "img.v2").
        name1 = os.path.splitext(xmlFile)[0]
        dom = minidom.parse(xml_file_path)

        # Rename <filename>.
        t = set_first_tag_text(dom, 'filename', name1 + '.jpg')
        print('t:', t)
        # Rename <path>.
        t1 = set_first_tag_text(dom, 'path', xmlpath + '\\' + name1 + '.jpg')
        print('t1:', t1)

        # Write as UTF-8 and declare it, so non-ASCII text survives regardless
        # of the platform's default encoding.
        with open(xml_file_path, 'w', encoding='utf-8') as fh:
            print('fh:', fh)
            dom.writexml(fh, encoding='utf-8')
        print('写入name/pose OK!')
56 |
57 |
58 | # 删除xml文件中显示的版本号
def delete_xmlversion(xmlpath, savedir):
    """Copy the XML files of a folder to another folder without their XML declaration.

    Every file in ``xmlpath`` whose name ends in ``.xml`` is written to
    ``savedir`` under the same name with any ``<?xml ... ?>`` declaration
    removed.

    :param xmlpath: folder containing the source XML files
    :param savedir: existing folder that receives the cleaned files; may be
        the same folder as ``xmlpath``
    :return: None
    """
    # Function-scope import: the module does not import ``re`` at the top.
    import re

    # NOTE(review): the literal that was being replaced was an empty string,
    # which turned this function into a plain copy. Removing the declaration
    # matches the stated purpose (dropping the version line) - please confirm.
    declaration = re.compile(r'<\?xml[^>]*\?>')

    for ml in os.listdir(xmlpath):
        if not ml.endswith('.xml'):
            continue
        print('{}'.format(ml))
        # Read everything before opening the output, so a source file is not
        # truncated when xmlpath and savedir are the same folder.
        # NOTE(review): input is read with the platform default encoding, as before.
        with open(os.path.join(xmlpath, ml), 'r') as fi:
            content = fi.readlines()
        with open(os.path.join(savedir, ml), 'w', encoding='utf-8') as fo:
            fo.writelines(declaration.sub('', line) for line in content)
        print('替换成功')
76 |
77 |
78 | #删除xml文件中部分不要的标签信息
def Delete_part_information_xml(path_root, xy_classes,
                                save_dir=r'F:\Desktop\PCB_code\PCB_DataSet\Annotations—new'):
    """Remove unwanted ``object`` entries from annotation XML files.

    Every file in the given folder(s) is parsed, each ``object`` element whose
    ``name`` is not in ``xy_classes`` is removed, and the result is written to
    ``save_dir`` under the same file name.

    :param path_root: one annotation folder, or an iterable of folders
    :param xy_classes: class names to keep
    :param save_dir: existing folder that receives the filtered XML files
    :return: None
    """
    # A single path must be wrapped: looping over a string would visit its
    # characters one by one and try to list each of them as a folder.
    if isinstance(path_root, (str, os.PathLike)):
        anno_dirs = [path_root]
    else:
        anno_dirs = path_root

    for anno_path in anno_dirs:
        xml_list = os.listdir(anno_path)
        print("打开{}文件".format(xml_list))
        for annoxml in xml_list:
            path_xml = os.path.join(anno_path, annoxml)
            print('保存文件路径为{}'.format(path_xml))
            tree = ET.parse(path_xml)
            root = tree.getroot()

            # findall returns a list, so removing children while looping is safe.
            for child in root.findall('object'):
                name = child.find('name').text
                if name not in xy_classes:
                    root.remove(child)
            print(annoxml)
            # Save the filtered annotation.
            tree.write(os.path.join(save_dir, annoxml))
95 |
96 |
97 |
98 |
if __name__=="__main__":
    file_path=r"F:\Desktop\PCB_code\date_set\new_data"  # folder passed to myrename, which lists its entries
    # xmlpath="F:\\桌面\\PCB_code\\date_set\\Image_label_source"
    # savedir = r'F:\桌面\PCB_code\date_set\3'  # output folder for delete_xmlversion
    # xmlpath=r'F:\桌面\PCB_code\date_set\label'
    myrename(file_path)  # rename the image files

    # Steps for fixing the names inside the XML files:
    # myrename(xmlpath)  # 1. rename the XML files themselves
    # xml_name(xmlpath)  # 2. rewrite the filename and path tags inside each XML file
    # delete_xmlversion(xmlpath,savedir)  # 3. drop the version line from the rewritten files

    # Remove the object entries whose class is not listed in xy_classes.
    path_root=r'F:\Desktop\PCB_code\PCB_DataSet\Annotations'
    xy_classes=['Speaker',"Bat","2USB","Rj45+2USB","Cap_cross","Cap_blue_black","Jumper04p",
                "Jumper10p", "HDD","Power08p","Power04p","Power24p"]
    Delete_part_information_xml(path_root,xy_classes)
116 |
117 |
--------------------------------------------------------------------------------
/Data_preproces/voc_annotation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import xml.etree.ElementTree as ET
4 |
5 | import numpy as np
6 |
7 | from utils.utils import get_classes
8 |
#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script computes when it is run:
#   0 - the whole pipeline: the txt files in <PCB_DataSet_path>/ImageSets and the
#       per-set list files written to <PCB_DataSet_path>/<set>.txt
#   1 - only the txt files in <PCB_DataSet_path>/ImageSets
#   2 - only the per-set list files
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0
#-------------------------------------------------------------------#
#   Must be edited: path of the class-name file used to build the
#   object information in the list files.
#   Use the same classes_path as for training and prediction.
#   If the generated list files contain no objects, the classes
#   are most likely not set correctly.
#   Only used when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = r'F:\Desktop\PCB_code\PCB_DataSet\cls_classes.txt'
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent is the share of all samples that goes to (train + val); the rest
#   becomes the test set. With 0.9 this is (train + val) : test = 9 : 1.
#   train_percent is the share of (train + val) that goes to train.
#   With 0.9 this is train : val = 9 : 1.
#   Only used when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Folder that holds the dataset (Annotations, ImageSets,
#   JPEGImages); a relative path is resolved against the
#   current working directory.
#-------------------------------------------------------#
PCB_DataSet_path=r'PCB_DataSet'

# Image sets for which a list file is generated.
PCB_Data_Sets=['trainval','test']
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Counters: images per set and objects per class
#-------------------------------------------------------#
photo_nums = np.zeros(len(PCB_Data_Sets))
nums = np.zeros(len(classes))
def convert_annotation(image_id, list_file):
    """Append the boxes of one annotation file to an open list file.

    Reads ``<PCB_DataSet_path>/Annotations/<image_id>.xml`` and, for every
    object whose class is in ``classes`` and which is not marked difficult,
    writes `` xmin,ymin,xmax,ymax,class_id`` to ``list_file`` and increments
    the per-class counter ``nums``.

    :param image_id: annotation file name without the ``.xml`` extension
    :param list_file: writable text file object
    :return: None
    """
    annotation_path = os.path.join(PCB_DataSet_path, 'Annotations/%s.xml'%(image_id))
    # Close the annotation file as soon as it is parsed instead of leaking the handle.
    with open(annotation_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        difficult = 0
        difficult_node = obj.find('difficult')
        if difficult_node is not None:
            difficult = difficult_node.text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # Coordinates may be stored as floats; truncate them to integers.
        b = tuple(int(float(xmlbox.find(tag).text)) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))

        nums[cls_id] = nums[cls_id] + 1
63 |
if __name__ == "__main__":
    # Fixed seed so the train/val/test split is reproducible.
    random.seed(0)
    if " " in os.path.abspath(PCB_DataSet_path):
        raise ValueError("数据集存放的文件夹路径与图片名称中不可以存在空格,否则会影响正常的模型训练,请注意修改。")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(PCB_DataSet_path, 'Annotations')
        saveBasePath = os.path.join(PCB_DataSet_path, 'ImageSets')
        total_xml = [entry for entry in os.listdir(xmlfilepath) if entry.endswith(".xml")]

        num = len(total_xml)
        indices = range(num)
        tv = int(num*trainval_percent)
        tr = int(tv*train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)
        # Sets give constant-time membership tests in the loop below; the
        # sampled lists themselves are unchanged, so the split is the same.
        trainval_ids = set(trainval)
        train_ids = set(train)

        print("train and val size",tv)
        print("train size",tr)
        with open(os.path.join(saveBasePath,'trainval.txt'), 'w') as ftrainval, \
                open(os.path.join(saveBasePath,'test.txt'), 'w') as ftest, \
                open(os.path.join(saveBasePath,'train.txt'), 'w') as ftrain, \
                open(os.path.join(saveBasePath,'val.txt'), 'w') as fval:
            for i in indices:
                # Strip the ".xml" extension to get the sample id.
                name = total_xml[i][:-4]+'\n'
                if i in trainval_ids:
                    ftrainval.write(name)
                    if i in train_ids:
                        ftrain.write(name)
                    else:
                        fval.write(name)
                else:
                    ftest.write(name)
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate PCB_train.txt and PCB_val.txt for train.")
        for type_index, image_set in enumerate(PCB_Data_Sets):
            ids_path = os.path.join(PCB_DataSet_path, 'ImageSets/%s.txt'%(image_set))
            with open(ids_path, encoding='utf-8') as ids_file:
                image_ids = ids_file.read().strip().split()
            # One list file per image set: image path followed by its boxes.
            with open(os.path.join(PCB_DataSet_path,'%s.txt'%( image_set)), 'w', encoding='utf-8') as list_file:
                for image_id in image_ids:
                    list_file.write('%s/JPEGImages/%s.jpg'%(os.path.abspath(PCB_DataSet_path),image_id))
                    convert_annotation(image_id, list_file)
                    list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
        print("Generate PCB_train.txt and PCB_val.txt for train done.")

        def printTable(List1, List2):
            """Print the columns in List1 row by row, right-aligned to the widths in List2."""
            for row in zip(*List1):
                print("|", end=' ')
                for cell, width in zip(row, List2):
                    print(cell.rjust(int(width)), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        # Each column is as wide as its longest cell.
        colWidths = [max((len(cell) for cell in column), default=0) for column in tableData]
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("训练集数量小于500,属于较小的数据量,请注意设置较大的训练世代(Epoch)以满足足够的梯度下降次数(Step)。")

        if np.sum(nums) == 0:
            print("在数据集中并未获得任何目标,请注意修改classes_path对应自己的数据集,并且保证标签名字正确,否则训练将会没有任何效果!")
            print("在数据集中并未获得任何目标,请注意修改classes_path对应自己的数据集,并且保证标签名字正确,否则训练将会没有任何效果!")
            print("在数据集中并未获得任何目标,请注意修改classes_path对应自己的数据集,并且保证标签名字正确,否则训练将会没有任何效果!")
            print("(重要的事情说三遍)。")
153 | print("(重要的事情说三遍)。")
154 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 JiaQi Xu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Faster-Rcnn:PCB-component-defect-detection目标检测模型在Pytorch当中的实现
2 | ---
3 |
4 | ## 目录
5 | 1. [仓库更新 Top News](#top-news)
6 | 2. [性能情况 Performance](#性能情况)
7 | 3. [所需环境 Environment](#所需环境)
8 | 4. [文件下载 Download](#文件下载)
9 | 5. [预测步骤 How2predict](#预测步骤)
10 | 6. [训练步骤 How2train](#训练步骤)
11 | 7. [评估步骤 How2eval](#评估步骤)
12 | 8. [参考资料 Reference](#Reference)
13 |
14 | ## Top News
15 | **本项目支持step、cos学习率下降法、支持adam、sgd优化器选择、支持学习率根据batch_size自适应调整、新增图片裁剪。**
16 | **增加了大量注释、增加了大量可调整参数、对代码的组成模块进行修改、增加fps、视频预测、批量预测等功能。**
17 |
18 | ## 性能情况
19 | | 训练数据集 | 权值文件名称 | 测试数据集 | 输入图片大小 | mAP 0.5:0.95 | mAP 0.5 |
20 | | :-----: | :-----: | :------: | :------: | :------: | :-----: |
21 | | VOC07+12 | [voc_weights_resnet.pth](https://github.com/bubbliiiing/faster-rcnn-pytorch/releases/download/v1.0/voc_weights_resnet.pth) | VOC-Test07 | - | - | 80.36
22 | | VOC07+12 | [voc_weights_vgg.pth](https://github.com/bubbliiiing/faster-rcnn-pytorch/releases/download/v1.0/voc_weights_vgg.pth) | VOC-Test07 | - | - | 77.46
23 | **本代码中也提供训练权重,读者可以不用下载。**
24 |
25 | ## 所需环境
26 | torch == 1.2.0
27 |
28 | ## 文件下载
29 | 训练所需的voc_weights_resnet.pth或者voc_weights_vgg.pth以及主干的网络权重可以在百度云下载。
30 | voc_weights_resnet.pth是resnet为主干特征提取网络用到的;
31 | voc_weights_vgg.pth是vgg为主干特征提取网络用到的;
32 | 链接: https://pan.baidu.com/s/1S6wG8sEXBeoSec95NZxmlQ
33 | 提取码: 8mgp
34 |
35 | VOC数据集下载地址如下,里面已经包括了训练集、测试集、验证集(与测试集一样),无需再次划分:
36 | 链接: https://pan.baidu.com/s/1YuBbBKxm2FGgTU5OfaeC5A
37 | 提取码: uack
38 | **笔者使用的是在生产工厂现场采集的PCB图片,故不能上传;读者需要自备数据集,或者根据上面提供的数据集链接下载。**
39 | **在后续说明中,笔者以公共数据集VOC07+12为例进行讲解。**
40 |
41 | ## 训练步骤
42 | ### a、训练VOC07+12数据集
43 | 1. 数据集的准备
44 | **本文使用VOC格式进行训练,训练前需要下载好VOC07+12的数据集,解压后放在根目录**
45 |
46 | 2. 数据集的处理
47 | 修改voc_annotation.py里面的annotation_mode=2,运行voc_annotation.py生成根目录下的2007_train.txt和2007_val.txt。
48 |
49 | 3. 开始网络训练
50 | train.py的默认参数用于训练VOC数据集,直接运行train.py即可开始训练。
51 |
52 | 4. 训练结果预测
53 | 训练结果预测需要用到两个文件,分别是frcnn_predict.py和predict.py。我们首先需要去frcnn_predict.py里面修改model_path以及classes_path,这两个参数必须要修改。
54 | **model_path指向训练好的权值文件,在logs文件夹里。
55 | classes_path指向检测类别所对应的txt。**
56 | 完成修改后就可以运行predict.py进行检测了。运行后输入图片路径即可检测。
57 |
58 | ### b、训练自己的数据集
59 | 1. 数据集的准备
60 | **本文使用VOC格式进行训练,训练前需要自己制作好数据集,**
61 | 训练前将标签文件放在VOCdevkit文件夹下的VOC2007文件夹下的Annotations中。
62 | 训练前将图片文件放在VOCdevkit文件夹下的VOC2007文件夹下的JPEGImages中。
63 |
64 | 2. 数据集的处理
65 | 在完成数据集的摆放之后,我们需要利用voc_annotation.py获得训练用的2007_train.txt和2007_val.txt。
66 | 修改voc_annotation.py里面的参数。第一次训练可以仅修改classes_path,classes_path用于指向检测类别所对应的txt。
67 | 训练自己的数据集时,可以自己建立一个cls_classes.txt,里面写自己所需要区分的类别。
68 | model_data/cls_classes.txt文件内容为:
69 | ```python
70 | cat
71 | dog
72 | ...
73 | ```
74 | 修改voc_annotation.py中的classes_path,使其对应cls_classes.txt,并运行voc_annotation.py。
75 |
76 | 3. 开始网络训练
77 | **训练的参数较多,均在train.py中,大家可以在下载库后仔细看注释,其中最重要的部分依然是train.py里的classes_path。**
78 | **classes_path用于指向检测类别所对应的txt,这个txt和voc_annotation.py里面的txt一样!训练自己的数据集必须要修改!**
79 | 修改完classes_path后就可以运行train.py开始训练了,在训练多个epoch后,权值会生成在logs文件夹中。
80 |
81 | 4. 训练结果预测
82 | 训练结果预测需要用到两个文件,分别是frcnn_predict.py和predict.py。在frcnn_predict.py里面修改model_path以及classes_path。
83 | **model_path指向训练好的权值文件,在logs文件夹里。
84 | classes_path指向检测类别所对应的txt。**
85 | 完成修改后就可以运行predict.py进行检测了。运行后输入图片路径即可检测。
86 |
87 | ## 预测步骤
88 | ### a、使用预训练权重
89 | 1. 下载完库后解压,在百度网盘下载frcnn_weights.pth,放入model_data,运行predict.py,输入
90 | ```python
91 | img/street.jpg
92 | ```
93 | 2. 在predict.py里面进行设置可以进行fps测试和video视频检测。
94 | ### b、使用自己训练的权重
95 | 1. 按照训练步骤训练。
96 | 2. 在frcnn_predict.py文件里面,在如下部分修改model_path和classes_path使其对应训练好的文件;**model_path对应logs文件夹下面的权值文件,classes_path是model_path对应分的类**。
97 |
98 |
99 | 3. 运行predict.py,输入
100 | ```python
101 | img/street.jpg
102 | ```
103 | 4. 在predict.py里面进行设置可以进行fps测试和video视频检测。
104 |
105 | ## 评估步骤
106 | ### a、评估VOC07+12的测试集
107 | 1. 本文使用VOC格式进行评估。VOC07+12已经划分好了测试集,无需利用voc_annotation.py生成ImageSets文件夹下的txt。
108 | 2. 在frcnn_predict.py里面修改model_path以及classes_path。**model_path指向训练好的权值文件,在logs文件夹里。classes_path指向检测类别所对应的txt。**
109 | 3. 运行get_map.py即可获得评估结果,评估结果会保存在map_out文件夹中。
110 |
111 | ### b、评估自己的数据集
112 | 1. 本文使用VOC格式进行评估。
113 | 2. 如果在训练前已经运行过voc_annotation.py文件,代码会自动将数据集划分成训练集、验证集和测试集。如果想要修改测试集的比例,可以修改voc_annotation.py文件下的trainval_percent。trainval_percent用于指定(训练集+验证集)与测试集的比例,默认情况下 (训练集+验证集):测试集 = 9:1。train_percent用于指定(训练集+验证集)中训练集与验证集的比例,默认情况下 训练集:验证集 = 9:1。
114 | 3. 利用voc_annotation.py划分测试集后,前往get_map.py文件修改classes_path,classes_path用于指向检测类别所对应的txt,这个txt和训练时的txt一样。评估自己的数据集必须要修改。
115 | 4. 在frcnn_predict.py里面修改model_path以及classes_path。**model_path指向训练好的权值文件,在logs文件夹里。classes_path指向检测类别所对应的txt。**
116 | 5. 运行get_map.py即可获得评估结果,评估结果会保存在map_out文件夹中。
117 |
--------------------------------------------------------------------------------
/data_expansion.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | """数据增强
3 | 1. 翻转变换 flip
4 | 2. 图片裁剪 crop
5 | 3. 色彩抖动 color jittering
6 | 4. 平移变换 shift
7 | 5. 尺度变换 scale
8 | 6. 对比度变换 contrast
9 | 7. 噪声扰动 noise
10 | 8. 旋转变换/反射变换 Rotation/reflection
11 | 9.直方图增强
12 | 10.拉普拉斯算子
13 | 11.对数变换
14 | 12.伽马变换
15 | 13.限制对比度自适应直方图均衡化CLAHE
16 | 14.retinex SSR
17 | 15.retinex MMR
18 | 16.
19 |
20 | """
21 |
22 | import logging
23 | import os
24 | import random
25 | import threading
26 | import time
27 | from dataclasses import dataclass
28 | from distutils.log import error
29 |
30 | import cv2
31 | import numpy as np
32 | from PIL import Image, ImageEnhance, ImageFile
33 |
34 |
35 | # 图片裁剪
def read_path(file_pathname, save_dir=r'jixing\polarity', crop_height=3585, crop_width=3629):
    """Crop every image in a directory to its top-left region and save the result.

    :param file_pathname: directory whose entries are read with cv2.imread.
    :param save_dir: directory prefix the cropped images are written to
        (defaults to the location the original code hard-coded).
    :param crop_height: number of rows kept, counted from the top edge.
    :param crop_width: number of columns kept, counted from the left edge.
    :return: None. Each readable image is written to ``save_dir + "/" + filename``.
    """
    for filename in os.listdir(file_pathname):
        # Build the full path of the current directory entry.
        img_filename = os.path.join(file_pathname, filename)

        img = cv2.imread(img_filename)
        if img is None:
            # cv2.imread returns None (instead of raising) for entries it cannot
            # decode, e.g. sub-folders or non-image files; slicing None would crash.
            logging.getLogger(__name__).warning("skip unreadable image: %s", img_filename)
            continue

        # numpy slicing order is [y0:y1, x0:x1]; the crop starts at the top-left corner.
        crop_img = img[0:crop_height, 0:crop_width]

        cv2.imwrite(save_dir + "/" + filename, crop_img)
48 |
49 |
# Module-level logger; imageOps uses it to report unknown operation names.
logger = logging.getLogger(__name__)
# Ask PIL to load truncated image files instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True
52 |
53 |
class DataAugmentation:
    """
    Collection of static image augmentation and enhancement operations.

    Geometric ops: randomRotation, transpose.
    Colour / noise ops: randomColor, randomGaussian, Pepper_noise, Poisson_noise.
    Enhancement ops: hist, laplacian, log, gamma, clahe, SSR_image, MSR_image.

    All operations take an image as a numpy array in the layout produced by
    cv2.imread and return a new array; the input is not modified.
    """

    def __init__(self):
        pass

    @staticmethod
    def openImage(image):
        """Read the image file at path ``image`` with cv2.imread and return the result."""
        return cv2.imread(image)

    @staticmethod
    def randomRotation(image, center=None, scale=1.0):
        """
        Rotate the image by a random whole-degree angle drawn from [-180, 180).

        :param image: input image array.
        :param center: rotation centre (x, y); defaults to the image centre.
        :param scale: isotropic scale factor passed to cv2.getRotationMatrix2D.
        :return: rotated image with the same width and height as the input.
        """
        random_angle = np.random.randint(-180, 180)
        (h, w) = image.shape[:2]
        # If no rotation center is specified, rotate around the image centre.
        if center is None:
            center = (w / 2, h / 2)
        m = cv2.getRotationMatrix2D(center, random_angle, scale)
        return cv2.warpAffine(image, m, (w, h))

    @staticmethod
    def transpose(image):
        """
        Flip the image along a randomly chosen axis.

        The flip code is drawn uniformly from {-1, 0, 1} and handed to cv2.flip.
        :return: flipped image.
        """
        # numpy's upper bound is exclusive, so (-1, 2) yields -1, 0 or 1.
        # The previous (-2, 2) produced -2..1, over-weighting negative codes.
        flip_code = np.random.randint(-1, 2)
        return cv2.flip(image, flip_code)

    # ---- noise / colour jitter ----

    @staticmethod
    def randomColor(image):
        """
        Randomly jitter saturation, brightness, contrast and sharpness.

        Each of the four adjustments is applied independently with probability 1/2,
        using a random enhancement factor (0.0-3.0 for saturation and sharpness,
        1.0-2.0 for brightness and contrast).
        :param image: BGR image array (converted to a PIL RGB image internally).
        :return: jittered image converted back to a BGR array.
        """
        # ImageEnhance works on PIL images, so convert BGR array -> RGB PIL image.
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # (enhancer class, inclusive low, exclusive high) of the factor in tenths.
        adjustments = (
            (ImageEnhance.Color, 0, 31),        # saturation
            (ImageEnhance.Brightness, 10, 21),  # brightness
            (ImageEnhance.Contrast, 10, 21),    # contrast
            (ImageEnhance.Sharpness, 0, 31),    # sharpness
        )
        for enhancer, low, high in adjustments:
            # Coin flip deciding whether this adjustment is applied at all.
            if random.randint(0, 1):
                random_factor = np.random.randint(low, high) / 10.
                pil_image = enhancer(pil_image).enhance(random_factor)
        return cv2.cvtColor(np.asarray(pil_image), cv2.COLOR_RGB2BGR)

    @staticmethod
    def randomGaussian(image, mean=0.2, sigma=0.04):
        """
        Add Gaussian noise to the image.

        :param image: image array with values in 0-255.
        :param mean: mean of the noise, expressed on the 0-1 intensity scale.
        :param sigma: standard deviation of the noise; larger means more noise.
        :return: noisy uint8 image.
        """
        # Work on the normalised 0-1 scale.
        img = image / 255
        noise = np.random.normal(mean, sigma, img.shape)
        # Clamp to the valid range before converting back to 0-255.
        gaussian_out = np.clip(img + noise, 0, 1)
        return np.uint8(gaussian_out * 255)

    @staticmethod
    def Pepper_noise(image):
        """
        Add salt-and-pepper noise.

        3% of the array elements are overwritten: 4% of those with 255 (salt)
        and the remaining 96% with 0 (pepper).
        :return: noisy copy of the image.
        """
        # Share of the noisy elements that become salt (white).
        s_vs_p = 0.04
        # Share of all array elements that receive noise.
        amount = 0.03
        noisy_img = np.copy(image)

        num_salt = int(np.ceil(amount * image.size * s_vs_p))
        # randint's upper bound is exclusive, so `dim` covers indices 0..dim-1.
        # The previous `dim - 1` never touched the last row, column or channel.
        coords = [np.random.randint(0, dim, num_salt) for dim in image.shape]
        noisy_img[tuple(coords)] = 255

        num_pepper = int(np.ceil(amount * image.size * (1. - s_vs_p)))
        coords = [np.random.randint(0, dim, num_pepper) for dim in image.shape]
        noisy_img[tuple(coords)] = 0
        return noisy_img

    @staticmethod
    def Poisson_noise(image):
        """
        Add Poisson noise.

        :return: noisy image. For uint8 input the result is rounded, clipped to
            0-255 and returned as uint8 like the other operations; for any other
            dtype the raw floating point result is returned unchanged.
        """
        # Number of distinct intensity values, rounded up to a power of two.
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        noisy_img = np.random.poisson(image * vals) / float(vals)
        if image.dtype == np.uint8:
            # Poisson samples can exceed 255; saturate instead of handing a
            # float64 array to the image writer.
            return np.uint8(np.clip(np.rint(noisy_img), 0, 255))
        return noisy_img

    # ---- image enhancement ----

    @staticmethod
    def hist(image):
        """Equalise the histogram of each of the three channels independently."""
        c0, c1, c2 = cv2.split(image)
        return cv2.merge([cv2.equalizeHist(c0), cv2.equalizeHist(c1), cv2.equalizeHist(c2)])

    @staticmethod
    def laplacian(image):
        """Sharpen the image with a 3x3 Laplacian-based kernel."""
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        return cv2.filter2D(image, cv2.CV_8UC3, kernel)

    @staticmethod
    def log(image):
        """
        Logarithmic intensity transform, stretched to the full 0-255 range.

        :return: uint8 image.
        """
        # Compute log(1 + x) in floating point: adding 1 to a uint8 array wraps
        # 255 around to 0 (log(0) = -inf), and casting the logarithm to uint8
        # before normalising would leave only a handful of grey levels.
        log_values = np.log1p(np.asarray(image, dtype=np.float64))
        stretched = cv2.normalize(log_values, None, 0, 255, cv2.NORM_MINMAX)
        # Convert to an 8-bit image.
        return cv2.convertScaleAbs(stretched)

    @staticmethod
    def gamma(image, fgamma=0.5):
        """
        Gamma transform followed by a min-max stretch to 0-255.

        :param fgamma: exponent applied to the 0-1 normalised image; larger
            values give a darker result.
        :return: uint8 image.
        """
        image_gamma = np.uint8(np.power((np.array(image) / 255.0), fgamma) * 255.0)
        cv2.normalize(image_gamma, image_gamma, 0, 255, cv2.NORM_MINMAX)
        cv2.convertScaleAbs(image_gamma, image_gamma)
        return image_gamma

    @staticmethod
    def clahe(image):
        """Contrast Limited Adaptive Histogram Equalisation (CLAHE), applied per channel."""
        b, g, r = cv2.split(image)
        equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return cv2.merge([equaliser.apply(b), equaliser.apply(g), equaliser.apply(r)])

    @staticmethod
    def __replaceZeroes(data):
        """
        Return a copy of ``data`` whose zeros are replaced by its smallest non-zero value.

        Used before taking logarithms. An all-zero array has no non-zero minimum,
        so 1 is used as the replacement in that case.
        """
        nonzero = data[np.nonzero(data)]
        fill = nonzero.min() if nonzero.size else 1
        result = data.copy()
        result[result == 0] = fill
        return result

    @staticmethod
    def __SSR(src_img, size):
        """Single-scale retinex-style enhancement of one channel; returns a uint8 array."""
        L_blur = cv2.GaussianBlur(src_img, (size, size), 0)
        img = DataAugmentation.__replaceZeroes(src_img)
        L_blur = DataAugmentation.__replaceZeroes(L_blur)

        dst_Img = cv2.log(img / 255.0)
        dst_Lblur = cv2.log(L_blur / 255.0)
        dst_IxL = cv2.multiply(dst_Img, dst_Lblur)
        log_R = cv2.subtract(dst_Img, dst_IxL)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        return cv2.convertScaleAbs(dst_R)

    @staticmethod
    def SSR_image(image):
        """Apply the single-scale enhancement (kernel size 3) to each channel."""
        size = 3
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__SSR(b_gray, size)
        g_gray = DataAugmentation.__SSR(g_gray, size)
        r_gray = DataAugmentation.__SSR(r_gray, size)
        return cv2.merge([b_gray, g_gray, r_gray])

    @staticmethod
    def __MSR(img, scales):
        """
        Multi-scale retinex-style enhancement of one channel.

        :param scales: Gaussian kernel sizes; the per-scale results are summed.
        :return: uint8 array stretched to 0-255.
        """
        weight = 2 / 3.0
        h, w = img.shape[:2]
        log_R = np.zeros((h, w), dtype=np.float32)

        # Zero replacement and the log of the image do not depend on the scale,
        # so compute them once instead of on every iteration.
        img = DataAugmentation.__replaceZeroes(img)
        dst_Img = cv2.log(img / 255.0)

        for scale in scales:
            L_blur = cv2.GaussianBlur(img, (scale, scale), 0)
            L_blur = DataAugmentation.__replaceZeroes(L_blur)
            dst_Lblur = cv2.log(L_blur / 255.0)
            dst_Ixl = cv2.multiply(dst_Img, dst_Lblur)
            log_R += weight * cv2.subtract(dst_Img, dst_Ixl)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        return cv2.convertScaleAbs(dst_R)

    @staticmethod
    def MSR_image(image):
        """Apply the multi-scale enhancement (kernel sizes 15, 101, 301) to each channel."""
        scales = [15, 101, 301]  # [3,5,9]
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__MSR(b_gray, scales)
        g_gray = DataAugmentation.__MSR(g_gray, scales)
        r_gray = DataAugmentation.__MSR(r_gray, scales)
        return cv2.merge([b_gray, g_gray, r_gray])
288 |
289 |
def imageOps(func_name, image1, img_des_path, img_file_name, times=1):
    """
    Apply one named augmentation to an image and write the result to disk.

    :param func_name: key of the operation in the table below (e.g. "clahe").
    :param image1: input image array as returned by DataAugmentation.openImage.
    :param img_des_path: directory the result is written to.
    :param img_file_name: file name of the source image.
    :param times: how many times the operation is run on the image.
    :return: -1 when the operation name is unknown or the image is missing,
        otherwise None.

    With ``times == 1`` the result keeps the source file name. With more than
    one run each result is named ``func_name + run index + img_file_name`` so
    later runs no longer overwrite earlier ones.
    """
    funcMap = {#"randomRotation": DataAugmentation.randomRotation,
               "randomcolor": DataAugmentation.randomColor, "transpose": DataAugmentation.transpose,
               "randomGaussian": DataAugmentation.randomGaussian, "pepper_noise": DataAugmentation.Pepper_noise,
               "Poisson_noise": DataAugmentation.Poisson_noise, "hist": DataAugmentation.hist,
               "laplacian": DataAugmentation.laplacian, "log": DataAugmentation.log,
               "gamma": DataAugmentation.gamma, "clahe": DataAugmentation.clahe,
               "SSR_image": DataAugmentation.SSR_image, "MSR_image": DataAugmentation.MSR_image
               }
    operation = funcMap.get(func_name)
    if operation is None:
        logger.error("%s is not exist", func_name)
        return -1
    if image1 is None:
        # openImage hands back whatever cv2.imread returned, which is None for
        # unreadable files; every operation would crash on it.
        logger.error("no image data for %s, skip %s", img_file_name, func_name)
        return -1

    for _i in range(times):
        new_image = operation(image1)
        if times == 1:
            out_name = img_file_name
        else:
            out_name = func_name + str(_i) + img_file_name
        path = os.path.join(img_des_path, out_name)
        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite(path, new_image):
            logger.error("failed to write %s", path)
310 |
311 |
# Names of the operations threadOPS applies to every image. The full set of
# names used previously is kept here for reference:
# opsList = {"transpose",'randomcolor',"gamma","MSR_image","pepper_noise","hist","log","clahe",'randomGaussian',
#            'Poisson_noise','laplacian','SSR_image'}
opsList = {"clahe"}  # author's note: clahe gives the better enhancement result
315 |
def threadOPS(img_path, new_img_path):
    """
    Run every operation in opsList on every image, one thread per operation.

    :param img_path: source directory of images, or the path of a single image.
    :param new_img_path: directory the augmented images are written to; it is
        created when it does not exist yet.
    :return: None

    The program exits when the source directory contains a sub-folder.
    """
    if os.path.isdir(img_path):
        src_dir = img_path
        img_names = os.listdir(img_path)
    else:
        # A single file: split it into directory and file name. Joining the
        # path with itself (as before) made imageOps write the result back
        # over the source file.
        src_dir, single_name = os.path.split(img_path)
        img_names = [single_name]

    # Refuse to run on a directory that contains sub-folders.
    for img_name in img_names:
        if os.path.isdir(os.path.join(src_dir, img_name)):
            print('contain file folder')
            raise SystemExit

    # Make sure the destination exists before any worker tries to write to it.
    os.makedirs(new_img_path, exist_ok=True)

    for img_name in img_names:
        tmp_img_name = os.path.join(src_dir, img_name)
        # Read the file once; all operations share the same input array.
        image1 = DataAugmentation.openImage(tmp_img_name)
        if image1 is None:
            print('skip unreadable image: {}'.format(tmp_img_name))
            continue

        workers = []
        for _index, ops_name in enumerate(opsList):
            worker = threading.Thread(target=imageOps,
                                      args=(ops_name, image1, new_img_path, img_name))
            print('threadImage[{}]:{}'.format(_index, worker))
            worker.start()
            workers.append(worker)
        # Wait for this image's workers instead of sleeping a fixed interval,
        # so at most one image is held in memory at a time.
        for worker in workers:
            worker.join()
365 |
366 |
# Script entry point: run the operations in opsList on every image of the
# first folder and write the results into the second folder.
if __name__ == '__main__':
    threadOPS(#r"F:\Desktop\PCB_code\date_set\1shujuchuli",
              #r"F:\Desktop\PCB_code\date_set\2shujucunfang"
              r'F:\Desktop\PCB_code\data_set1\data_shiyan',
              r'F:\Desktop\PCB_code\data_set1\data_shiyan_kuochong')
372 |
373 | # read_path(r'F:\Desktop\PCB_code\data_set1\data_shiyan') #图片裁剪
374 |
375 |
376 | '''
377 | 路径问题:
378 | 关于上述路径中,\table\name\rain中的\t,\n,\r都易被识别为转义字符。
379 | 解决的办法主要由以下三种:
380 | #1
381 | path=r"C:\data\table\name\rain"
382 | #前面加r表示不转义
383 |
384 | #2
385 | path="C:\\data\\table\\name\\rain"
386 | #用\\代替\
387 |
388 | #3
389 | path="C:/data/table/name/rain"
390 | #用/代替\
391 |
392 | '''
393 |
394 |
--------------------------------------------------------------------------------
/get_map.py:
--------------------------------------------------------------------------------
1 | import os
2 | import xml.etree.ElementTree as ET
3 |
4 | from PIL import Image
5 | from tqdm import tqdm
6 |
7 | from utils.utils import get_classes
8 | from utils.utils_map import get_coco_map, get_map
9 | from frcnn_predict import FRCNN
10 |
if __name__ == "__main__":
    '''
    Unlike AP, Recall and Precision are not area-based metrics, so their values
    depend on the confidence threshold. By default the Recall and Precision
    reported here correspond to a threshold of 0.5.

    Computing mAP needs (nearly) all predicted boxes so that Recall and Precision
    can be evaluated at different thresholds. The txt files written to
    map_out/detection-results/ therefore usually contain more boxes than a
    plain prediction run would show.
    '''
    #------------------------------------------------------------------------------------------------------------------#
    #   map_mode selects what this script computes:
    #   0: the whole pipeline (predictions, ground truth, VOC mAP)
    #   1: predictions only
    #   2: ground truth only
    #   3: VOC mAP only
    #   4: 0.50:0.95 mAP via the COCO toolbox; needs predictions and ground
    #      truth to exist already and pycocotools to be installed
    #-------------------------------------------------------------------------------------------------------------------#
    map_mode        = 0
    #--------------------------------------------------------------------------------------#
    #   classes_path lists the classes the mAP is measured for;
    #   normally the same file used for training and prediction.
    #--------------------------------------------------------------------------------------#
    classes_path    = 'PCB_DataSet/cls_classes.txt'
    #--------------------------------------------------------------------------------------#
    #   MINOVERLAP selects which mAP0.x is computed, e.g. 0.75 for mAP0.75.
    #
    #   A prediction whose overlap with a ground-truth box exceeds MINOVERLAP
    #   counts as positive, otherwise negative. A larger value therefore demands
    #   more accurate boxes and gives a lower mAP.
    #--------------------------------------------------------------------------------------#
    MINOVERLAP      = 0.5
    #--------------------------------------------------------------------------------------#
    #   mAP needs (nearly) all predicted boxes, so confidence should be as small
    #   as possible. It is normally left unchanged; to get Recall / Precision at
    #   a different threshold change score_threhold below instead.
    #--------------------------------------------------------------------------------------#
    confidence      = 0.02
    #--------------------------------------------------------------------------------------#
    #   IoU threshold of non-maximum suppression during prediction; a larger
    #   value makes suppression less strict. Normally left unchanged.
    #--------------------------------------------------------------------------------------#
    nms_iou         = 0.5
    #---------------------------------------------------------------------------------------------------------------#
    #   Threshold at which the reported Recall and Precision are read off.
    #   It is separate from `confidence` above, which must stay small so that
    #   mAP sees all boxes.
    #---------------------------------------------------------------------------------------------------------------#
    score_threhold  = 0.5
    #-------------------------------------------------------#
    #   map_vis: whether the test images are copied to the
    #   output folder for visualising the VOC mAP computation
    #-------------------------------------------------------#
    map_vis         = False
    #-------------------------------------------------------#
    #   Folder that holds the VOC-format dataset
    #-------------------------------------------------------#
    VOCdevkit_path  = r'F:\Desktop\PCB_code\PCB_DataSet'
    #-------------------------------------------------------#
    #   Output folder for the results
    #-------------------------------------------------------#
    map_out_path    = 'faster-rcnn-pytorch-master/map_out'

    # Read the ids of the test images; the context manager closes the file.
    with open(os.path.join(VOCdevkit_path, "ImageSets/test.txt")) as id_file:
        image_ids = id_file.read().strip().split()

    # Create the output folder and its sub-folders when they are missing.
    os.makedirs(map_out_path, exist_ok=True)
    for sub_dir in ('ground-truth', 'detection-results', 'images-optional'):
        os.makedirs(os.path.join(map_out_path, sub_dir), exist_ok=True)

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        frcnn = FRCNN(confidence = confidence, nms_iou = nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path  = os.path.join(VOCdevkit_path, "JPEGImages/"+image_id+".jpg")
            image       = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            frcnn.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")

    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "Annotations/"+image_id+".xml")).getroot()
                for obj in root.findall('object'):
                    # An object is "difficult" when its <difficult> tag exists and is 1.
                    difficult_node = obj.find('difficult')
                    difficult_flag = difficult_node is not None and int(difficult_node.text) == 1

                    obj_name = obj.find('name').text
                    # Ignore objects of classes that are not being evaluated.
                    if obj_name not in class_names:
                        continue
                    bndbox  = obj.find('bndbox')
                    left    = bndbox.find('xmin').text
                    top     = bndbox.find('ymin').text
                    right   = bndbox.find('xmax').text
                    bottom  = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, score_threhold = score_threhold, path = map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names = class_names, path = map_out_path)
        print("Get map done.")
139 |
--------------------------------------------------------------------------------
/nets/FasterRCNN_train.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 |
4 | from nets.classifier import Resnet50RoIHead, Resnet101RoIHead, VGG16RoIHead,Resnet50_FPNRoIHead
5 | from nets.vgg16 import decom_vgg16
6 | from nets.resnet50 import resnet50
7 | from nets.resnet101 import resnet101
8 | from nets.resnet50_FPN import resnet50_FPN
9 | from nets.rpn import RegionProposalNetwork, resnet50_fpn_RPNhead
10 |
11 |
12 |
class FasterRCNN(nn.Module):
    """
    Faster R-CNN detector assembled from a backbone, an RPN and an RoI head.

    The backbone is selected by name ('vgg', 'resnet50', 'resnet101' or
    'resnet50_FPN'); each choice sets ``self.extractor``, ``self.rpn`` and
    ``self.head``.
    """

    def __init__(self,  num_classes,
                    mode = "training",
                    feat_stride = 16,
                    anchor_scales = None,
                    ratios = None,
                    backbone = 'vgg',
                    pretrained = False):
        """
        :param num_classes: number of object classes; the head gets one extra class.
        :param mode: passed through to the region proposal network.
        :param feat_stride: stride passed to the region proposal network.
        :param anchor_scales: anchor scales; defaults to [4, 16, 32].
        :param ratios: anchor aspect ratios; defaults to [0.5, 1, 2].
        :param backbone: 'vgg', 'resnet50', 'resnet101' or 'resnet50_FPN'.
        :param pretrained: passed through to the backbone constructor.
        :raises ValueError: if ``backbone`` is not one of the supported names.
        """
        super(FasterRCNN, self).__init__()
        # Build the defaults per call rather than sharing mutable default lists.
        if anchor_scales is None:
            anchor_scales = [4, 16, 32]
        if ratios is None:
            ratios = [0.5, 1, 2]
        self.feat_stride = feat_stride
        #---------------------------------#
        #   Backbone: vgg, resnet50, resnet101 or resnet50_FPN
        #---------------------------------#
        if backbone == 'vgg':
            self.extractor, classifier = decom_vgg16(pretrained)
            #---------------------------------#
            #   Region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                512, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Classifier (RoI head)
            #---------------------------------#
            self.head = VGG16RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 7,
                spatial_scale   = 1,
                classifier      = classifier
            )

        elif backbone == 'resnet50':
            # Feature extractor and classifier part of the backbone.
            self.extractor, classifier = resnet50(pretrained)
            #---------------------------------#
            #   Region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Classifier (RoI head)
            #---------------------------------#
            self.head = Resnet50RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier
            )

        elif backbone == 'resnet101':
            self.extractor, classifier = resnet101(pretrained)
            #---------------------------------#
            #   Region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Classifier (RoI head)
            #---------------------------------#
            self.head = Resnet101RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier)

        elif backbone == 'resnet50_FPN':
            self.extractor, classifier = resnet50_FPN(pretrained)
            #---------------------------------#
            #   Region proposal network (FPN variant)
            #---------------------------------#
            # The ratio list is repeated once per anchor scale (new list, the
            # caller's list is left untouched).
            ratios = ratios*len(anchor_scales)
            self.rpn = resnet50_fpn_RPNhead(
                256, 256,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Classifier (RoI head)
            #---------------------------------#
            self.head = Resnet50_FPNRoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier)

        else:
            # Without this the model was built with no extractor / rpn / head
            # and only failed later, inside forward().
            raise ValueError(
                "Unsupported backbone %r, use 'vgg', 'resnet50', 'resnet101' or 'resnet50_FPN'." % (backbone,))

    def forward(self, x, scale=1., mode="forward"):
        """
        Run the whole detector or one of its stages.

        :param x: the input, whose form depends on ``mode``:
            "forward" / "extractor": the image batch tensor;
            "rpn": ``[base_feature, img_size]``;
            "head" / "fpn_head": ``[base_feature, rois, roi_indices, img_size]``.
        :param scale: passed to the region proposal network.
        :param mode: "forward", "extractor", "rpn", "head" or "fpn_head".
        :return: "forward": (roi_cls_locs, roi_scores, rois, roi_indices);
            "extractor": base_feature;
            "rpn": (rpn_locs, rpn_scores, rois, roi_indices, anchor);
            "head" / "fpn_head": (roi_cls_locs, roi_scores).
        :raises ValueError: if ``mode`` is not one of the names above.
        """
        if mode == "forward":
            # Spatial size of the input images.
            img_size        = x.shape[2:]
            # Backbone features.
            base_feature    = self.extractor.forward(x)
            # Proposals.
            _, _, rois, roi_indices, _  = self.rpn.forward(base_feature, img_size, scale)
            # Classification and regression results of the RoI head.
            roi_cls_locs, roi_scores    = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores, rois, roi_indices
        elif mode == "extractor":
            # Backbone features only.
            base_feature    = self.extractor.forward(x)
            return base_feature
        elif mode == "rpn":
            base_feature, img_size = x
            # Proposals only.
            rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn.forward(base_feature, img_size, scale)
            return rpn_locs, rpn_scores, rois, roi_indices, anchor
        elif mode == "head":
            base_feature, rois, roi_indices, img_size = x
            # Classification and regression results of the RoI head.
            roi_cls_locs, roi_scores    = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores
        elif mode == "fpn_head":
            base_feature, rois, roi_indices, img_size = x
            # Only the first four entries of each input are handed to the head
            # (original note: levels p2~p5 are used for prediction).
            roi_cls_locs, roi_scores    = self.head.forward(base_feature[:4], rois[:4], roi_indices[:4], img_size)
            return roi_cls_locs, roi_scores
        # An unknown mode used to fall through and return None silently.
        raise ValueError(
            "Unsupported mode %r, use 'forward', 'extractor', 'rpn', 'head' or 'fpn_head'." % (mode,))

    def freeze_bn(self):
        """Switch every BatchNorm2d sub-module to evaluation mode."""
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
--------------------------------------------------------------------------------
/nets/__init__.py:
--------------------------------------------------------------------------------
1 | #
--------------------------------------------------------------------------------
/nets/__pycache__/FasterRCNN_train.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/FasterRCNN_train.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/FasterRCNN_train.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/FasterRCNN_train.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/Suggestion_box.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/Suggestion_box.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/Suggestion_box.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/Suggestion_box.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/classifier.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/classifier.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/classifier.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/classifier.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/faster_rcnn_feature_extraction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/faster_rcnn_feature_extraction.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/feature_extraction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/feature_extraction.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/feature_pyramid_network.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/feature_pyramid_network.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/frcnn.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/frcnn.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/frcnn_training.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/frcnn_training.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet101.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/resnet101.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet101.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/resnet101.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet50.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/resnet50.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet50.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/resnet50.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet50_FPN.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/resnet50_FPN.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/resnet50_FPN.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/resnet50_FPN.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/rpn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/rpn.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/rpn.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/rpn.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/vgg16.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/vgg16.cpython-37.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/vgg16.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/nets/__pycache__/vgg16.cpython-39.pyc
--------------------------------------------------------------------------------
/nets/classifier.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import torch
4 | from torch import nn
5 | from torchvision.ops import RoIPool,MultiScaleRoIAlign,RoIAlign
6 | from collections import OrderedDict
7 |
8 | warnings.filterwarnings("ignore")
9 |
class VGG16RoIHead(nn.Module):
    """RoI head for a VGG16 backbone.

    Pools each proposal from the shared feature map with ``RoIPool``, runs the
    flattened result through ``classifier`` and feeds it to two linear heads:
    one for box regression and one for classification.

    Args:
        n_class: Number of classes; the regression head emits ``n_class * 4``
            values per RoI and the score head ``n_class`` values.
        roi_size: Side length of the square RoI-pooled output.
        spatial_scale: Scale factor forwarded to ``RoIPool``.
        classifier: Module applied to the flattened pooled features. It must
            output 4096 features per RoI to match the linear heads.
    """

    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(VGG16RoIHead, self).__init__()
        self.classifier = classifier
        # Box-regression head applied to the classifier output.
        self.cls_loc = nn.Linear(4096, n_class * 4)
        # Classification head applied to the classifier output.
        self.score = nn.Linear(4096, n_class)
        # Weight initialisation (bias is zeroed by normal_init).
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        """Predict per-RoI box offsets and class scores.

        Args:
            x: Shared feature map; must be 4-D, its first dim is the batch size.
            rois: Proposal boxes; the first two dims are flattened together.
                # assumes (x1, y1, x2, y2) in input-image pixels — TODO confirm
            roi_indices: Batch index of each proposal, flattened like ``rois``.
            img_size: Input image size; index 0 scales box columns 1/3 and
                index 1 scales columns 0/2 (presumably ``(height, width)``).

        Returns:
            Tuple ``(roi_cls_locs, roi_scores)`` reshaped to
            ``(n, -1, n_class * 4)`` and ``(n, -1, n_class)``.
        """
        n, _, _, _ = x.shape
        # Follow the feature map's device. A bare ``.cuda()`` targets the
        # *current* CUDA device, which is wrong when ``x`` lives on another GPU.
        roi_indices = roi_indices.to(x.device)
        rois = rois.to(x.device)
        rois = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        # Rescale boxes from image coordinates to feature-map coordinates.
        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        # RoIPool expects rows of (batch_index, x1, y1, x2, y2).
        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
        # Crop the shared feature map with the proposals.
        pool = self.roi(x, indices_and_rois)
        # Flatten each pooled RoI before the fully connected classifier.
        pool = pool.view(pool.size(0), -1)
        # Original author's note: for a single image fc7 is [300, 4096].
        fc7 = self.classifier(pool)

        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)

        roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores
62 |
63 | # faster rcnn 网络部分的分类
class Resnet50RoIHead(nn.Module):
    """RoI head for a ResNet-50 backbone.

    Pools each proposal from the shared feature map with ``RoIPool``, runs the
    pooled map through ``classifier``, flattens the result and feeds it to a
    box-regression head and a classification head.

    Args:
        n_class: Number of classes; the regression head emits ``n_class * 4``
            values per RoI and the score head ``n_class`` values.
        roi_size: Side length of the square RoI-pooled output.
        spatial_scale: Scale factor forwarded to ``RoIPool``.
        classifier: Module applied to the pooled 4-D features. Its flattened
            output must have 2048 features per RoI to match the linear heads.
    """

    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(Resnet50RoIHead, self).__init__()
        self.classifier = classifier
        # Box-regression head: 2048 input features -> n_class * 4 outputs.
        self.cls_loc = nn.Linear(2048, n_class * 4)
        # Classification head: 2048 input features -> n_class outputs.
        self.score = nn.Linear(2048, n_class)
        # Weight initialisation (bias is zeroed by normal_init).
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        """Predict per-RoI box offsets and class scores.

        Args:
            x: Shared feature map; must be 4-D, its first dim is the batch size.
            rois: Proposal boxes; the first two dims are flattened together.
                # assumes (x1, y1, x2, y2) in input-image pixels — TODO confirm
            roi_indices: Batch index of each proposal, flattened like ``rois``.
            img_size: Input image size; index 0 scales box columns 1/3 and
                index 1 scales columns 0/2 (presumably ``(height, width)``).

        Returns:
            Tuple ``(roi_cls_locs, roi_scores)`` reshaped to
            ``(n, -1, n_class * 4)`` and ``(n, -1, n_class)``.
        """
        n, _, _, _ = x.shape
        # Follow the feature map's device. A bare ``.cuda()`` targets the
        # *current* CUDA device, which is wrong when ``x`` lives on another GPU.
        roi_indices = roi_indices.to(x.device)
        rois = rois.to(x.device)
        rois = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        # Rescale boxes from image coordinates to feature-map coordinates.
        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        # RoIPool expects rows of (batch_index, x1, y1, x2, y2).
        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
        # Crop the shared feature map with the proposals.
        pool = self.roi(x, indices_and_rois)
        # Extract features from every pooled RoI with the classifier network.
        fc = self.classifier(pool)
        # Original author's note: for a single image fc7 is [300, 2048].
        fc7 = fc.view(fc.size(0), -1)

        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)
        roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores
118 |
119 |
class Resnet50_FPNRoIHead(nn.Module):
    """RoI head for a ResNet-50 + FPN backbone.

    Pools proposals from the pyramid levels ``p2``..``p5`` with
    ``MultiScaleRoIAlign`` (7x7 output), flattens the result, projects it to
    1024 features and feeds that to a box-regression head and a
    classification head.

    Args:
        n_class: Number of classes; the regression head emits ``n_class * 4``
            values per RoI and the score head ``n_class`` values.
        roi_size: Accepted for signature parity with the other heads; unused.
        spatial_scale: Accepted for signature parity with the other heads; unused.
        classifier: Stored on the module but not called in ``forward``.
    """

    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(Resnet50_FPNRoIHead, self).__init__()
        self.classifier = classifier
        # Adaptive max pool to 7x7, followed by a fully connected projection.
        # 12544 = 256 channels * 7 * 7.
        self.maxpool = nn.AdaptiveMaxPool2d(7)
        self.fc = nn.Linear(12544, 1024)
        # Box-regression head: 1024 input features -> n_class * 4 outputs.
        self.cls_loc = nn.Linear(1024, n_class * 4)
        # Classification head: 1024 input features -> n_class outputs.
        self.score = nn.Linear(1024, n_class)
        # Weight initialisation (bias is zeroed by normal_init).
        normal_init(self.fc, 0, 0.001)
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = MultiScaleRoIAlign(featmap_names=['p2', 'p3', 'p4', 'p5'], output_size=7, sampling_ratio=2, canonical_scale=600, canonical_level=4)

    def forward(self, x, rois, roi_indices, img_size):
        """Predict per-RoI box offsets and class scores from pyramid features.

        Args:
            x: Sequence of 4-D feature maps; the first four are used as
                ``p2``..``p5``. Every entry must have a matching ``rois[i]``.
            rois: Per-level proposal tensors; each has its first two dims
                flattened and all levels are concatenated.
            roi_indices: Unused by this head.
            img_size: Image size handed to ``MultiScaleRoIAlign`` as the single
                entry of its image-shape list.

        Returns:
            Tuple ``(roi_cls_locs, roi_scores)`` reshaped to
            ``(n, -1, n_class * 4)`` and ``(n, -1, n_class)``, where ``n`` is
            the batch dimension of the last feature map in ``x``.
        """
        # MultiScaleRoIAlign takes its feature maps as an ordered name -> tensor map.
        Ordered_x = OrderedDict(p2=x[0], p3=x[1], p4=x[2], p5=x[3])
        rois_p = []
        # Flatten the proposals of every level onto that level's device.
        # ``.to(device)`` is used instead of ``.cuda()`` so that a feature map
        # on a non-default GPU does not end up with boxes on another device.
        for level, feature in enumerate(x):
            n, _, _, _ = feature.shape
            rois_p.append(torch.flatten(rois[level].to(feature.device), 0, 1))

        # Merge the proposals of all levels into a one-element box list.
        # NOTE(review): a single box list plus a single image size means all
        # RoIs are attributed to one image — confirm behaviour for batch > 1.
        rois_x = [torch.cat(rois_p, dim=0)]
        # Pool every proposal from the pyramid level chosen by MultiScaleRoIAlign.
        pool = self.roi(Ordered_x, rois_x, [img_size])
        # The original computed self.maxpool(pool) and then discarded it by
        # flattening ``pool`` directly. Use the pooled tensor: for the 7x7
        # RoIAlign output the adaptive pool is an identity, so values are
        # unchanged, and ``self.fc`` is guaranteed its 7x7 input.
        pooled = self.maxpool(pool)
        fc = pooled.view(pooled.size(0), -1)
        # Fully connected projection to a 1024-dimensional feature vector.
        fc7 = self.fc(fc)

        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)
        roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores = roi_scores.view(n, -1, roi_scores.size(1))

        return roi_cls_locs, roi_scores
187 |
188 |
class Resnet101RoIHead(nn.Module):
    """RoI head for a ResNet-101 backbone.

    Pools each proposal from the shared feature map with ``RoIPool``, runs the
    pooled map through ``classifier``, flattens the result and feeds it to a
    box-regression head and a classification head.

    Args:
        n_class: Number of classes; the regression head emits ``n_class * 4``
            values per RoI and the score head ``n_class`` values.
        roi_size: Side length of the square RoI-pooled output.
        spatial_scale: Scale factor forwarded to ``RoIPool``.
        classifier: Module applied to the pooled 4-D features. Its flattened
            output must have 2048 features per RoI to match the linear heads.
    """

    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(Resnet101RoIHead, self).__init__()
        self.classifier = classifier
        # Box-regression head applied to the classifier output.
        self.cls_loc = nn.Linear(2048, n_class * 4)
        # Classification head applied to the classifier output.
        self.score = nn.Linear(2048, n_class)
        # Weight initialisation (bias is zeroed by normal_init).
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi = RoIPool((roi_size, roi_size), spatial_scale)

    def forward(self, x, rois, roi_indices, img_size):
        """Predict per-RoI box offsets and class scores.

        Args:
            x: Shared feature map; must be 4-D, its first dim is the batch size.
            rois: Proposal boxes; the first two dims are flattened together.
                # assumes (x1, y1, x2, y2) in input-image pixels — TODO confirm
            roi_indices: Batch index of each proposal, flattened like ``rois``.
            img_size: Input image size; index 0 scales box columns 1/3 and
                index 1 scales columns 0/2 (presumably ``(height, width)``).

        Returns:
            Tuple ``(roi_cls_locs, roi_scores)`` reshaped to
            ``(n, -1, n_class * 4)`` and ``(n, -1, n_class)``.
        """
        n, _, _, _ = x.shape
        # Follow the feature map's device. A bare ``.cuda()`` targets the
        # *current* CUDA device, which is wrong when ``x`` lives on another GPU.
        roi_indices = roi_indices.to(x.device)
        rois = rois.to(x.device)
        rois = torch.flatten(rois, 0, 1)
        roi_indices = torch.flatten(roi_indices, 0, 1)

        # Rescale boxes from image coordinates to feature-map coordinates.
        rois_feature_map = torch.zeros_like(rois)
        rois_feature_map[:, [0, 2]] = rois[:, [0, 2]] / img_size[1] * x.size()[3]
        rois_feature_map[:, [1, 3]] = rois[:, [1, 3]] / img_size[0] * x.size()[2]

        # RoIPool expects rows of (batch_index, x1, y1, x2, y2).
        indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
        # Crop the shared feature map with the proposals.
        pool = self.roi(x, indices_and_rois)
        # Extract features from every pooled RoI with the classifier network.
        fc7 = self.classifier(pool)
        # Original author's note: for a single image fc7 is [300, 2048].
        fc7 = fc7.view(fc7.size(0), -1)

        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)
        roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
        roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
        return roi_cls_locs, roi_scores
240 |
241 |
def normal_init(m, mean, stddev, truncated=False):
    """Initialise a layer's weight from a normal distribution and zero its bias.

    Args:
        m: Layer exposing ``weight`` (and optionally ``bias``) parameters.
        mean: Mean of the normal distribution.
        stddev: Standard deviation of the normal distribution.
        truncated: When true, samples a standard normal, folds it into
            ``(-2, 2)`` with ``fmod`` and then scales/shifts it. This is only
            an approximation of a truncated normal.

    Layers created without a bias (``bias is None``) are supported; only the
    weight is initialised for them.
    """
    if truncated:
        # Not a perfect approximation of a truncated normal.
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)
    else:
        m.weight.data.normal_(mean, stddev)
    # ``nn.Linear(..., bias=False)`` and similar layers have ``bias = None``;
    # the original dereferenced it unconditionally and raised AttributeError.
    bias = getattr(m, "bias", None)
    if bias is not None:
        bias.data.zero_()
248 |
--------------------------------------------------------------------------------
/nets/resnet101.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import math
3 | from torch.hub import load_state_dict_from_url
4 |
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus a shortcut.

    Note on stride placement: the original paper strides the first 1x1
    convolution of a projection block, whereas the official PyTorch
    implementation (ResNet v1.5) strides the 3x3 convolution, reportedly for
    about +0.5% top-1 accuracy. This class applies ``stride`` in the first 1x1
    convolution; the 3x3 convolution always uses stride 1.
    See https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        inplanes: Number of input channels.
        planes: Bottleneck width; the block outputs ``planes * 4`` channels.
        stride: Stride of the first 1x1 convolution.
        downsample: Optional module applied to the input to form the shortcut.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        expanded = planes * 4

        # 1x1 convolution: compress the channel count (carries the stride).
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 convolution: spatial feature extraction.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 convolution: restore (expand) the channel count.
        self.conv3 = nn.Conv2d(planes, expanded, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # With a downsample module the shortcut is projected (conv block);
        # without one the input is added unchanged (identity block).
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
51 |
class ResNet101(nn.Module):
    """ResNet trunk built from ``block`` units, with an ImageNet-style head.

    The spatial sizes quoted in the comments are the original author's
    annotations for a 600x600x3 input.

    Args:
        block: Residual block class; must expose an ``expansion`` attribute and
            accept ``(inplanes, planes, stride, downsample)``.
        layers: Number of blocks in each of the four stages.
        num_classes: Output size of the final fully connected layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet101, self).__init__()
        # Running channel count consumed and updated by _make_layer.
        self.inplanes = 64

        # 600,600,3 -> 300,300,64: 7x7 convolution, stride 2, no bias.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 300,300,64 -> 150,150,64: max pooling.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # 150,150,64 -> 150,150,256
        self.layer1 = self._make_layer(block, 64, layers[0])
        # 150,150,256 -> 75,75,512
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # 75,75,512 -> 38,38,1024: the shared feature layer.
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # layer4 is the part reused by the classifier model.
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
        # batch-norm scale is set to 1 and shift to 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` units and advance ``self.inplanes``."""
        out_channels = planes * block.expansion

        # A projection shortcut is needed whenever the stage changes the
        # spatial size (stride != 1) or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        # Only the first unit carries the stride and the projection.
        units = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_channels
        for _ in range(1, blocks):
            units.append(block(self.inplanes, planes))
        return nn.Sequential(*units)

    def forward(self, x):
        """Run the stem, the four stages, average pooling and the linear head."""
        for stage in (self.conv1, self.bn1, self.relu, self.maxpool,
                      self.layer1, self.layer2, self.layer3, self.layer4,
                      self.avgpool):
            x = stage(x)

        # Flatten to (batch, features) before the fully connected layer.
        x = x.view(x.size(0), -1)
        return self.fc(x)
122 |
def resnet101(pretrained = False):
    """Build a ResNet-101 and split it into a feature extractor and a classifier.

    Args:
        pretrained: When true, download the ImageNet weights into
            ``./model_data`` and load them before splitting.

    Returns:
        Tuple ``(features, classifier)`` of ``nn.Sequential`` modules:
        ``features`` runs conv1 through layer3 (a 38x38x1024 map for a 600x600
        input, per the original author's note); ``classifier`` runs layer4
        followed by average pooling.
    """
    # Stage depths of ResNet-101. The last stage (3 bottlenecks) ends up in
    # the classifier part, i.e. it runs on the RoI features.
    model = ResNet101(Bottleneck, [3, 4, 23, 3])
    if pretrained:
        weights = load_state_dict_from_url("https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", model_dir="./model_data")
        model.load_state_dict(weights)

    # Feature extraction part: conv1 up to and including layer3.
    features = nn.Sequential(
        model.conv1, model.bn1, model.relu, model.maxpool,
        model.layer1, model.layer2, model.layer3,
    )
    # Classification part: layer4 followed by average pooling.
    classifier = nn.Sequential(model.layer4, model.avgpool)
    return features, classifier
140 |
--------------------------------------------------------------------------------
/nets/resnet50.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch.nn as nn
4 | from torch.hub import load_state_dict_from_url
5 |
6 |
class Bottleneck(nn.Module):
    """Three-convolution residual unit (1x1 -> 3x3 -> 1x1) with an additive shortcut.

    Stride placement: in the original paper the first 1x1 convolution of a
    projection block is strided, while the official PyTorch implementation
    (ResNet v1.5) strides the 3x3 convolution instead, reportedly gaining
    about 0.5% top-1 accuracy. Here ``stride`` is applied by the first 1x1
    convolution and the 3x3 convolution is fixed at stride 1.
    Reference: https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        inplanes: Channels of the incoming tensor.
        planes: Inner width of the unit; the output has ``planes * 4`` channels.
        stride: Stride of the first 1x1 convolution.
        downsample: Optional module producing the shortcut from the input.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * 4

        # Channel reduction (1x1); this convolution carries the stride.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Feature extraction (3x3, padding keeps the spatial size).
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # Channel restoration (1x1).
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the three conv/bn stages, add the shortcut and activate."""
        hidden = self.conv1(x)
        hidden = self.relu(self.bn1(hidden))

        hidden = self.conv2(hidden)
        hidden = self.relu(self.bn2(hidden))

        hidden = self.bn3(self.conv3(hidden))

        # Projection shortcut when a downsample module exists (conv block),
        # otherwise the plain input (identity block).
        if self.downsample is None:
            residual = x
        else:
            residual = self.downsample(x)

        hidden += residual
        return self.relu(hidden)
53 |
class ResNet(nn.Module):
    """ResNet trunk built from ``block`` units, with an optional classification top.

    The spatial sizes quoted in the comments are the original author's
    annotations for a 600x600x3 input.

    Args:
        block: Residual block class; must expose an ``expansion`` attribute and
            accept ``(inplanes, planes, stride, downsample)``.
        layers: Number of blocks in each of the four stages.
        include_top: When true, add average pooling and a linear layer and
            apply them in ``forward``.
        num_classes: Output size of the linear layer (only with ``include_top``).
    """

    def __init__(self, block, layers, include_top=True,num_classes=1000):
        super(ResNet, self).__init__()
        self.include_top = include_top
        # Running channel count consumed and updated by _make_layer.
        self.inplanes = 64

        # 600,600,3 -> 300,300,64: 7x7 convolution, stride 2, no bias.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 300,300,64 -> 150,150,64: max pooling.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # 150,150,64 -> 150,150,256
        self.layer1 = self._make_layer(block, 64, layers[0])
        # 150,150,256 -> 75,75,512
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # 75,75,512 -> 38,38,1024: the shared feature layer.
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # layer4 is the part reused by the classifier model.
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AvgPool2d(7)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
        # batch-norm scale is set to 1 and shift to 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` units and advance ``self.inplanes``."""
        out_channels = planes * block.expansion

        # A projection shortcut is needed whenever the stage changes the
        # spatial size (stride != 1) or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        # Only the first unit carries the stride and the projection; the rest
        # are stacked on top (e.g. [3, 4, 6, 3] units for ResNet-50).
        units = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_channels
        for _ in range(1, blocks):
            units.append(block(self.inplanes, planes))
        return nn.Sequential(*units)

    def forward(self, x):
        """Run the stem and the four stages, plus the top when it is included."""
        for stage in (self.conv1, self.bn1, self.relu, self.maxpool,
                      self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        x = self.avgpool(x)
        # Flatten to (batch, features) before the fully connected layer.
        x = x.view(x.size(0), -1)
        return self.fc(x)
131 |
def resnet50(pretrained = False):
    """Build a ResNet-50 and split it into a feature extractor and a classifier.

    The selected modules are printed twice: once as plain lists and once as
    the assembled ``nn.Sequential`` containers.

    Args:
        pretrained: When true, download the ImageNet weights into
            ``./model_data`` and load them before splitting.

    Returns:
        Tuple ``(features, classifier)`` of ``nn.Sequential`` modules:
        ``features`` runs conv1 through layer3 (a 38x38x1024 map for a 600x600
        input, per the original author's note); ``classifier`` runs layer4
        followed by average pooling.
    """
    # Stage depths of ResNet-50. The last stage (3 bottlenecks) ends up in
    # the classifier part, i.e. it runs on the RoI features.
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        weights = load_state_dict_from_url("https://download.pytorch.org/models/resnet50-19c8e357.pth", model_dir="./model_data")
        model.load_state_dict(weights)

    # Feature extraction part: conv1 up to and including layer3.
    feature_modules = [
        model.conv1, model.bn1, model.relu, model.maxpool,
        model.layer1, model.layer2, model.layer3,
    ]
    print('features:', feature_modules)
    # Classification part: layer4 followed by average pooling.
    classifier_modules = [model.layer4, model.avgpool]
    print('classifier:', classifier_modules)

    features = nn.Sequential(*feature_modules)
    print('features:', features)
    classifier = nn.Sequential(*classifier_modules)
    print('classifier:', classifier)
    return features, classifier
153 |
154 |
155 | # net = ResNet(Bottleneck, [3, 4, 6, 3])
156 | # print(net)
157 |
158 |
--------------------------------------------------------------------------------
/nets/resnet50_ECA_FPN.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 | from torch.hub import load_state_dict_from_url
4 |
class eca_layer(nn.Module):
    """Efficient Channel Attention (ECA) module.

    Re-weights the channels of a feature map with gates obtained from a 1-D
    convolution across the globally average-pooled channel descriptors.

    Args:
        channel: Number of channels of the input feature map. Accepted for
            API compatibility; not used by this implementation.
        k_size: Kernel size of the 1-D convolution over the channel axis.
    """

    def __init__(self, channel, k_size=3):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Padding of (k_size - 1) // 2 keeps the channel axis length for odd k_size.
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the attention gates."""
        # Global spatial descriptor: (B, C, 1, 1) for a (B, C, H, W) input.
        descriptor = self.avg_pool(x)

        # Treat the channels as a length-C sequence with one feature: (B, 1, C).
        channel_sequence = descriptor.squeeze(-1).transpose(-1, -2)
        mixed = self.conv(channel_sequence)

        # Back to (B, C, 1, 1), then squash to (0, 1) gates.
        gates = self.sigmoid(mixed.transpose(-1, -2).unsqueeze(-1))

        return x * gates.expand_as(x)
29 |
30 |
class Bottleneck(nn.Module):
    """ResNet bottleneck block with ECA channel attention on the main branch.

    Stride placement: in the original paper the first 1x1 convolution of a
    projection block is strided, while the official PyTorch implementation
    (ResNet v1.5) strides the 3x3 convolution instead, reportedly gaining
    about 0.5% top-1 accuracy. Here ``stride`` is applied by the first 1x1
    convolution and the 3x3 convolution is fixed at stride 1.
    Reference: https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        inplanes: Channels of the incoming tensor.
        planes: Inner width of the block; the output has ``planes * 4`` channels.
        stride: Stride of the first 1x1 convolution.
        downsample: Optional module producing the shortcut from the input.
        k_size: Kernel size handed to the ECA layer.
    """
    expansion = 4  # channel multiplier of the block output

    def __init__(self, inplanes, planes, stride=1, downsample=None,k_size=3):
        super().__init__()
        out_planes = planes * 4

        # Channel reduction (1x1); this convolution carries the stride.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Feature extraction (3x3, padding keeps the spatial size).
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # Channel restoration (1x1).
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU(inplace=True)
        # ECA attention applied to the expanded features.
        self.eca = eca_layer(out_planes, k_size)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply conv/bn stages and ECA, add the shortcut and activate."""
        hidden = self.relu(self.bn1(self.conv1(x)))
        hidden = self.relu(self.bn2(self.conv2(hidden)))
        hidden = self.eca(self.bn3(self.conv3(hidden)))

        # Projection shortcut when a downsample module exists (conv block),
        # otherwise the plain input (identity block).
        if self.downsample is None:
            residual = x
        else:
            residual = self.downsample(x)

        hidden += residual
        return self.relu(hidden)
83 |
class ResNet50_ECA_FPN(nn.Module):
    """ResNet backbone with a feature pyramid network (FPN) on top.

    ``forward`` returns the pyramid ``[p2, p3, p4, p5, p6]``, each level with
    256 channels. The spatial sizes quoted in the comments are the original
    author's annotations for a 600x600x3 input.

    Args:
        block: Residual block class; must expose ``expansion`` and accept
            ``(inplanes, planes, stride, downsample, k_size)``. The layer
            widths below assume ``expansion == 4``.
        layers: Number of blocks in each of the four stages.
        num_classes: Unused; kept for signature compatibility.
        k_size: Per-stage kernel size forwarded to ``block`` (one entry per
            stage). Any indexable of four values is accepted.
    """

    def __init__(self, block, layers, num_classes=100, k_size=(3, 3, 3, 3)):
        super(ResNet50_ECA_FPN, self).__init__()
        # Running channel count consumed and updated by _make_layer.
        self.inplanes = 64

        # C1 stem (the first convolution and pooling layers of ResNet).
        # 600,600,3 -> 300,300,64: 7x7 convolution, stride 2, no bias.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 300,300,64 -> 150,150,64: max pooling.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # Bottom-up pathway: C2, C3, C4, C5.
        # 150,150,64 -> 150,150,256
        self.layer1 = self._make_layer(block, 64, layers[0], int(k_size[0]))
        # 150,150,256 -> 75,75,512
        self.layer2 = self._make_layer(block, 128, layers[1], int(k_size[1]), stride=2)
        # 75,75,512 -> 38,38,1024
        self.layer3 = self._make_layer(block, 256, layers[2], int(k_size[2]), stride=2)
        # 38,38,1024 -> 19,19,2048
        self.layer4 = self._make_layer(block, 512, layers[3], int(k_size[3]), stride=2)

        # Reduce the channels of C5 to obtain P5.
        self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)

        # Smooth layers: 3x3 convolutions applied to the merged features.
        self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # Lateral layers: 1x1 convolutions bringing C4/C3/C2 to 256 channels.
        self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer1 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)

        # 19,19 (P5) -> 10,10 (P6): stride-2 subsampling.
        self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=True)

        # Average pooling and fully connected layer. Not used by forward();
        # kept so the module's attributes and state_dict stay unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 256)

        # Conv2d weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
        # batch-norm scale is set to 1 and shift to 0.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, k_size, stride=1):
        """Build one residual stage of ``blocks`` units and advance ``self.inplanes``."""
        downsample = None
        # A projection shortcut is needed whenever the stage changes the
        # spatial size (stride != 1) or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        # Only the first unit carries the stride and the projection.
        layers = [block(self.inplanes, planes, stride, downsample, k_size)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, k_size=k_size))
        return nn.Sequential(*layers)

    def _upsample_add(self, x, y):
        """Bilinearly resize ``x`` to the spatial size of ``y`` and add them."""
        _, _, H, W = y.size()
        # nn.functional.upsample is deprecated and warns on every call;
        # interpolate with align_corners=False is its exact replacement.
        resized = nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=False)
        return resized + y

    def forward(self, x):
        """Return the feature pyramid ``[p2, p3, p4, p5, p6]`` for input ``x``."""
        # Bottom-up pathway.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        # Top-down pathway: reduce C5, then upsample and merge with laterals.
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer3(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p2 = self._upsample_add(p3, self.latlayer1(c2))

        # Smooth the merged maps.
        p5 = self.smooth4(p5)
        # P6 is a stride-2 subsample of the smoothed P5 (used by the RPN,
        # per the original author's note).
        p6 = self.maxpool_p6(p5)
        p4 = self.smooth3(p4)
        p3 = self.smooth2(p3)
        p2 = self.smooth1(p2)

        return [p2, p3, p4, p5, p6]
207 |
208 | # test
209 | # FPN = ResNet50_ECA_FPN(Bottleneck, [3, 4, 6, 3])
210 | # print('FPN:',FPN)
211 |
212 |
def resnet50_ECA_FPN(pretrained=False):
    """Build the ResNet-50 + ECA + FPN backbone and its classifier modules.

    Both returned modules are printed before returning.

    Args:
        pretrained: Accepted for signature parity with the other backbone
            factories; this function does not load any weights.

    Returns:
        Tuple ``(features, classifier)``: ``features`` is the whole
        ``ResNet50_ECA_FPN`` model (its forward yields the multi-scale
        pyramid), and ``classifier`` is an ``nn.Sequential`` of the model's
        smooth1..smooth4 convolutions followed by its average pooling.
    """
    # Stage depths of ResNet-50.
    backbone = ResNet50_ECA_FPN(Bottleneck, [3, 4, 6, 3])

    # Classification part, collected before the backbone is printed.
    head_modules = [
        backbone.smooth1, backbone.smooth2, backbone.smooth3, backbone.smooth4,
        backbone.avgpool,
    ]

    # Feature extraction uses the full model rather than a Sequential slice.
    features = backbone
    print('features:', features)
    # Applied after RoI pooling for regression and classification
    # (per the original author's note).
    classifier = nn.Sequential(*head_modules)
    print('classifier:', classifier)
    return features, classifier
239 |
240 |
--------------------------------------------------------------------------------
/nets/resnet50_FPN.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 | from torch.hub import load_state_dict_from_url
4 |
class eca_layer(nn.Module):
    """ECA channel-attention module.

    Each channel is reduced to its global average, the per-channel averages
    are mixed by a 1-D convolution across the channel axis, and the sigmoid
    of the result rescales the input channel-wise.

    Args:
        channel: Number of channels of the input feature map. Kept for
            interface compatibility; the layer itself never reads it.
        k_size: Kernel size of the 1-D convolution across channels.
    """

    def __init__(self, channel, k_size=3):
        super().__init__()
        # Padding of (k_size - 1) // 2 keeps the channel count for odd kernels.
        half_width = (k_size - 1) // 2
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=half_width, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Global spatial descriptor, one value per channel: (N, C, 1, 1).
        pooled = self.avg_pool(x)

        # Lay the channels out as a length-C sequence, (N, 1, C), so the 1-D
        # convolution mixes neighbouring channels; then restore (N, C, 1, 1).
        as_sequence = pooled.squeeze(-1).transpose(-1, -2)
        mixed = self.conv(as_sequence)
        gate = self.sigmoid(mixed.transpose(-1, -2).unsqueeze(-1))

        # Broadcast the per-channel gate over the spatial dimensions.
        return x * gate.expand_as(x)
29 |
30 |
class Bottleneck(nn.Module):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) with an ECA gate.

    Note on stride placement: this class applies ``stride`` to the first
    1x1 convolution and keeps the 3x3 convolution at stride 1. The official
    PyTorch ResNet ("v1.5") instead puts the stride on the 3x3 convolution,
    see https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        inplanes: Number of input channels.
        planes: Width of the two inner convolutions; the block outputs
            ``planes * 4`` channels.
        stride: Stride of the first 1x1 convolution.
        downsample: Optional module applied to the input to produce the
            residual when the input and output shapes differ.
        k_size: Kernel size forwarded to the ECA layer.
    """

    expansion = 4  # channel multiplier between ``planes`` and the block output

    def __init__(self, inplanes, planes, stride=1, downsample=None, k_size=3):
        super().__init__()
        out_planes = planes * 4

        # 1x1 convolution: reduce channels (and carry the stride).
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 convolution: spatial feature extraction.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 convolution: expand channels again.
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU(inplace=True)
        # Channel attention applied to the main branch before the residual add.
        self.eca = eca_layer(out_planes, k_size)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.eca(self.bn3(self.conv3(out)))

        # With a downsample module the shortcut is projected (conv block);
        # without one the input is added as is (identity block).
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)
83 |
class ResNet50_FPN(nn.Module):
    """ResNet-style backbone with a feature pyramid on top.

    Args:
        block: Residual block class. It must expose an ``expansion``
            attribute and accept
            ``(inplanes, planes, stride, downsample, k_size)``.
            The lateral/top layers hard-code 256/512/1024/2048 input
            channels, i.e. they assume ``expansion == 4``.
        layers: Number of blocks in each of the four stages.
        num_classes: Accepted for interface compatibility; not used.
        k_size: Four ECA kernel sizes, one per stage. Every block of a
            stage receives that stage's kernel size.

    Note:
        Earlier revisions passed ``k_size`` positionally to the non-first
        blocks of each stage, where it was interpreted as ``stride`` and the
        ECA kernel silently stayed at the block default. Checkpoints saved
        with that code hold differently shaped ECA kernels for those blocks
        and will not load with ``strict=True``.
    """

    def __init__(self, block, layers, num_classes=100, k_size=(1, 1, 1, 1)):
        # Spatial sizes in the comments below are the original author's
        # notes for a 600x600x3 input.
        super(ResNet50_FPN, self).__init__()
        self.inplanes = 64

        # Stem (C1): 600,600,3 -> 300,300,64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 300,300,64 -> 150,150,64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # Bottom-up stages C2..C5.
        # 150,150,64 -> 150,150,256
        self.layer1 = self._make_layer(block, 64, layers[0], int(k_size[0]))
        # 150,150,256 -> 75,75,512
        self.layer2 = self._make_layer(block, 128, layers[1], int(k_size[1]), stride=2)
        # 75,75,512 -> 38,38,1024
        self.layer3 = self._make_layer(block, 256, layers[2], int(k_size[2]), stride=2)
        # 38,38,1024 -> 19,19,2048
        self.layer4 = self._make_layer(block, 512, layers[3], int(k_size[3]), stride=2)

        # Reduce C5 to 256 channels to obtain P5.
        self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)

        # Smoothing layers: 3x3 convolutions applied to the merged maps.
        self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # Lateral layers: bring C4/C3/C2 to 256 channels so they can be added.
        self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer1 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)

        # P5 -> P6 (19,19 -> 10,10): a 1x1 window with stride 2 subsamples.
        self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=True)

        # Adaptive max pooling to a 7x7 output; not used by forward().
        self.maxpool1 = nn.AdaptiveMaxPool2d(7)

        # Weight initialisation: normal(0, sqrt(2 / fan_out)) for every
        # Conv2d, weight 1 and bias 0 for every BatchNorm2d.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, k_size, stride=1):
        """Build one stage made of ``blocks`` residual blocks.

        Args:
            block: Residual block class.
            planes: Inner width of the blocks; the stage outputs
                ``planes * block.expansion`` channels.
            blocks: Number of blocks in the stage.
            k_size: ECA kernel size given to every block of the stage.
            stride: Stride of the first block; the others use the block default.

        Returns:
            nn.Sequential: The stacked blocks.
        """
        downsample = None
        # The first block needs a projection shortcut whenever it changes the
        # spatial size or the number of channels.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, downsample, k_size)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            # k_size must be passed by keyword: the third positional
            # parameter of the block is ``stride``.
            layers.append(block(self.inplanes, planes, k_size=k_size))
        return nn.Sequential(*layers)

    def _upsample_add(self, x, y):
        """Bilinearly resize ``x`` to the spatial size of ``y`` and add them."""
        _, _, H, W = y.size()
        # ``interpolate`` replaces the deprecated ``upsample``;
        # align_corners=False is the behaviour ``upsample`` resolved to.
        return nn.functional.interpolate(
            x, size=(H, W), mode='bilinear', align_corners=False
        ) + y

    def forward(self, x):
        """Return the pyramid levels ``[p2, p3, p4, p5, p6]`` for input ``x``."""
        # Bottom-up
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        # Top-down: reduce channels, then upsample and merge with the laterals.
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer3(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p2 = self._upsample_add(p3, self.latlayer1(c2))

        # Smooth the merged maps; p6 is subsampled from the smoothed p5.
        p5 = self.smooth4(p5)
        p6 = self.maxpool_p6(p5)
        p4 = self.smooth3(p4)
        p3 = self.smooth2(p3)
        p2 = self.smooth1(p2)

        return [p2, p3, p4, p5, p6]
207 |
208 | # test
209 | # FPN = ResNet50_FPN(Bottleneck, [3, 4, 6, 3])
210 | # print('FPN:',FPN)
211 |
212 |
def resnet50_FPN(pretrained=False):
    """Build the ResNet50-FPN backbone and the module stack used as head.

    Args:
        pretrained: Accepted for interface compatibility. This function
            never reads it, so no weights are loaded here.

    Returns:
        tuple: ``(features, classifier)``. ``features`` is the complete
        ``ResNet50_FPN`` model. ``classifier`` is an ``nn.Sequential`` of
        the model's ``smooth1``..``smooth4`` and ``maxpool1`` modules; these
        are the same module objects the model holds, not copies.
    """
    # [3, 4, 6, 3] is the number of bottleneck blocks in each of the four stages.
    features = ResNet50_FPN(Bottleneck, [3, 4, 6, 3])

    head_modules = [
        features.smooth1,
        features.smooth2,
        features.smooth3,
        features.smooth4,
        features.maxpool1,
    ]

    print('features:', features)
    # Applied after RoI pooling, ahead of the regression/classification predictors.
    classifier = nn.Sequential(*head_modules)
    print('classifier:', classifier)
    return features, classifier
239 |
240 |
--------------------------------------------------------------------------------
/nets/rpn.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import torch
4 | from torch import nn
5 | from torch.nn import functional as F
6 | from torchvision.ops import nms
7 | from utils.anchors import _enumerate_shifted_anchor, generate_anchor_base
8 | from utils.utils_bbox import loc2bbox
9 | from collections import OrderedDict
10 |
class ProposalCreator():
    """Turn RPN outputs for one image into a fixed number of proposals.

    Args:
        mode: ``"training"`` selects the training limits; any other value
            selects the test limits.
        nms_iou: IoU threshold passed to NMS.
        n_train_pre_nms: Proposals kept before NMS in training mode.
        n_train_post_nms: Proposals returned in training mode.
        n_test_pre_nms: Proposals kept before NMS otherwise.
        n_test_post_nms: Proposals returned otherwise.
        min_size: Minimum width and height of a proposal (scaled by
            ``scale`` at call time).
    """

    def __init__(
        self,
        mode,
        nms_iou             = 0.7,
        n_train_pre_nms     = 12000,
        n_train_post_nms    = 1000,
        n_test_pre_nms      = 3000,
        n_test_post_nms     = 1000,
        min_size            = 16

    ):
        self.mode               = mode
        self.nms_iou            = nms_iou
        self.n_train_pre_nms    = n_train_pre_nms
        self.n_train_post_nms   = n_train_post_nms
        self.n_test_pre_nms     = n_test_pre_nms
        self.n_test_post_nms    = n_test_post_nms
        self.min_size           = min_size

    def __call__(self, loc, score, anchor, img_size, scale=1.):
        """Decode, filter and select proposals.

        Args:
            loc: Predicted box offsets for every anchor (tensor).
            score: Foreground score for every anchor (tensor).
            anchor: NumPy array of anchors, converted with ``torch.from_numpy``.
            img_size: Image size; ``img_size[1]`` bounds box columns 0 and 2,
                ``img_size[0]`` bounds columns 1 and 3 (presumably
                ``(height, width)`` -- confirm against ``loc2bbox``).
            scale: Multiplier applied to ``min_size``.

        Returns:
            Tensor with ``n_post_nms`` rows of 4 box coordinates. When fewer
            boxes survive NMS, surviving boxes are repeated at random; when
            none survive, the rows are all zero.
        """
        if self.mode == "training":
            n_pre_nms   = self.n_train_pre_nms
            n_post_nms  = self.n_train_post_nms
        else:
            n_pre_nms   = self.n_test_pre_nms
            n_post_nms  = self.n_test_post_nms

        # Anchors arrive as NumPy; match the dtype/device of the predictions.
        anchor = torch.from_numpy(anchor).type_as(loc)
        # Apply the predicted offsets to the anchors.
        roi = loc2bbox(anchor, loc)
        # Keep the boxes inside the image.
        roi[:, [0, 2]] = torch.clamp(roi[:, [0, 2]], min = 0, max = img_size[1])
        roi[:, [1, 3]] = torch.clamp(roi[:, [1, 3]], min = 0, max = img_size[0])

        # Drop boxes whose width or height is below the (scaled) minimum.
        min_size = self.min_size * scale
        keep = torch.where(((roi[:, 2] - roi[:, 0]) >= min_size) & ((roi[:, 3] - roi[:, 1]) >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]

        # Keep the highest-scoring boxes before NMS.
        order = torch.argsort(score, descending=True)
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]
        score = score[order]

        keep = nms(roi, score, self.nms_iou)
        if len(keep) == 0:
            # Nothing survived: np.random.choice cannot sample from an empty
            # range, so return zero boxes of the expected shape instead of
            # raising in the middle of a training or prediction run.
            return roi.new_zeros((n_post_nms, 4))
        if len(keep) < n_post_nms:
            # Pad to a fixed count by repeating surviving boxes at random.
            index_extra = np.random.choice(range(len(keep)), size=(n_post_nms - len(keep)), replace=True)
            keep = torch.cat([keep, keep[index_extra]])
        keep = keep[:n_post_nms]
        roi = roi[keep]
        return roi
96 |
97 |
class resnet50_fpn_RPNhead(nn.Module):
    """RPN head applied to every feature map of a pyramid.

    The same three convolutions are shared by all levels.

    Args:
        in_channels: Channels of each incoming feature map.
        mid_channels: Channels of the shared 3x3 convolution.
        ratios: Anchor aspect ratios passed to ``generate_anchor_base``.
        anchor_scales: Anchor scales passed to ``generate_anchor_base``.
        feat_stride: Stride passed to ``_enumerate_shifted_anchor``.
        mode: Forwarded to ``ProposalCreator``.
    """

    def __init__(
            self,
            in_channels=512,
            mid_channels=512,
            ratios=[0.5, 1, 2],
            anchor_scales=[4, 16, 32],
            feat_stride=16,
            mode="training",
    ):
        super().__init__()
        # Base anchors: one row per anchor placed at each feature-map cell.
        self.anchor_base = generate_anchor_base(
            anchor_scales=anchor_scales, ratios=ratios)
        anchors_per_cell = self.anchor_base.shape[0]

        # Shared 3x3 convolution followed by two 1x1 predictors:
        # 2 values per anchor (object / no object) and 4 offsets per anchor.
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.score = nn.Conv2d(mid_channels, anchors_per_cell * 2, 1, 1, 0)
        self.loc = nn.Conv2d(mid_channels, anchors_per_cell * 4, 1, 1, 0)

        self.feat_stride = feat_stride
        # Decodes the predictions and runs NMS.
        self.proposal_layer = ProposalCreator(mode)

        for layer in (self.conv1, self.score, self.loc):
            normal_init(layer, 0, 0.01)

    def forward(self, x, img_size, scale=1.):
        """Run the head on every feature map in ``x``.

        Args:
            x: Iterable of 4-D feature maps.
            img_size: Image size forwarded to the proposal layer.
            scale: Scale forwarded to the proposal layer.

        Returns:
            tuple: ``(rpn_locs, rpn_scores, rois, roi_indices, anchor)``,
            five lists with one entry per feature map.
        """
        rpn_locs, rpn_scores = [], []
        rois, roi_indices, anchor = [], [], []

        for feature in x:
            n, _, h, w = feature.shape
            hidden = F.relu(self.conv1(feature))

            # (n, C, h, w) -> (n, h, w, C) -> (n, anchors, 4)
            level_locs = self.loc(hidden).permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
            # (n, C, h, w) -> (n, h, w, C) -> (n, anchors, 2)
            level_scores = self.score(hidden).permute(0, 2, 3, 1).contiguous().view(n, -1, 2)

            # Softmax over the two classes; index 1 is used as the
            # foreground probability.
            fg_scores = F.softmax(level_scores, dim=-1)[:, :, 1].contiguous().view(n, -1)

            # NOTE(review): the same feat_stride is used for every pyramid
            # level although the levels differ in resolution -- confirm
            # this is intended.
            level_anchor = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w)

            level_rois, level_indices = [], []
            for i in range(n):
                roi = self.proposal_layer(level_locs[i], fg_scores[i], level_anchor, img_size, scale=scale)
                # Every proposal remembers which image of the batch it belongs to.
                batch_index = i * torch.ones((len(roi),))
                level_rois.append(roi.unsqueeze(0))
                level_indices.append(batch_index.unsqueeze(0))

            rois.append(torch.cat(level_rois, dim=0).type_as(hidden))
            roi_indices.append(torch.cat(level_indices, dim=0).type_as(hidden))
            anchor.append(torch.from_numpy(level_anchor).unsqueeze(0).float().to(hidden.device))
            rpn_locs.append(level_locs)
            rpn_scores.append(level_scores)

        return rpn_locs, rpn_scores, rois, roi_indices, anchor
213 |
214 |
class RegionProposalNetwork(nn.Module):
    """RPN head for a single shared feature map.

    Args:
        in_channels: Channels of the incoming feature map.
        mid_channels: Channels of the 3x3 convolution.
        ratios: Anchor aspect ratios passed to ``generate_anchor_base``.
        anchor_scales: Anchor scales passed to ``generate_anchor_base``.
        feat_stride: Stride passed to ``_enumerate_shifted_anchor``.
        mode: Forwarded to ``ProposalCreator``.
    """

    def __init__(
        self,
        in_channels     = 512,
        mid_channels    = 512,
        ratios          = [0.5, 1, 2],
        anchor_scales   = [4, 16, 32],
        feat_stride     = 16,
        mode            = "training",
    ):
        super().__init__()
        # Base anchors: one row per anchor placed at each feature-map cell.
        self.anchor_base = generate_anchor_base(anchor_scales = anchor_scales, ratios = ratios)
        anchors_per_cell = self.anchor_base.shape[0]

        # 3x3 convolution followed by two 1x1 predictors:
        # 2 values per anchor (object / no object) and 4 offsets per anchor.
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.score = nn.Conv2d(mid_channels, anchors_per_cell * 2, 1, 1, 0)
        self.loc = nn.Conv2d(mid_channels, anchors_per_cell * 4, 1, 1, 0)

        self.feat_stride = feat_stride
        # Decodes the predictions and runs NMS.
        self.proposal_layer = ProposalCreator(mode)

        for layer in (self.conv1, self.score, self.loc):
            normal_init(layer, 0, 0.01)

    def forward(self, x, img_size, scale=1.):
        """Predict offsets and scores and generate proposals for a batch.

        Args:
            x: 4-D feature map.
            img_size: Image size forwarded to the proposal layer.
            scale: Scale forwarded to the proposal layer.

        Returns:
            tuple: ``(rpn_locs, rpn_scores, rois, roi_indices, anchor)``.
            ``rpn_locs`` is ``(n, anchors, 4)`` and ``rpn_scores`` is
            ``(n, anchors, 2)``; ``rois`` and ``roi_indices`` stack the
            per-image proposals and their batch indices along dim 0;
            ``anchor`` is the anchor grid with a leading dimension of 1.
        """
        n, _, h, w = x.shape
        x = F.relu(self.conv1(x))

        # (n, C, h, w) -> (n, h, w, C) -> (n, anchors, 4)
        rpn_locs = self.loc(x).permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
        # (n, C, h, w) -> (n, h, w, C) -> (n, anchors, 2)
        rpn_scores = self.score(x).permute(0, 2, 3, 1).contiguous().view(n, -1, 2)

        # Softmax over the two classes; index 1 is used as the
        # foreground probability.
        rpn_fg_scores = F.softmax(rpn_scores, dim=-1)[:, :, 1].contiguous().view(n, -1)

        # Anchors laid out over every cell of the feature map.
        anchor = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w)

        per_image_rois = []
        per_image_indices = []
        for i in range(n):
            roi = self.proposal_layer(rpn_locs[i], rpn_fg_scores[i], anchor, img_size, scale = scale)
            # Every proposal remembers which image of the batch it belongs to.
            batch_index = i * torch.ones((len(roi),))
            per_image_rois.append(roi.unsqueeze(0))
            per_image_indices.append(batch_index.unsqueeze(0))

        rois = torch.cat(per_image_rois, dim=0).type_as(x)
        roi_indices = torch.cat(per_image_indices, dim=0).type_as(x)
        anchor = torch.from_numpy(anchor).unsqueeze(0).float().to(x.device)

        return rpn_locs, rpn_scores, rois, roi_indices, anchor
316 |
317 |
def normal_init(m, mean, stddev, truncated=False):
    """Initialise the weights of layer ``m`` from a normal distribution.

    Args:
        m: Layer with a ``weight`` parameter and an optional ``bias``.
        mean: Mean of the distribution.
        stddev: Standard deviation of the distribution.
        truncated: When true, draw standard-normal values, fold them into
            (-2, 2) with ``fmod`` and then scale and shift them. This only
            approximates a truncated normal. The bias is left untouched in
            this branch.
    """
    if truncated:
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)  # not a perfect approximation
    else:
        m.weight.data.normal_(mean, stddev)
        # Layers created with bias=False have ``bias is None``.
        if m.bias is not None:
            m.bias.data.zero_()
324 |
--------------------------------------------------------------------------------
/nets/vgg16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.hub import load_state_dict_from_url
4 |
5 |
6 | #--------------------------------------#
7 | # VGG16的结构
8 | #--------------------------------------#
class VGG(nn.Module):
    """VGG network: feature extractor, 7x7 average pooling, 3-layer classifier.

    Args:
        features: Module used as the convolutional feature extractor. The
            classifier expects it to produce 512 channels.
        num_classes: Size of the final linear layer.
        init_weights: When true, run ``_initialize_weights`` after construction.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super().__init__()
        self.features = features
        # Average pooling to a fixed 7x7 map so the classifier input size
        # does not depend on the image size.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Classification head.
        head = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        # Feature extraction followed by average pooling.
        pooled = self.avgpool(self.features(x))
        # Flatten everything except the batch dimension, then classify.
        return self.classifier(torch.flatten(pooled, 1))

    def _initialize_weights(self):
        """Initialise every Conv2d, BatchNorm2d and Linear sub-module."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
63 |
64 | '''
65 | 假设输入图像为(600, 600, 3),随着cfg的循环,特征层变化如下:
66 | 600,600,3 -> 600,600,64 -> 600,600,64 -> 300,300,64 -> 300,300,128 -> 300,300,128 -> 150,150,128 -> 150,150,256 -> 150,150,256 -> 150,150,256
67 | -> 75,75,256 -> 75,75,512 -> 75,75,512 -> 75,75,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 -> 37,37,512
68 | 到cfg结束,我们获得了一个37,37,512的特征层
69 | '''
70 |
# Layer plan consumed by make_layers: an integer adds a 3x3 convolution with
# that many output channels followed by ReLU, 'M' adds a 2x2 max-pool with stride 2.
cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
72 |
73 | #--------------------------------------#
74 | # 特征提取部分
75 | #--------------------------------------#
def make_layers(cfg, batch_norm=False):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Sequence of layer specs. ``'M'`` adds a 2x2 max-pool with
            stride 2; any other value is taken as the number of output
            channels of a 3x3 convolution (padding 1) followed by ReLU.
        batch_norm: When true, insert BatchNorm2d between each convolution
            and its ReLU.

    Returns:
        nn.Sequential: The layers in ``cfg`` order. The first convolution
        takes 3 input channels.
    """
    modules = []
    channels = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels = spec
    return nn.Sequential(*modules)
90 |
def decom_vgg16(pretrained = False):
    """Split a VGG16 into a feature extractor and a classifier head.

    Args:
        pretrained: When true, download the torchvision VGG16 weights into
            ``./model_data`` and load them before splitting.

    Returns:
        tuple: ``(features, classifier)``. ``features`` holds the first 30
        modules of the convolutional part, i.e. everything except the final
        max-pool. ``classifier`` holds the first two Linear+ReLU pairs of
        the head; both Dropout layers and the final Linear are left out.
    """
    model = VGG(make_layers(cfg))
    if pretrained:
        weights = load_state_dict_from_url("https://download.pytorch.org/models/vgg16-397923af.pth", model_dir="./model_data")
        model.load_state_dict(weights)

    # Feature extractor: drop the last max-pool (module index 30).
    backbone = nn.Sequential(*list(model.features)[:30])

    # Classifier: indices 2 and 5 are Dropout, index 6 is the final Linear.
    dropped = (2, 5, 6)
    head = [layer for index, layer in enumerate(model.classifier) if index not in dropped]

    return backbone, nn.Sequential(*head)
111 |
--------------------------------------------------------------------------------
/order_name.py:
--------------------------------------------------------------------------------
1 | #...........................#
2 | #对文件夹中的文件进行重命名
3 | #...........................#
4 | import os
5 | import xml
6 | from xml.dom import minidom
7 | import xml.etree.cElementTree as ET
8 |
def myrename(file_path):
    """Rename every entry of ``file_path`` so its name contains no spaces.

    Each space in a name is replaced by an underscore. A failed rename is
    reported on stdout and does not stop the loop.

    Args:
        file_path: Directory whose entries are renamed in place.
    """
    for entry in os.listdir(file_path):
        source = os.path.join(file_path, entry)
        print('wenjianmingzi :', source)

        # Replace the spaces in the name.
        cleaned = entry.replace(" ", "_")
        print("新名字为:", cleaned)

        target = os.path.join(file_path, cleaned)
        try:
            os.rename(source, target)
        except Exception as e:
            print(e)
            print("Failed!")
        else:
            print("SUcess!")
28 |
29 |
30 | #...........................#
31 | #对xml文件内的filename和path名进行重命名
32 | #...........................#
33 |
def xml_name(xmlpath):
    """Rewrite ``<filename>`` and ``<path>`` in every annotation of a folder.

    For each non-directory entry ``<stem>.<ext>`` of ``xmlpath`` the first
    ``filename`` element is set to ``<stem>.jpg`` and the first ``path``
    element to ``xmlpath + '\\' + <stem>.jpg``; the file is then
    overwritten in place.

    Args:
        xmlpath: Directory containing the XML annotation files.
    """
    for xmlFile in os.listdir(xmlpath):
        xml_file_path = os.path.join(xmlpath, xmlFile)
        # Test the full path: a bare entry name would be resolved against
        # the current working directory, so sub-directories slipped through.
        if os.path.isdir(xml_file_path):
            continue

        # splitext keeps dots inside the stem ("img.v2.xml" -> "img.v2").
        name1 = os.path.splitext(xmlFile)[0]
        dom = minidom.parse(xml_file_path)
        root = dom.documentElement

        # Rewrite <filename>.
        newfilename = root.getElementsByTagName('filename')
        t = newfilename[0].firstChild.data = name1 + '.jpg'
        print('t:', t)

        # Rewrite <path>.
        newpath = root.getElementsByTagName('path')
        t1 = newpath[0].firstChild.data = xmlpath + '\\' + name1 + '.jpg'
        print('t1:', t1)

        # writexml emits no encoding declaration, so readers assume UTF-8;
        # write UTF-8 explicitly instead of the platform default encoding.
        with open(xml_file_path, 'w', encoding='utf-8') as fh:
            print('fh:', fh)
            dom.writexml(fh)
        print('写入name/pose OK!')
56 |
57 |
58 | # 删除xml文件中显示的版本号
def delete_xmlversion(xmlpath, savedir):
    """Copy XML files to ``savedir`` with the XML declaration removed.

    Every entry of ``xmlpath`` whose name contains ``.xml`` is read, any
    ``<?xml ... ?>`` declaration is stripped from its lines, and the result
    is written as UTF-8 to a file of the same name in ``savedir``.
    ``savedir`` may be the same directory as ``xmlpath``.

    Args:
        xmlpath: Directory containing the source XML files.
        savedir: Existing directory that receives the cleaned files.
    """
    import re

    declaration = re.compile(r'<\?xml[^>]*\?>')

    for ml in os.listdir(xmlpath):
        if '.xml' not in ml:
            continue
        print('{}'.format(ml))

        # Read everything before the output is opened: opening for writing
        # truncates the file, which would destroy the input whenever
        # savedir and xmlpath are the same directory.
        with open(os.path.join(xmlpath, ml), 'r') as fi:
            content = fi.readlines()

        with open(os.path.join(savedir, ml), 'w', encoding='utf-8') as fo:
            for line in content:
                fo.write(declaration.sub('', line))
        print('替换成功')
76 |
77 |
78 | #删除xml文件中部分不要的标签信息
def Delete_part_information_xml(path_root, xy_classes,
                                save_dir=r'F:\Desktop\PCB_code\PCB_DataSet\Annotations—new'):
    """Drop unwanted ``<object>`` entries from annotation files.

    Every file of every directory in ``path_root`` is parsed, each
    ``object`` element whose ``name`` text is not in ``xy_classes`` is
    removed, and the result is written under the same file name to
    ``save_dir``.

    Args:
        path_root: One annotation directory, or an iterable of them.
        xy_classes: Class names to keep.
        save_dir: Existing directory that receives the filtered files.
    """
    # ``xml.etree.cElementTree`` was removed in Python 3.9; ElementTree
    # offers the same API, so it is imported here explicitly.
    import xml.etree.ElementTree as ET

    # A single path would otherwise be iterated character by character.
    if isinstance(path_root, (str, os.PathLike)):
        path_root = [path_root]

    for anno_path in path_root:
        xml_list = os.listdir(anno_path)
        print("打开{}文件".format(xml_list))
        for annoxml in xml_list:
            path_xml = os.path.join(anno_path, annoxml)
            print('保存文件路径为{}'.format(path_xml))
            tree = ET.parse(path_xml)
            root = tree.getroot()

            # findall returns a list, so removing children while looping is safe.
            for child in root.findall('object'):
                name = child.find('name').text
                if name not in xy_classes:
                    root.remove(child)
            print(annoxml)
            tree.write(os.path.join(save_dir, annoxml))
95 |
96 |
97 |
98 |
if __name__=="__main__":
    # Directory whose files are renamed.
    file_path=r"F:\Desktop\PCB_code\date_set\new_data"
    # xmlpath="F:\\桌面\\PCB_code\\date_set\\Image_label_source"
    # savedir = r'F:\桌面\PCB_code\date_set\3'    # output folder for delete_xmlversion
    # xmlpath=r'F:\桌面\PCB_code\date_set\label'
    myrename(file_path)                            # rename the image files

    # Optional steps for fixing the names stored inside the XML files:
    # myrename(xmlpath)                            # 1. rename the XML files themselves
    # xml_name(xmlpath)                            # 2. rewrite <filename> and <path> inside each XML
    # delete_xmlversion(xmlpath,savedir)           # 3. strip the XML declaration written by step 2

    # Remove the unwanted object labels from the annotations.
    # Delete_part_information_xml iterates over its first argument, so the
    # directory is wrapped in a list; a bare string would be walked one
    # character at a time.
    path_root=[r'F:\Desktop\PCB_code\PCB_DataSet\Annotations']
    xy_classes=['Speaker',"Bat","2USB","Rj45+2USB","Cap_cross","Cap_blue_black","Jumper04p",
                "Jumper10p", "HDD","Power08p","Power04p","Power24p"]
    Delete_part_information_xml(path_root,xy_classes)
116 |
117 |
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | #----------------------------------------------------#
2 | # 将单张图片预测、摄像头检测和FPS测试功能
3 | # 整合到了一个py文件中,通过指定mode进行模式的修改。
4 | #----------------------------------------------------#
5 | import time
6 |
7 | import cv2
8 | import numpy as np
9 | from PIL import Image
10 |
11 | from frcnn_predict import FRCNN
12 |
if __name__ == "__main__":
    frcnn = FRCNN()
    #----------------------------------------------------------------------------------------------------------#
    #   mode selects the test mode:
    #   'predict'       single-image prediction. To change the prediction flow (saving the image,
    #                   cropping objects, ...) read the notes in the 'predict' branch below.
    #   'video'         video detection, from a camera or a video file; see the notes below.
    #   'fps'           FPS measurement on the image given by fps_image_path; see the notes below.
    #   'dir_predict'   run detection on every image in a folder and save the results; see the notes below.
    #----------------------------------------------------------------------------------------------------------#
    mode = "dir_predict"
    #-------------------------------------------------------------------------#
    #   crop    whether detected objects are cropped after a single-image prediction
    #   count   whether detected objects are counted
    #   crop and count only take effect when mode='predict'
    #-------------------------------------------------------------------------#
    crop            = False
    count           = False
    #----------------------------------------------------------------------------------------------------------#
    #   video_path          path of the video; video_path=0 means "use the camera".
    #                       To process a file set e.g. video_path = "xxx.mp4".
    #   video_save_path     where the processed video is written; "" means "do not save".
    #                       To save set e.g. video_save_path = "yyy.mp4".
    #   video_fps           fps of the saved video
    #
    #   video_path, video_save_path and video_fps only take effect when mode='video'.
    #   The writer is released when the stream ends, ESC is pressed, or the loop is interrupted.
    #----------------------------------------------------------------------------------------------------------#
    video_path      = 0
    video_save_path = ""
    video_fps       = 25.0
    #----------------------------------------------------------------------------------------------------------#
    #   test_interval       number of detections run when measuring fps; in theory a larger value
    #                       gives a more accurate fps.
    #   fps_image_path      image used for the fps measurement
    #
    #   test_interval and fps_image_path only take effect when mode='fps'
    #----------------------------------------------------------------------------------------------------------#
    test_interval   = 100
    fps_image_path  = "img/street.jpg"
    #-------------------------------------------------------------------------#
    #   dir_origin_path     folder containing the images to run detection on
    #   dir_save_path       folder that receives the processed images
    #
    #   dir_origin_path and dir_save_path only take effect when mode='dir_predict'
    #-------------------------------------------------------------------------#
    dir_origin_path = "faster-rcnn-pytorch-master/prediction_img"
    dir_save_path   = "faster-rcnn-pytorch-master/prediction_img_out"

    if mode == "predict":
        # Notes from the original author:
        # 1. This branch does not do batch prediction. For that, walk a folder with os.listdir()
        #    and open each file with Image.open (get_dr_txt.py is mentioned as a reference).
        # 2. To save the result, call r_image.save("img.jpg").
        # 3. To get the box coordinates, read top, left, bottom, right in the drawing part of
        #    frcnn.detect_image.
        # 4. To crop the detected objects, use those four values to slice the original image.
        # 5. To draw extra text (e.g. a per-class count), test predicted_class in the drawing part
        #    of frcnn.detect_image and write with draw.text.
        while True:
            img = input('Input image filename:')
            try:
                image = Image.open(img)
            except Exception:
                # Narrowed from a bare except so Ctrl+C / SystemExit are not swallowed.
                print('Open Error! Try again!')
                continue
            else:
                r_image = frcnn.detect_image(image, crop = crop, count = count)
                r_image.show()

    elif mode == "video":
        capture = cv2.VideoCapture(video_path)
        # The writer only exists when an output path was requested.
        out = None
        if video_save_path != "":
            fourcc  = cv2.VideoWriter_fourcc(*'XVID')
            size    = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            out     = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)

        fps = 0.0
        try:
            while True:
                t1 = time.time()
                # Grab one frame; stop when none is returned (end of file, camera unavailable).
                ref, frame = capture.read()
                if not ref:
                    break
                # BGR -> RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # To PIL Image
                frame = Image.fromarray(np.uint8(frame))
                # Run detection
                frame = np.array(frcnn.detect_image(frame))
                # RGB -> BGR for OpenCV display
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                # Running average of the instantaneous frame rate.
                fps = ( fps + (1./(time.time()-t1)) ) / 2
                print("fps= %.2f"%(fps))
                frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                cv2.imshow("video", frame)
                c = cv2.waitKey(1) & 0xff
                if out is not None:
                    out.write(frame)

                # ESC quits.
                if c == 27:
                    break
        finally:
            # Release exactly once, including on Ctrl+C, and only release the writer
            # if one was created (previously out.release() raised NameError otherwise).
            capture.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()

    elif mode == "fps":
        img = Image.open(fps_image_path)
        tact_time = frcnn.get_FPS(img, test_interval)
        print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')

    elif mode == "dir_predict":
        import os
        from tqdm import tqdm

        img_names = os.listdir(dir_origin_path)
        for img_name in tqdm(img_names):
            if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
                image_path  = os.path.join(dir_origin_path, img_name)
                image       = Image.open(image_path)
                r_image     = frcnn.detect_image(image)
                os.makedirs(dir_save_path, exist_ok=True)
                r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)

    else:
        raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
140 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.2.1
2 | numpy==1.17.0
3 | matplotlib==3.1.2
4 | opencv_python==4.1.2.30
5 | torch==1.2.0
6 | torchvision==0.4.0
7 | tqdm==4.60.0
8 | Pillow==8.2.0
9 | h5py==2.10.0
10 |
--------------------------------------------------------------------------------
/summary.py:
--------------------------------------------------------------------------------
1 | #--------------------------------------------#
2 | # 该部分代码用于看网络结构
3 | #--------------------------------------------#
4 | import torch
5 | from thop import clever_format, profile
6 | from torchsummary import summary
7 |
8 | from nets.faster_rcnn_feature_extraction import FasterRCNN
9 |
if __name__ == "__main__":
    # Print a layer summary, then the operation and parameter counts of the network.
    input_shape = [600, 600]
    num_classes = 21

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = FasterRCNN(num_classes, backbone = 'vgg')
    model = model.to(device)
    summary(model, (3, *input_shape))

    dummy_input = torch.randn(1, 3, *input_shape).to(device)
    flops, params = profile(model.to(device), (dummy_input, ), verbose=False)
    #--------------------------------------------------------#
    #   Original author's note: flops is doubled because
    #   profile does not count a convolution as two
    #   operations. Some papers count the multiply and the
    #   add separately (multiply by 2); others count only the
    #   multiplies (do not). This script doubles, following
    #   YOLOX.
    #--------------------------------------------------------#
    flops *= 2
    flops, params = clever_format([flops, params], "%.3f")
    print('Total GFLOPS: %s' % (flops))
    print('Total params: %s' % (params))
30 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/anchors.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/anchors.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/anchors.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/anchors.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/callbacks.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/callbacks.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/callbacks.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/callbacks.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/dataloader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/dataloader.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/dataloader.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/dataloader.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils_bbox.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils_bbox.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils_bbox.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils_bbox.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils_fit.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils_fit.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils_fit.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils_fit.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils_map.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils_map.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils_map.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zao-chao6/PCB_defect_detection_faster_r_cnn/619ea6878250098b37c8a7336c4c7a8b21615673/utils/__pycache__/utils_map.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/anchors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | #--------------------------------------------#
5 | # 生成基础的先验框
6 | #--------------------------------------------#
def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]):
    """Build the base anchors centred on the origin.

    One anchor is produced per (ratio, scale) pair, ordered ratio-major
    (row index = ratio_index * len(anchor_scales) + scale_index).

    Returns:
        float32 array of shape (len(ratios) * len(anchor_scales), 4) whose
        rows are (-h/2, -w/2, h/2, w/2), where
        h = base_size * scale * sqrt(ratio) and
        w = base_size * scale * sqrt(1 / ratio).
    """
    ratio_arr = np.asarray(ratios)
    side = base_size * np.asarray(anchor_scales)

    # Outer products: rows follow ratios, columns follow scales.
    heights = side[np.newaxis, :] * np.sqrt(ratio_arr)[:, np.newaxis]
    widths = side[np.newaxis, :] * np.sqrt(1. / ratio_arr)[:, np.newaxis]

    half_h = heights.ravel() / 2.
    half_w = widths.ravel() / 2.
    corners = np.stack((-half_h, -half_w, half_h, half_w), axis=1)
    return corners.astype(np.float32)
20 |
21 | #--------------------------------------------#
22 | # 对基础先验框进行拓展对应到所有特征点上
23 | #--------------------------------------------#
24 | def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
25 | #---------------------------------#
26 | # 计算网格中心点
27 | #---------------------------------#
28 | shift_x = np.arange(0, width * feat_stride, feat_stride)
29 | shift_y = np.arange(0, height * feat_stride, feat_stride)
30 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
31 | shift = np.stack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel(),), axis=1)
32 |
33 | #---------------------------------#
34 | # 每个网格点上的9个先验框
35 | #---------------------------------#
36 | A = anchor_base.shape[0]
37 | K = shift.shape[0]
38 | anchor = anchor_base.reshape((1, A, 4)) + shift.reshape((K, 1, 4))
39 | #---------------------------------#
40 | # 所有的先验框
41 | #---------------------------------#
42 | anchor = anchor.reshape((K * A, 4)).astype(np.float32)
43 | return anchor
44 |
if __name__ == "__main__":
    # Visual check: print the base anchors, then draw the grid points and the
    # nine anchors stored at indices 108..116.
    import matplotlib.pyplot as plt

    nine_anchors = generate_anchor_base()
    print(nine_anchors)

    height, width, feat_stride = 38, 38, 16
    anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width)
    print(np.shape(anchors_all))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.ylim(-300, 900)
    plt.xlim(-300, 900)

    # Scatter the grid offsets used to shift the anchors.
    shift_x, shift_y = np.meshgrid(
        np.arange(0, width * feat_stride, feat_stride),
        np.arange(0, height * feat_stride, feat_stride),
    )
    plt.scatter(shift_x, shift_y)

    box_widths = anchors_all[:, 2] - anchors_all[:, 0]
    box_heights = anchors_all[:, 3] - anchors_all[:, 1]

    for i in range(108, 117):
        corner = [anchors_all[i, 0], anchors_all[i, 1]]
        rect = plt.Rectangle(corner, box_widths[i], box_heights[i], color="r", fill=False)
        ax.add_patch(rect)
    plt.show()
69 |
70 |
--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import matplotlib
4 | import torch
5 |
6 | matplotlib.use('Agg')
7 | import shutil
8 |
9 | import numpy as np
10 | from matplotlib import pyplot as plt
11 | from PIL import Image
12 | from scipy import signal
13 | from torch.utils.tensorboard import SummaryWriter
14 | # from tqdm import tqdm
15 | import tqdm
16 |
17 | from .utils import cvtColor, get_new_img_size, preprocess_input, resize_image
18 | from .utils_bbox import DecodeBox
19 | from .utils_map import get_coco_map, get_map
20 |
21 |
class LossHistory():
    """Record per-epoch train/val losses to text files, TensorBoard and a PNG plot."""

    def __init__(self, log_dir, model, input_shape):
        """Create the log directory and the TensorBoard writer.

        Args:
            log_dir: Directory that receives the text logs, the TensorBoard
                events and ``epoch_loss.png``.
            model: Currently unused (graph export to TensorBoard is disabled).
            input_shape: Currently unused, for the same reason.
        """
        self.log_dir    = log_dir
        self.losses     = []
        self.val_loss   = []

        # exist_ok avoids a FileExistsError when the directory is already there.
        os.makedirs(self.log_dir, exist_ok=True)
        self.writer     = SummaryWriter(self.log_dir)

    def append_loss(self, epoch, loss, val_loss):
        """Store one epoch's losses, append them to the logs and redraw the plot.

        Args:
            epoch: Step index passed to the TensorBoard scalars.
            loss: Training loss for the epoch.
            val_loss: Validation loss for the epoch.
        """
        # Re-create the directory in case it was removed during training.
        os.makedirs(self.log_dir, exist_ok=True)

        self.losses.append(loss)
        self.val_loss.append(val_loss)

        with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
            f.write(str(loss))
            f.write("\n")
        with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
            f.write(str(val_loss))
            f.write("\n")

        self.writer.add_scalar('loss', loss, epoch)
        self.writer.add_scalar('val_loss', val_loss, epoch)
        self.loss_plot()

    def loss_plot(self):
        """Write ``epoch_loss.png`` with the raw and, when possible, smoothed curves."""
        iters = range(len(self.losses))

        plt.figure()
        plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
        plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
        try:
            # Savitzky-Golay window length: shorter while there are few points.
            if len(self.losses) < 25:
                num = 5
            else:
                num = 15

            plt.plot(iters, signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
            plt.plot(iters, signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
        except Exception:
            # Smoothing is best-effort (e.g. too few points for the window), so the
            # raw curves are still plotted. Narrowed from a bare except so that
            # KeyboardInterrupt / SystemExit propagate.
            pass

        plt.grid(True)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend(loc="upper right")

        plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))

        plt.cla()
        plt.close("all")
82 |
class EvalCallback():
    """Compute mAP on the validation lines every ``period`` epochs and log/plot it."""

    def __init__(self, net, input_shape, class_names, num_classes, val_lines, log_dir, cuda, \
            map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1):
        """Store the evaluation settings and start ``epoch_map.txt`` with an initial 0.

        Args:
            net: Detection network called on a batch of images in ``get_map_txt``.
            input_shape: Stored on the instance; not read by the methods of this class.
            class_names: Sequence mapping class index to class name.
            num_classes: Number of classes, excluding the extra entry added for ``std``.
            val_lines: Annotation lines, "image_path x1,y1,x2,y2,cls ...".
            log_dir: Existing directory for ``epoch_map.txt`` and ``epoch_map.png``.
            cuda: Whether tensors are moved to the GPU.
            map_out_path: Temporary working directory, deleted after each evaluation.
            max_boxes: Maximum number of detections kept per image.
            confidence: Score threshold passed to the box decoder.
            nms_iou: NMS IoU threshold passed to the box decoder.
            letterbox_image: Stored on the instance; not read by the methods of this class.
            MINOVERLAP: IoU threshold used by the fallback ``get_map`` call.
            eval_flag: Enable or disable evaluation entirely.
            period: Evaluate when ``epoch % period == 0``.
        """
        super(EvalCallback, self).__init__()

        self.net                = net
        self.input_shape        = input_shape
        self.class_names        = class_names
        self.num_classes        = num_classes
        self.val_lines          = val_lines
        self.log_dir            = log_dir
        self.cuda               = cuda
        self.map_out_path       = map_out_path
        self.max_boxes          = max_boxes
        self.confidence         = confidence
        self.nms_iou            = nms_iou
        self.letterbox_image    = letterbox_image
        self.MINOVERLAP         = MINOVERLAP
        self.eval_flag          = eval_flag
        self.period             = period

        # Per-coordinate scaling handed to the box decoder, tiled for each class plus one.
        self.std    = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.std    = self.std.cuda()
        self.bbox_util  = DecodeBox(self.std, self.num_classes)

        self.maps       = [0]
        self.epoches    = [0]
        if self.eval_flag:
            with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
                f.write(str(0))
                f.write("\n")

    #---------------------------------------------------#
    #   Run detection on one image
    #---------------------------------------------------#
    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write the detections for one image to ``detection-results/<image_id>.txt``.

        The file is always created; it stays empty when nothing is detected.
        Each line is "class score left top right bottom".
        """
        result_path = os.path.join(map_out_path, "detection-results/"+image_id+".txt")
        # The context manager closes the file on the early return as well.
        with open(result_path, "w") as f:
            #---------------------------------------------------#
            #   Height and width of the input image
            #---------------------------------------------------#
            image_shape = np.array(np.shape(image)[0:2])
            input_shape = get_new_img_size(image_shape[0], image_shape[1])
            #---------------------------------------------------------#
            #   Convert to RGB so grayscale images do not fail at
            #   prediction time (original author's note).
            #---------------------------------------------------------#
            image       = cvtColor(image)

            #---------------------------------------------------------#
            #   Resize to the size computed by get_new_img_size
            #   (original note: short side resized to 600).
            #---------------------------------------------------------#
            image_data  = resize_image(image, [input_shape[1], input_shape[0]])
            #---------------------------------------------------------#
            #   HWC -> CHW and add the batch dimension
            #---------------------------------------------------------#
            image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()

                roi_cls_locs, roi_scores, rois, _ = self.net(images)
                #-------------------------------------------------------------#
                #   Decode the classifier output into final boxes
                #-------------------------------------------------------------#
                results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape,
                                                        nms_iou = self.nms_iou, confidence = self.confidence)
                #--------------------------------------#
                #   Nothing detected: leave the file empty
                #--------------------------------------#
                if len(results[0]) <= 0:
                    return

                top_label   = np.array(results[0][:, 5], dtype = 'int32')
                top_conf    = results[0][:, 4]
                top_boxes   = results[0][:, :4]

            # Keep the max_boxes highest-scoring detections.
            top_100     = np.argsort(top_conf)[::-1][:self.max_boxes]
            top_boxes   = top_boxes[top_100]
            top_conf    = top_conf[top_100]
            top_label   = top_label[top_100]

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                box             = top_boxes[i]
                score           = str(top_conf[i])

                top, left, bottom, right = box
                if predicted_class not in class_names:
                    continue

                f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
        return

    def on_epoch_end(self, epoch):
        """Evaluate mAP when ``epoch`` is a multiple of ``period`` and evaluation is enabled.

        Writes detection and ground-truth text files under ``map_out_path``,
        computes the mAP, appends it to ``epoch_map.txt``, redraws
        ``epoch_map.png`` and finally removes ``map_out_path``.
        """
        if epoch % self.period == 0 and self.eval_flag:
            # Imported locally under an alias: the file-level name `tqdm` is bound to
            # the module, so calling it directly raised TypeError.
            from tqdm import tqdm as progress_bar

            os.makedirs(self.map_out_path, exist_ok=True)
            os.makedirs(os.path.join(self.map_out_path, "ground-truth"), exist_ok=True)
            os.makedirs(os.path.join(self.map_out_path, "detection-results"), exist_ok=True)
            print("Get map.")
            for annotation_line in progress_bar(self.val_lines):
                line        = annotation_line.split()
                image_id    = os.path.basename(line[0]).split('.')[0]
                #------------------------------#
                #   Open the image
                #------------------------------#
                image       = Image.open(line[0])
                #------------------------------#
                #   Parse the ground-truth boxes
                #------------------------------#
                gt_boxes    = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
                #------------------------------#
                #   Write the detection txt
                #------------------------------#
                self.get_map_txt(image_id, image, self.class_names, self.map_out_path)

                #------------------------------#
                #   Write the ground-truth txt
                #------------------------------#
                with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
                    for box in gt_boxes:
                        left, top, right, bottom, obj = box
                        obj_name = self.class_names[obj]
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))

            print("Calculate Map.")
            try:
                temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1]
            except Exception:
                # Fall back to the VOC-style computation when the COCO path fails.
                # Narrowed from a bare except so KeyboardInterrupt is not swallowed.
                temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path)
            self.maps.append(temp_map)
            self.epoches.append(epoch)

            with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f:
                f.write(str(temp_map))
                f.write("\n")

            plt.figure()
            plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map')

            plt.grid(True)
            plt.xlabel('Epoch')
            plt.ylabel('Map %s'%str(self.MINOVERLAP))
            plt.title('A Map Curve')
            plt.legend(loc="upper right")

            plt.savefig(os.path.join(self.log_dir, "epoch_map.png"))
            plt.cla()
            plt.close("all")

            print("Get map done.")
            shutil.rmtree(self.map_out_path)
242 |
--------------------------------------------------------------------------------
/utils/dataloader.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import torch
4 | from PIL import Image
5 | from torch.utils.data.dataset import Dataset
6 |
7 | from utils.utils import cvtColor, preprocess_input
8 |
9 |
class FRCNNDataset(Dataset):
    """Dataset over annotation lines of the form "image_path x1,y1,x2,y2,cls ..."."""

    def __init__(self, annotation_lines, input_shape = [600, 600], train = True):
        self.annotation_lines   = annotation_lines
        self.length             = len(annotation_lines)
        self.input_shape        = input_shape
        self.train              = train

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        """Return (image in CHW layout, boxes (N, 4), labels (N,)) for one sample."""
        annotation_line = self.annotation_lines[index % self.length]
        #---------------------------------------------------#
        #   Random augmentation is applied for training only
        #---------------------------------------------------#
        image, y = self.get_random_data(annotation_line, self.input_shape[0:2], random = self.train)
        pixels   = np.array(image, dtype=np.float32)
        image    = np.transpose(preprocess_input(pixels), (2, 0, 1))

        num_boxes = len(y)
        box_data  = np.zeros((num_boxes, 5))
        if num_boxes > 0:
            box_data[:num_boxes] = y

        return image, box_data[:, :4], box_data[:, -1]

    def rand(self, a=0, b=1):
        """Draw one uniform sample from [a, b)."""
        span = b - a
        return np.random.rand() * span + a

    def _remap_boxes(self, box, nw, nh, iw, ih, dx, dy, w, h, flip=False):
        """Shuffle boxes, map them onto the output canvas, clip and drop tiny ones."""
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
            if flip:
                box[:, [0,2]] = w - box[:, [2,0]]
            # Clip to the canvas.
            box[:, 0:2] = np.maximum(box[:, 0:2], 0)
            box[:, 2]   = np.minimum(box[:, 2], w)
            box[:, 3]   = np.minimum(box[:, 3], h)
            # Keep only boxes wider and taller than one pixel.
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box   = box[(box_w > 1) & (box_h > 1)]
        return box

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        """Load one image with its boxes and fit both to ``input_shape``.

        With ``random=False`` the image is letterboxed (aspect ratio kept,
        grey padding). With ``random=True`` it is randomly rescaled and
        distorted, placed at a random offset, optionally mirrored and
        colour-jittered in HSV space.

        Returns:
            (image_data, box): the image array and the adjusted box array.
        """
        fields = annotation_line.split()
        #------------------------------#
        #   Open the image and convert it with cvtColor
        #------------------------------#
        image   = cvtColor(Image.open(fields[0]))
        #------------------------------#
        #   Source size and target size
        #------------------------------#
        iw, ih  = image.size
        h, w    = input_shape
        #------------------------------#
        #   Parse the annotated boxes
        #------------------------------#
        box     = np.array([np.array(list(map(int, item.split(',')))) for item in fields[1:]])

        if not random:
            scale = min(w/iw, h/ih)
            nw, nh = int(iw*scale), int(ih*scale)
            dx, dy = (w-nw)//2, (h-nh)//2

            #---------------------------------#
            #   Pad the remainder with grey
            #---------------------------------#
            canvas = Image.new('RGB', (w,h), (128,128,128))
            canvas.paste(image.resize((nw,nh), Image.BICUBIC), (dx, dy))
            image_data = np.array(canvas, np.float32)

            #---------------------------------#
            #   Adjust the boxes
            #---------------------------------#
            box = self._remap_boxes(box, nw, nh, iw, ih, dx, dy, w, h)
            return image_data, box

        #------------------------------------------#
        #   Random rescale with aspect-ratio distortion
        #------------------------------------------#
        jitter_lo, jitter_hi = 1 - jitter, 1 + jitter
        ar_num = self.rand(jitter_lo, jitter_hi)
        ar_den = self.rand(jitter_lo, jitter_hi)
        new_ar = iw/ih * ar_num / ar_den
        scale  = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw,nh), Image.BICUBIC)

        #------------------------------------------#
        #   Paste at a random offset on a grey canvas
        #------------------------------------------#
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        canvas = Image.new('RGB', (w,h), (128,128,128))
        canvas.paste(image, (dx, dy))
        image = canvas

        #------------------------------------------#
        #   Random horizontal flip
        #------------------------------------------#
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        image_data = np.array(image, np.uint8)
        #---------------------------------#
        #   Colour jitter: per-channel gains
        #---------------------------------#
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        #---------------------------------#
        #   Split into HSV channels
        #---------------------------------#
        hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        #---------------------------------#
        #   Apply the gains through lookup tables
        #---------------------------------#
        x       = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        jittered   = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
        image_data = cv2.cvtColor(jittered, cv2.COLOR_HSV2RGB)

        #---------------------------------#
        #   Adjust the boxes
        #---------------------------------#
        box = self._remap_boxes(box, nw, nh, iw, ih, dx, dy, w, h, flip=flip)
        return image_data, box
154 |
155 | # DataLoader中collate_fn使用
def frcnn_dataset_collate(batch):
    """Collate (image, boxes, labels) samples for a DataLoader.

    Images are stacked into one tensor. Boxes and labels stay as Python
    lists with one entry per sample.
    """
    # Transpose the list of samples into three per-field columns.
    columns = [list(column) for column in zip(*batch)]
    images, bboxes, labels = columns if columns else ([], [], [])
    images = torch.from_numpy(np.array(images))
    return images, bboxes, labels
166 |
167 |
--------------------------------------------------------------------------------
/utils/kmeans_anchors/Bikmeans_anchors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from read_voc import VOCDataSet
3 |
4 | # bik-means算法
5 | """
6 | Args:
7 | boxes: 需要聚类的bboxes
8 | k: 簇数(聚成几类)
9 | dist: 更新簇坐标的方法(默认使用中位数,比均值效果略好)
10 | """
11 |
def load_data_set(fileName):
    """Load a tab-separated numeric text file.

    Args:
        fileName: Path of the file; each non-blank line holds tab-separated numbers.

    Returns:
        A list with one list of floats per non-blank line.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    dataSet = []
    # The context manager closes the file even if a conversion fails.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            # Skip blank lines, which would otherwise crash on float('').
            if not stripped:
                continue
            dataSet.append([float(field) for field in stripped.split('\t')])
    return dataSet
24 |
25 |
def distance_euclidean(vector1, vector2):
    """Return the Euclidean distance between two vectors as a scalar.

    Accepts 1-D arrays as well as 1 x n / n x 1 matrices. ``np.sum`` reduces
    over every element, whereas the builtin ``sum`` used before only added the
    rows of a matrix and returned a matrix, which made ``dist < best``
    comparisons fail for n > 1.
    """
    diff = np.asarray(vector1) - np.asarray(vector2)
    return np.sqrt(np.sum(np.power(diff, 2)))
29 |
30 |
def rand_center(dataSet, k):
    """Build ``k`` random centroids inside the bounding box of ``dataSet``.

    Args:
        dataSet: 2-D array or ``np.matrix`` of shape (m, n).
        k: number of centroids to draw.

    Returns:
        A (k, n) ``np.matrix``; column ``j`` is drawn uniformly from
        ``[min_j, max_j]`` of the data using ``np.random.rand``.
    """
    n = np.shape(dataSet)[1]  # number of features

    # np.asmatrix instead of np.mat: the `mat` alias was removed in NumPy 2.0.
    centroids = np.asmatrix(np.zeros((k, n)))
    for j in range(n):
        column = np.asarray(dataSet[:, j], dtype=float)
        minJ = float(column.min())
        rangeJ = float(column.max()) - minJ
        # One (k, 1) draw per feature, scaled into that feature's range.
        centroids[:, j] = minJ + rangeJ * np.random.rand(k, 1)

    return centroids
48 |
49 |
def k_means(dataSet, k, distMeas=distance_euclidean, creatCent=rand_center):
    """Plain k-means clustering.

    Args:
        dataSet: (m, n) ``np.matrix`` of samples.
        k: number of clusters.
        distMeas: callable ``(centroid_row, sample_row) -> distance``.
        creatCent: callable ``(dataSet, k) -> (k, n) matrix`` of initial centroids.

    Returns:
        ``(centroids, clusterAssment)``: the (k, n) centroid matrix and an
        (m, 2) matrix whose first column is the assigned cluster index and
        whose second column is the squared distance to that centroid.
    """
    m = np.shape(dataSet)[0]
    # np.asmatrix instead of np.mat: the `mat` alias was removed in NumPy 2.0.
    clusterAssment = np.asmatrix(np.zeros((m, 2)))
    centroids = creatCent(dataSet, k)
    clusterChanged = True  # loop until no sample changes cluster
    while clusterChanged:
        clusterChanged = False
        for i in range(m):
            # Assign sample i to its nearest centroid.
            minDist = np.inf
            minIndex = -1
            for j in range(k):
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist**2
        for cent in range(k):
            # Rows currently assigned to this centroid.
            ptsInClust = dataSet[np.nonzero(clusterAssment[:, 0].A == cent)[0]]
            # An empty cluster keeps its previous centroid; np.mean of an empty
            # selection would turn it into NaN and the cluster could never recover.
            if len(ptsInClust) > 0:
                centroids[cent, :] = np.mean(ptsInClust, axis=0)
    return centroids, clusterAssment
82 |
83 |
def biKmeans(dataMat, k, distMeas=distance_euclidean):
    """Bisecting k-means: repeatedly split the cluster whose 2-means split lowers total SSE most.

    Args:
        dataMat: (m, n) ``np.matrix`` of samples.
        k: desired number of clusters.
        distMeas: callable ``(centroid_row, sample_row) -> distance``.

    Returns:
        ``(centroids, clusterAssment)``: a ``np.matrix`` with one centroid per
        row and an (m, 2) matrix of (cluster index, squared distance).

    Raises:
        ValueError: if ``dataMat`` has no rows.
    """
    m = np.shape(dataMat)[0]
    if m == 0:
        raise ValueError("biKmeans needs at least one data point")
    # Per-sample (cluster index, squared error); everything starts in cluster 0.
    # np.asmatrix instead of np.mat: the `mat` alias was removed in NumPy 2.0.
    clusterAssment = np.asmatrix(np.zeros((m, 2)))
    # The first centroid is the mean of the whole data set.
    centroid0 = np.mean(dataMat, axis=0).tolist()[0]
    centList = [centroid0]
    for j in range(m):
        clusterAssment[j, 1] = distMeas(np.asmatrix(centroid0), dataMat[j, :]) ** 2
    # Keep splitting until the requested number of clusters is reached.
    while len(centList) < k:
        lowestSSE = np.inf
        for i in range(len(centList)):
            # Treat the members of cluster i as a small data set of their own.
            ptsInCurrCluster = dataMat[np.nonzero(clusterAssment[:, 0].A == i)[0], :]
            if np.shape(ptsInCurrCluster)[0] == 0:
                # An empty cluster cannot be bisected (k_means would fail on it).
                continue
            centroidMat, splitClustAss = k_means(ptsInCurrCluster, 2, distMeas)
            # Total error if cluster i is split = SSE of the split + SSE of all other clusters.
            # np.sum reduces to a scalar; the builtin sum would walk the matrix row by row.
            sseSplit = float(np.sum(splitClustAss[:, 1]))
            sseNotSplit = float(np.sum(clusterAssment[np.nonzero(clusterAssment[:, 0].A != i)[0], 1]))
            print('划分数据集的SSE, and 未划分的SSE: ', sseSplit, sseNotSplit)
            if (sseSplit + sseNotSplit) < lowestSSE:
                bestCentToSplit = i
                bestNewCents = centroidMat
                bestClustAss = splitClustAss.copy()
                lowestSSE = sseSplit + sseNotSplit
        # k_means(…, 2) labels its clusters 0 and 1: relabel 1 as the new cluster
        # index first, then 0 as the index of the cluster that was split.
        bestClustAss[np.nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList)
        bestClustAss[np.nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit
        print('本次最适合划分的质心: ', bestCentToSplit)
        print('被划分数据集样本数量: ', len(bestClustAss))
        # Replace the split centroid with the first new one and append the second.
        centList[bestCentToSplit] = bestNewCents[0, :].tolist()[0]
        centList.append(bestNewCents[1, :].tolist()[0])
        # Write the new assignments / errors back for the samples of the split cluster.
        clusterAssment[np.nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss

    return np.asmatrix(centList), clusterAssment
134 |
def main(img_size=600, k=9, thr=0.25, gen=1000):
    """Read image and box sizes from the VOC-style data set rooted at ``/data``.

    NOTE(review): ``img_size``, ``k``, ``thr`` and ``gen`` are accepted but not
    used, and the loaded sizes are discarded; this looks like an unfinished
    entry point.
    """
    # VOCDataSet.__init__ takes (voc_root, txt_name) only; the former
    # `year="2012"` keyword raised TypeError.
    dataset = VOCDataSet(voc_root="/data", txt_name="train.txt")
    im_wh, boxes_wh = dataset.get_info()
139 |
140 |
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    # Smoke test of the bisecting k-means implementation on a tab-separated point file.
    # np.asmatrix instead of np.mat: the `mat` alias was removed in NumPy 2.0.
    datMat = np.asmatrix(load_data_set(r'F:\Desktop\PCB_code\PCB_DataSet\trainval.txt'))
    # Cluster into 5 anchors.
    centList, clusterAssment = biKmeans(datMat, 5)
    print("质心结果:", centList)
    print("聚类结果:", clusterAssment)
    # Visualise the first two features, coloured by cluster, with centroids in red.
    plt.scatter(np.array(datMat)[:, 0], np.array(datMat)[:, 1], c=np.array(clusterAssment)[:, 0].T)
    plt.scatter(centList[:, 0].tolist(), centList[:, 1].tolist(), c="r")
    plt.show()
176 |
177 |
--------------------------------------------------------------------------------
/utils/kmeans_anchors/main.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | from tqdm import tqdm
4 | from scipy.cluster.vq import kmeans
5 |
6 | from read_voc import VOCDataSet
7 | from yolo_kmeans import k_means, wh_iou
8 |
9 |
def anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float):  # mutation fitness
    """Score a set of anchors against box sizes with the width/height ratio metric.

    Args:
        k: anchors, indexed as (anchor, 2).
        wh: box sizes, indexed as (box, 2).
        thr: minimum ratio metric for a box to count as matched.

    Returns:
        ``(fitness, bpr)``: the mean best metric with unmatched boxes counted
        as 0, and the fraction of boxes whose best metric exceeds ``thr``
        (best possible recall).
    """
    ratio = wh[:, None] / k[None]
    # Per (box, anchor): the worse of the two side ratios, folded into (0, 1].
    side_metric = np.minimum(ratio, 1. / ratio).min(2)
    best = side_metric.max(1)  # best anchor for each box
    matched = (best > thr).astype(np.float32)
    fitness = (best * matched).mean()
    recall = matched.mean()
    return fitness, recall
18 |
19 |
def main(img_size=512, n=9, thr=0.25, gen=1000):
    """Estimate ``n`` anchors with k-means, then refine them with random mutation.

    Args:
        img_size: the longer image side is scaled to this many pixels.
        n: number of anchors.
        thr: ratio-metric threshold used by ``anchor_fitness``.
        gen: number of mutation generations.

    The anchors and their fitness / best possible recall are printed; nothing
    is returned.
    """
    # Read every image's (w, h) and the (w, h) of its boxes.
    # VOCDataSet.__init__ takes (voc_root, txt_name) only; the former
    # `year="2012"` keyword raised TypeError.
    dataset = VOCDataSet(voc_root="/data", txt_name="train.txt")
    im_wh, boxes_wh = dataset.get_info()

    # Scale so that the longer image side equals img_size.
    im_wh = np.array(im_wh, dtype=np.float32)
    shapes = img_size * im_wh / im_wh.max(1, keepdims=True)
    wh0 = np.concatenate([l * s for s, l in zip(shapes, boxes_wh)])  # wh

    # Warn about tiny objects; keep boxes that have at least one side >= 2 px
    # (that is what the `.any(1)` test selects).
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]

    # IoU-based k-means (an alternative would be scipy.cluster.vq.kmeans on whitened wh).
    k = k_means(wh, n)

    # Sort by area, small to large.
    k = k[np.argsort(k.prod(1))]
    f, bpr = anchor_fitness(k, wh, thr)
    print("kmeans: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k]))
    print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}")

    # Evolve: mutate the k-means result and keep a candidate only if fitness improves.
    npr = np.random
    sh, mp, s = k.shape, 0.9, 0.1  # anchor array shape, mutation prob, sigma
    pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg, bpr_g = anchor_fitness(kg, wh, thr)
        if fg > f:
            # Keep the recall together with the accepted anchors so the final
            # report describes the best set, not the last candidate tried.
            f, bpr, k = fg, bpr_g, kg.copy()
            pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'

    # Sort by area, small to large.
    k = k[np.argsort(k.prod(1))]
    print("genetic: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k]))
    print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}")
69 |
70 |
71 | if __name__ == "__main__":
72 | main()
73 |
--------------------------------------------------------------------------------
/utils/kmeans_anchors/plot_kmeans.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from matplotlib import pyplot as plt
3 | np.random.seed(0)
4 |
5 | colors = np.array(['blue', 'black'])
6 |
7 |
def plot_clusters(data, cls, clusters, title=""):
    """Scatter-plot the samples and mark every cluster centre with a gold star.

    Args:
        data: samples; columns 0 and 1 are plotted.
        cls: per-sample indices into the module-level ``colors`` array, or
            ``None`` to draw every sample in ``colors[0]``.
        clusters: iterable of centres; elements 0 and 1 of each are plotted.
        title: figure title.
    """
    point_colors = [colors[0]] * data.shape[0] if cls is None else colors[cls].tolist()

    plt.scatter(data[:, 0], data[:, 1], c=point_colors)
    for centre in clusters:
        plt.scatter(centre[0], centre[1], c='gold', marker='*', s=150)
    plt.title(title)
    plt.show()
    plt.close()
20 |
21 |
def distances(data, clusters):
    """Return the squared Euclidean distance from every sample to every cluster.

    ``data`` is indexed as (N, D) and ``clusters`` as (M, D); the result has
    one row per sample and one column per cluster.
    """
    # Broadcast (1, M, D) against (N, 1, D) to get all pairwise differences.
    diff = clusters[None] - data[:, None]
    return np.sum(np.power(diff, 2), axis=-1)
27 |
28 |
def k_means(data, k, dist=np.mean):
    """Plain k-means that plots every step.

    Args:
        data: samples to cluster, one per row.
        k: number of clusters.
        dist: reduction used to update a cluster centre from its members
            (called as ``dist(members, axis=0)``).

    Returns:
        The array of ``k`` cluster centres.
    """
    data_number = data.shape[0]
    # -1 can never be a valid cluster index, so the first pass always updates
    # the centres (a zero-filled start could equal a genuine first assignment).
    last_nearest = np.full((data_number,), -1)

    # Initialise the centres with k distinct random samples.
    clusters = data[np.random.choice(data_number, k, replace=False)]
    print(f"random cluster: \n {clusters}")
    plot_clusters(data, None, clusters, "random clusters")

    step = 0
    while True:
        d = distances(data, clusters)
        current_nearest = np.argmin(d, axis=1)

        plot_clusters(data, current_nearest, clusters, f"step {step}")

        if (last_nearest == current_nearest).all():
            break  # assignments are stable, so the centres will not change
        for cluster in range(k):
            members = data[current_nearest == cluster]
            # An empty cluster keeps its centre; reducing an empty selection gives NaN.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)
        last_nearest = current_nearest
        step += 1

    return clusters
63 |
64 |
def main():
    """Demo: cluster two synthetic 2-D Gaussian blobs with ``k_means`` and plot each step."""
    # Two blobs of 150 points each, centred at (1, 1) and (5, 5).
    x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)]
    x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)]

    x = np.concatenate([x1, x2])
    y = np.concatenate([y1, y2])

    plt.scatter(x, y, c='blue')
    plt.title("initial data")
    plt.show()
    plt.close()

    clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2)
    # Message typo fixed ("fluster" -> "cluster").
    print(f"k-means cluster: \n {clusters}")
79 |
80 |
81 | if __name__ == '__main__':
82 | main()
83 |
--------------------------------------------------------------------------------
/utils/kmeans_anchors/read_voc.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import tqdm
3 | from lxml import etree
4 |
5 |
class VOCDataSet(object):
    """Reads image sizes and box sizes from Pascal-VOC style XML annotations.

    Expected layout under ``voc_root``: ``Annotations/<name>.xml`` and
    ``ImageSets/<txt_name>`` listing one ``<name>`` per line.
    """

    def __init__(self, voc_root, txt_name: str = "train.txt"):
        """Collect the annotation paths listed in ``ImageSets/<txt_name>``.

        Raises:
            AssertionError: if the list file is missing or empty, or if a
                listed annotation file does not exist.
        """
        self.root = voc_root
        self.annotations_root = os.path.join(self.root, "Annotations")

        # Read train.txt / val.txt.
        txt_path = os.path.join(self.root, "ImageSets", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read if len(line.strip()) > 0]

        # Check that every referenced annotation exists.
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

    def __len__(self):
        """Return the number of annotation files."""
        return len(self.xml_list)

    def parse_xml_to_dict(self, xml):
        """Recursively convert an XML element into nested dictionaries.

        Args:
            xml: an element-tree node (iterable over children, with ``tag``
                and ``text`` attributes).

        Returns:
            ``{tag: text}`` for a leaf, otherwise ``{tag: {child_tag: ...}}``.
            Children tagged ``object`` may repeat, so they are gathered in a list.
        """
        if len(xml) == 0:  # leaf node: return its text
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                if child.tag not in result:
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def get_info(self):
        """Return image sizes and per-image relative box sizes.

        Returns:
            ``(im_wh_list, boxes_wh_list)``: for every image that has at least
            one box, ``[width, height]`` of the image and a list of
            ``[box_w / width, box_h / height]`` entries.
        """
        im_wh_list = []
        boxes_wh_list = []
        for xml_path in tqdm(self.xml_list, desc="read data info."):
            # Hand lxml the raw bytes: it then honours the XML encoding
            # declaration instead of rejecting a str that contains one.
            with open(xml_path, "rb") as fid:
                xml_bytes = fid.read()
            xml = etree.fromstring(xml_bytes)
            data = self.parse_xml_to_dict(xml)["annotation"]
            im_height = int(data["size"]["height"])
            im_width = int(data["size"]["width"])

            wh = []
            # "object" is absent when the image has no boxes; treat it as empty
            # so the image is skipped below instead of raising KeyError.
            for obj in data.get("object", []):
                xmin = float(obj["bndbox"]["xmin"])
                xmax = float(obj["bndbox"]["xmax"])
                ymin = float(obj["bndbox"]["ymin"])
                ymax = float(obj["bndbox"]["ymax"])
                wh.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height])

            if len(wh) == 0:
                continue

            im_wh_list.append([im_width, im_height])
            boxes_wh_list.append(wh)

        return im_wh_list, boxes_wh_list
78 |
--------------------------------------------------------------------------------
/utils/kmeans_anchors/yolo_kmeans.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def wh_iou(wh1, wh2):
    """Return the N x M IoU matrix of boxes given only by (width, height).

    ``wh1`` is N x 2 and ``wh2`` is M x 2.  Boxes are compared as if they
    shared a corner, so the intersection is the product of the element-wise
    minimum of the two sizes.
    """
    first = wh1[:, None]   # [N,1,2]
    second = wh2[None]     # [1,M,2]
    inter = np.minimum(first, second).prod(2)  # [N,M]
    union = first.prod(2) + second.prod(2) - inter  # area1 + area2 - inter
    return inter / union
10 |
11 |
def k_means(boxes, k, dist=np.median):
    """YOLO-style k-means on box sizes, using ``1 - IoU`` as the distance.

    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py

    Args:
        boxes: box sizes to cluster, one (w, h) per row.
        k: number of clusters.
        dist: reduction used to update a cluster from its members, called as
            ``dist(members, axis=0)`` (median by default).

    Returns:
        The array of ``k`` cluster sizes.
    """
    box_number = boxes.shape[0]
    # -1 can never be a valid cluster index.  A zero-filled start equals the
    # first assignment whenever every box lands in cluster 0 (always for
    # k == 1), which ended the loop before any cluster was updated.
    last_nearest = np.full((box_number,), -1)

    # Initialise the clusters with k distinct random boxes.
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        distances = 1 - wh_iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if (last_nearest == current_nearest).all():
            break  # assignments are stable, so the clusters will not change
        for cluster in range(k):
            members = boxes[current_nearest == cluster]
            # An empty cluster keeps its size; reducing an empty selection gives NaN.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_nearest = current_nearest

    return clusters
40 |
--------------------------------------------------------------------------------
/utils/soft_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
def bbox_iou(self, box1, box2, x1y1x2y2=True):
    """Compute the IoU between ``box1`` and ``box2`` (broadcast row-wise).

    Args:
        self: unused.
        box1, box2: 2-D tensors whose first four columns describe the boxes.
        x1y1x2y2: ``True`` if the columns are corner coordinates
            (x1, y1, x2, y2); ``False`` if they are (cx, cy, w, h).

    Returns:
        A tensor of IoU values; the union is clamped to at least 1e-6.
    """
    def _corners(box):
        # Return (x1, y1, x2, y2) columns for either input layout.
        if x1y1x2y2:
            return box[:, 0], box[:, 1], box[:, 2], box[:, 3]
        left, right = box[:, 0] - box[:, 2] / 2, box[:, 0] + box[:, 2] / 2
        top, bottom = box[:, 1] - box[:, 3] / 2, box[:, 1] + box[:, 3] / 2
        return left, top, right, bottom

    b1_x1, b1_y1, b1_x2, b1_y2 = _corners(box1)
    b2_x1, b2_y1, b2_x2, b2_y2 = _corners(box2)

    # Overlap rectangle; negative extents mean no overlap and clamp to 0.
    overlap_w = torch.clamp(torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1), min=0)
    overlap_h = torch.clamp(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1), min=0)
    inter_area = overlap_w * overlap_h

    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    union = torch.clamp(b1_area + b2_area - inter_area, min=1e-6)
    return inter_area / union
30 |
def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4, sigma=0.5):
    """Per-class Gaussian soft-NMS over YOLO-style predictions.

    Args:
        self: object providing ``bbox_iou`` and ``yolo_correct_boxes``.
        prediction: tensor [batch, num_anchors, 5 + num_classes] laid out as
            (cx, cy, w, h, obj_conf, class scores...).  Its first four
            columns are overwritten in place with corner coordinates.
        num_classes: number of class-score columns.
        input_shape, image_shape, letterbox_image: forwarded to
            ``self.yolo_correct_boxes``.
        conf_thres: score threshold, used both for the initial filter
            (obj_conf * class_conf) and for dropping decayed boxes.
        nms_thres: unused here; scores are decayed instead of hard-suppressed.
        sigma: width of the Gaussian score decay.

    Returns:
        A list with one entry per image: ``None`` if nothing survives,
        otherwise a numpy array of rows
        (box[0:4] from ``yolo_correct_boxes``, obj_conf, class_conf, class_pred).
    """
    # Convert (cx, cy, w, h) to (x1, y1, x2, y2).
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):
        # Best class score and index per anchor, both shaped [num_anchors, 1].
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        # First filter on obj_conf * class_conf.  The product is already 1-D;
        # squeezing it would give a 0-d mask for a single anchor, and indexing
        # with that adds a dimension and breaks the concatenation below.
        conf_mask = image_pred[:, 4] * class_conf[:, 0] >= conf_thres

        image_pred = image_pred[conf_mask]
        class_conf = class_conf[conf_mask]
        class_pred = class_pred[conf_mask]
        if not image_pred.size(0):
            continue

        # detections: [n, 7] = x1, y1, x2, y2, obj_conf, class_conf, class_pred
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

        # Classes present in this image.
        unique_labels = detections[:, -1].cpu().unique()

        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
            detections = detections.cuda()

        for c in unique_labels:
            # All surviving detections of class c, best score first.
            detections_class = detections[detections[:, -1] == c]
            _, conf_sort_index = torch.sort(detections_class[:, 4] * detections_class[:, 5], descending=True)
            detections_class = detections_class[conf_sort_index]

            max_detections = []
            while detections_class.size(0):
                # Keep the current best box, then decay the rest by their overlap with it.
                max_detections.append(detections_class[0].unsqueeze(0))
                if len(detections_class) == 1:
                    break
                ious = self.bbox_iou(max_detections[-1], detections_class[1:])
                # Soft-NMS: multiply obj_conf by exp(-iou^2 / sigma).
                detections_class[1:, 4] = torch.exp(-(ious * ious) / sigma) * detections_class[1:, 4]
                detections_class = detections_class[1:]
                # Drop boxes whose decayed score fell below the threshold, then re-sort.
                detections_class = detections_class[detections_class[:, 4] >= conf_thres]
                arg_sort = torch.argsort(detections_class[:, 4], descending=True)
                detections_class = detections_class[arg_sort]
            max_detections = torch.cat(max_detections).data

            output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections))

        if output[i] is not None:
            output[i] = output[i].cpu().numpy()
            box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4]) / 2, output[i][:, 2:4] - output[i][:, 0:2]
            output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)

    return output
126 |
127 |
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 |
4 | #---------------------------------------------------------#
5 | # 将图像转换成RGB图像,防止灰度图在预测时报错。
6 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
7 | #---------------------------------------------------------#
def cvtColor(image):
    """Return ``image`` unchanged if it already has three channels, else convert it to RGB.

    An input whose shape is (H, W, 3) is returned as is; anything else is
    converted through its ``convert('RGB')`` method.
    """
    shape = np.shape(image)
    already_rgb = len(shape) == 3 and shape[2] == 3
    if not already_rgb:
        image = image.convert('RGB')
    return image
14 |
15 | #---------------------------------------------------#
16 | # 对输入图像进行resize
17 | #---------------------------------------------------#
def resize_image(image, size):
    """Resize ``image`` to ``size`` = (width, height) with bicubic interpolation."""
    width, height = size
    return image.resize((width, height), Image.BICUBIC)
22 |
23 | #---------------------------------------------------#
24 | # 获得类
25 | #---------------------------------------------------#
def get_classes(classes_path):
    """Read class names from a UTF-8 text file, one name per line.

    Returns:
        ``(class_names, count)``: the whitespace-stripped lines and how many there are.
    """
    with open(classes_path, encoding='utf-8') as f:
        names = [line.strip() for line in f]
    return names, len(names)
31 |
32 | #---------------------------------------------------#
33 | # 获得学习率
34 | #---------------------------------------------------#
def get_lr(optimizer):
    """Return the ``'lr'`` of the optimizer's first parameter group (``None`` if there is none)."""
    return next((group['lr'] for group in optimizer.param_groups), None)
38 |
def preprocess_input(image):
    """Scale pixel values from [0, 255] to [0, 1].

    The division is done with ``/=``, so an array argument is modified in
    place and the same object is returned.
    """
    scale = 255.0
    image /= scale
    return image
42 |
def show_config(**kwargs):
    """Print the keyword arguments as a two-column table (right-aligned key and value)."""
    rule = '-' * 70
    lines = ['Configurations:', rule, f"|{'keys':>25} | {'values':>40}|", rule]
    lines.extend(f'|{str(key):>25} | {str(value):>40}|' for key, value in kwargs.items())
    lines.append(rule)
    for line in lines:
        print(line)
51 |
def get_new_img_size(height, width, img_min_side=600):
    """Scale (height, width) so that the shorter side equals ``img_min_side``.

    Returns:
        ``(resized_height, resized_width)`` as ints; the longer side is
        scaled by the same factor and truncated.
    """
    target = int(img_min_side)
    if width <= height:
        # Width is the shorter side: pin it and scale the height.
        scale = float(img_min_side) / width
        return int(scale * height), target

    # Height is the shorter side: pin it and scale the width.
    scale = float(img_min_side) / height
    return target, int(scale * width)
63 |
--------------------------------------------------------------------------------
/utils/utils_bbox.py:
--------------------------------------------------------------------------------
1 | from matplotlib import pyplot as plt
2 | from numpy import *
3 | import numpy as np
4 | import torch
5 | from torch.nn import functional as F
6 | from torchvision.ops import nms
7 |
8 |
9 | #src_bbox先验框,loc建议框结果
def loc2bbox(src_bbox, loc):
    """Decode regression offsets ``loc`` relative to the boxes ``src_bbox``.

    Args:
        src_bbox: [N, 4] tensor of source boxes; columns 0/2 and 1/3 are the
            min/max along the two axes.
        loc: [N, 4*K] tensor of offsets laid out as repeated
            (d0, d1, dsize0, dsize1) groups.

    Returns:
        A tensor shaped like ``loc`` with the decoded min/max coordinates, or
        an empty [0, 4] tensor (same dtype and device as ``loc``) when there
        are no source boxes.
    """
    if src_bbox.size()[0] == 0:
        # Create the empty result on loc's device so callers can combine it
        # with other tensors without a device mismatch.
        return torch.zeros((0, 4), dtype=loc.dtype, device=loc.device)

    # Size and centre of every source box, kept as column vectors for broadcasting.
    src_width = torch.unsqueeze(src_bbox[:, 2] - src_bbox[:, 0], -1)
    src_height = torch.unsqueeze(src_bbox[:, 3] - src_bbox[:, 1], -1)
    src_ctr_x = torch.unsqueeze(src_bbox[:, 0], -1) + 0.5 * src_width
    src_ctr_y = torch.unsqueeze(src_bbox[:, 1], -1) + 0.5 * src_height

    # Every 4th column, starting at 0/1/2/3, holds one kind of offset.
    dx = loc[:, 0::4]
    dy = loc[:, 1::4]
    dw = loc[:, 2::4]
    dh = loc[:, 3::4]

    # Centre offsets are relative to the box size; size offsets are log-scale.
    ctr_x = dx * src_width + src_ctr_x
    ctr_y = dy * src_height + src_ctr_y
    w = torch.exp(dw) * src_width
    h = torch.exp(dh) * src_height

    dst_bbox = torch.zeros_like(loc)
    dst_bbox[:, 0::4] = ctr_x - 0.5 * w
    dst_bbox[:, 1::4] = ctr_y - 0.5 * h
    dst_bbox[:, 2::4] = ctr_x + 0.5 * w
    dst_bbox[:, 3::4] = ctr_y + 0.5 * h

    return dst_bbox
40 |
class DecodeBox():
    """Turns the classifier head's outputs into final detections."""

    def __init__(self, std, num_classes):
        """Store the offset scaling ``std`` and the class count (plus one extra class slot)."""
        self.std = std
        self.num_classes = num_classes + 1

    def frcnn_correct_boxes(self, box_xy, box_wh, input_shape, image_shape):
        """Convert normalised centre/size boxes to corner boxes in image pixels.

        Args:
            box_xy, box_wh: arrays [..., 2] with normalised (x, y) centres and (w, h) sizes.
            input_shape: unused apart from being converted to an array.
            image_shape: (height, width) of the image.

        Returns:
            Array [..., 4] ordered (y_min, x_min, y_max, x_max) in pixels.
        """
        # Put y first so the boxes line up with image_shape = (height, width).
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        box_mins = box_yx - (box_hw / 2.)
        box_maxes = box_yx + (box_hw / 2.)
        boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def forward(self, roi_cls_locs, roi_scores, rois, image_shape, input_shape, nms_iou = 0.3, confidence = 0.5):
        """Decode, threshold and NMS-filter the classifier outputs for every image.

        Args:
            roi_cls_locs: per-image regression outputs, viewable as [num_rois, num_classes, 4].
            roi_scores: per-image class logits [num_rois, num_classes].
            rois: proposals, viewable as [batch, num_rois, 4].
            image_shape: (height, width) of the original image.
            input_shape: (height, width) of the network input.
            nms_iou: IoU threshold passed to ``nms``.
            confidence: minimum softmax probability for a detection.

        Returns:
            A list with one entry per image: an empty list, or an array of rows
            (4 box coordinates from ``frcnn_correct_boxes``, confidence, label).
        """
        results = []
        bs = len(roi_cls_locs)
        # batch_size, num_rois, 4
        rois = rois.view((bs, -1, 4))
        for i in range(bs):
            # Undo the normalisation of the regression targets.
            roi_cls_loc = roi_cls_locs[i] * self.std
            # [num_rois, num_classes, 4]: one offset group per class.
            roi_cls_loc = roi_cls_loc.view([-1, self.num_classes, 4])

            # Apply the per-class offsets to each proposal:
            # num_rois, 4 -> num_rois, 1, 4 -> num_rois, num_classes, 4
            roi = rois[i].view((-1, 1, 4)).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(roi.contiguous().view((-1, 4)), roi_cls_loc.contiguous().view((-1, 4)))
            cls_bbox = cls_bbox.view([-1, (self.num_classes), 4])
            # Normalise by the network input size.
            cls_bbox[..., [0, 2]] = (cls_bbox[..., [0, 2]]) / input_shape[1]
            cls_bbox[..., [1, 3]] = (cls_bbox[..., [1, 3]]) / input_shape[0]

            roi_score = roi_scores[i]
            prob = F.softmax(roi_score, dim=-1)

            results.append([])
            # Class 0 is skipped; reported labels are c - 1.
            for c in range(1, self.num_classes):
                # Keep the boxes whose probability for class c exceeds the threshold.
                c_confs = prob[:, c]
                c_confs_m = c_confs > confidence

                if len(c_confs[c_confs_m]) > 0:
                    boxes_to_process = cls_bbox[c_confs_m, c]
                    confs_to_process = c_confs[c_confs_m]

                    keep = nms(
                        boxes_to_process,
                        confs_to_process,
                        nms_iou
                    )
                    # Boxes that survived non-maximum suppression.
                    good_boxes = boxes_to_process[keep]
                    confs = confs_to_process[keep][:, None]
                    # Build the labels on the same device as the scores; `.cuda()`
                    # would always pick the default CUDA device.
                    labels = torch.full((len(keep), 1), float(c - 1), device=confs.device)
                    # Stack box, confidence and label column-wise.
                    c_pred = torch.cat((good_boxes, confs, labels), dim=1).cpu().numpy()
                    results[-1].extend(c_pred)

            if len(results[-1]) > 0:
                results[-1] = np.array(results[-1])
                box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4])/2, results[-1][:, 2:4] - results[-1][:, 0:2]
                results[-1][:, :4] = self.frcnn_correct_boxes(box_xy, box_wh, input_shape, image_shape)

        return results
138 |
139 |
140 |
--------------------------------------------------------------------------------
/utils/utils_fit.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 | from tqdm import tqdm
5 |
6 | from utils.utils import get_lr
7 |
8 |
def fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir):
    """Run one training pass and one validation pass, then log and save weights.

    Args:
        model: network whose ``state_dict`` is saved.
        train_util: object exposing ``train_step``, ``forward`` and ``optimizer``.
        loss_history: receives ``append_loss`` and exposes ``val_loss``.
        eval_callback: receives ``on_epoch_end``.
        optimizer: only read for the learning rate shown in the progress bar.
        epoch: zero-based index of the current epoch; ``Epoch`` is the total.
        epoch_step, epoch_step_val: number of batches to consume from
            ``gen`` / ``gen_val``.
        cuda: move images to the GPU when true.
        fp16, scaler: forwarded to ``train_util.train_step``.
        save_period: save a numbered checkpoint every this many epochs.
        save_dir: directory for the weight files.
    """
    running = {'total': 0, 'rpn_loc': 0, 'rpn_cls': 0, 'roi_loc': 0, 'roi_cls': 0}
    val_loss = 0
    epoch_desc = f'Epoch {epoch + 1}/{Epoch}'

    print('Start Train')
    with tqdm(total=epoch_step, desc=epoch_desc, postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_step:
                break
            images, boxes, labels = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    images = images.cuda()

            step_losses = train_util.train_step(images, boxes, labels, 1, fp16, scaler)
            rpn_loc, rpn_cls, roi_loc, roi_cls, total = step_losses
            running['total'] += total.item()
            running['rpn_loc'] += rpn_loc.item()
            running['rpn_cls'] += rpn_cls.item()
            running['roi_loc'] += roi_loc.item()
            running['roi_cls'] += roi_cls.item()

            seen = iteration + 1
            pbar.set_postfix(total_loss=running['total'] / seen,
                             rpn_loc=running['rpn_loc'] / seen,
                             rpn_cls=running['rpn_cls'] / seen,
                             roi_loc=running['roi_loc'] / seen,
                             roi_cls=running['roi_cls'] / seen,
                             lr=get_lr(optimizer))
            pbar.update(1)

    print('Finish Train')
    print('Start Validation')
    with tqdm(total=epoch_step_val, desc=epoch_desc, postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen_val):
            if iteration >= epoch_step_val:
                break
            images, boxes, labels = batch[0], batch[1], batch[2]
            # The whole validation step runs without gradient tracking.
            with torch.no_grad():
                if cuda:
                    images = images.cuda()

                train_util.optimizer.zero_grad()
                _, _, _, _, val_total = train_util.forward(images, boxes, labels, 1)
                val_loss += val_total.item()

                pbar.set_postfix(val_loss=val_loss / (iteration + 1))
                pbar.update(1)

    print('Finish Validation')
    train_avg = running['total'] / epoch_step
    val_avg = val_loss / epoch_step_val
    loss_history.append_loss(epoch + 1, train_avg, val_avg)
    eval_callback.on_epoch_end(epoch + 1)
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.3f || Val Loss: %.3f ' % (train_avg, val_avg))

    # Save weights: periodic checkpoint, best-so-far, and latest.
    is_last_epoch = epoch + 1 == Epoch
    if (epoch + 1) % save_period == 0 or is_last_epoch:
        checkpoint_name = 'ep%03d-loss%.3f-val_loss%.3f.pth' % (epoch + 1, train_avg, val_avg)
        torch.save(model.state_dict(), os.path.join(save_dir, checkpoint_name))

    if len(loss_history.val_loss) <= 1 or val_avg <= min(loss_history.val_loss):
        print('Save best model to best_epoch_weights.pth')
        torch.save(model.state_dict(), os.path.join(save_dir, "best_epoch_weights.pth"))

    torch.save(model.state_dict(), os.path.join(save_dir, "last_epoch_weights.pth"))
77 |
78 |
--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import xml.etree.ElementTree as ET
4 |
5 | import numpy as np
6 |
7 | from utils.utils import get_classes
8 |
#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects which steps run when this file is executed as a script:
#     0 -> both steps below
#     1 -> only write the split lists (trainval/test/train/val .txt) into <PCB_DataSet_path>/ImageSets
#     2 -> only write the per-split annotation lists <PCB_DataSet_path>/<split>.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0

#-------------------------------------------------------------------#
#   Must be edited: file listing the class names to export.
#   Keep it identical to the classes_path used for training and
#   prediction. If the generated annotation lists contain no boxes,
#   the class names here most likely do not match the XML labels.
#   The class names only influence the output in modes 0 and 2
#   (the file itself is read as soon as this module is loaded).
#-------------------------------------------------------------------#
classes_path = r'F:\Desktop\PCB_code\PCB_DataSet\cls_classes.txt'

#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent: fraction of all samples assigned to (train + val); the remainder becomes the test split.
#                     With 0.9 the ratio (train + val) : test is 9 : 1.
#   train_percent:    fraction of (train + val) assigned to train; the remainder becomes val.
#                     With 0.9 the ratio train : val is 9 : 1.
#   Both values are only used in modes 0 and 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9

#-------------------------------------------------------#
#   Folder that holds the dataset. Annotations/ is read
#   from it and ImageSets/ plus the annotation lists are
#   written into it. A relative value is resolved against
#   the current working directory.
#-------------------------------------------------------#
PCB_DataSet_path = r'PCB_DataSet'

# Names of the ImageSets split files that are turned into annotation lists in modes 0 and 2.
PCB_Data_Sets = ['trainval', 'test']
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Statistics filled in while exporting:
#   photo_nums[i] - number of images in PCB_Data_Sets[i]
#   nums[k]       - number of exported boxes of classes[k]
#-------------------------------------------------------#
photo_nums = np.zeros(len(PCB_Data_Sets))
nums = np.zeros(len(classes))
def convert_annotation(image_id, list_file):
    """Append the exportable boxes of one image's XML annotation to ``list_file``.

    Parses ``Annotations/<image_id>.xml`` under ``PCB_DataSet_path``. For every
    ``<object>`` whose ``<name>`` is listed in the module-level ``classes`` and
    whose ``<difficult>`` flag is not 1, writes ``" xmin,ymin,xmax,ymax,cls_id"``
    (leading space, no trailing newline) and increments the matching entry of
    the module-level ``nums`` counter.

    Args:
        image_id: Annotation file name without the ``.xml`` extension.
        list_file: Writable text file object that receives the box entries.

    Raises:
        OSError: If the annotation file cannot be opened.
        xml.etree.ElementTree.ParseError: If the annotation is not valid XML.
        ValueError: If a ``<difficult>`` or coordinate value is not numeric.
    """
    xml_path = os.path.join(PCB_DataSet_path, 'Annotations/%s.xml' % image_id)
    # Parse inside a context manager so the handle is released immediately;
    # the tree is fully loaded in memory once parse() returns.
    with open(xml_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue

        # A missing or empty <difficult> tag means "not difficult".
        difficult_node = obj.find('difficult')
        difficult_text = ''
        if difficult_node is not None and difficult_node.text:
            difficult_text = difficult_node.text.strip()
        if difficult_text and int(difficult_text) == 1:
            continue

        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # Coordinates may be stored as floats; truncate them to integers.
        b = tuple(
            int(float(xmlbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))

        nums[cls_id] += 1
63 |
if __name__ == "__main__":
    # Script entry point. Depending on annotation_mode it
    #   (0 or 1) splits the annotated samples into trainval/test/train/val lists
    #            under <PCB_DataSet_path>/ImageSets, and/or
    #   (0 or 2) writes one annotation list per entry of PCB_Data_Sets and prints
    #            per-class box statistics.
    # The seed is fixed so that the same directory listing gives the same split.
    random.seed(0)
    if " " in os.path.abspath(PCB_DataSet_path):
        raise ValueError("数据集存放的文件夹路径与图片名称中不可以存在空格,否则会影响正常的模型训练,请注意修改。")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(PCB_DataSet_path, 'Annotations')
        saveBasePath = os.path.join(PCB_DataSet_path, 'ImageSets')
        # Create the output folder up front instead of failing on the first open().
        os.makedirs(saveBasePath, exist_ok=True)
        total_xml = [xml for xml in os.listdir(xmlfilepath) if xml.endswith(".xml")]

        num = len(total_xml)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        # Sample from the same sequences as before so the split for a given
        # seed is unchanged; sets are only used for fast membership tests.
        trainval = random.sample(range(num), tv)
        train = random.sample(trainval, tr)
        trainval_ids = set(trainval)
        train_ids = set(train)

        print("train and val size", tv)
        print("train size", tr)
        # Write as UTF-8 because the lists are read back as UTF-8 below.
        with open(os.path.join(saveBasePath, 'trainval.txt'), 'w', encoding='utf-8') as ftrainval, \
                open(os.path.join(saveBasePath, 'test.txt'), 'w', encoding='utf-8') as ftest, \
                open(os.path.join(saveBasePath, 'train.txt'), 'w', encoding='utf-8') as ftrain, \
                open(os.path.join(saveBasePath, 'val.txt'), 'w', encoding='utf-8') as fval:
            for i, xml in enumerate(total_xml):
                # Strip the ".xml" extension to get the sample id.
                name = xml[:-4] + '\n'
                if i in trainval_ids:
                    ftrainval.write(name)
                    if i in train_ids:
                        ftrain.write(name)
                    else:
                        fval.write(name)
                else:
                    ftest.write(name)
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        # NOTE: the log text mentions PCB_train.txt / PCB_val.txt, but the files
        # actually written are <PCB_DataSet_path>/<image_set>.txt for each
        # image_set in PCB_Data_Sets.
        print("Generate PCB_train.txt and PCB_val.txt for train.")
        dataset_root = os.path.abspath(PCB_DataSet_path)
        for type_index, image_set in enumerate(PCB_Data_Sets):
            ids_path = os.path.join(PCB_DataSet_path, 'ImageSets/%s.txt' % image_set)
            with open(ids_path, encoding='utf-8') as ids_file:
                image_ids = ids_file.read().strip().split()

            list_path = os.path.join(PCB_DataSet_path, '%s.txt' % image_set)
            with open(list_path, 'w', encoding='utf-8') as list_file:
                for image_id in image_ids:
                    # One line per image: absolute image path followed by its boxes.
                    list_file.write('%s/JPEGImages/%s.jpg' % (dataset_root, image_id))
                    convert_annotation(image_id, list_file)
                    list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
        print("Generate PCB_train.txt and PCB_val.txt for train done.")

        def printTable(List1, List2):
            """Print the columns in List1 as table rows.

            List1 is a list of equally long columns of strings; List2 gives the
            width each column is right-justified to.
            """
            for row in zip(*List1):
                print("|", end=' ')
                for cell, width in zip(row, List2):
                    print(cell.rjust(int(width)), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        # Each column is as wide as its longest cell (0 for an empty column).
        colWidths = [max((len(cell) for cell in column), default=0) for column in tableData]
        printTable(tableData, colWidths)

        # photo_nums[0] is the image count of the first entry in PCB_Data_Sets.
        if photo_nums[0] <= 500:
            print("训练集数量小于500,属于较小的数据量,请注意设置较大的训练世代(Epoch)以满足足够的梯度下降次数(Step)。")

        if np.sum(nums) == 0:
            # No box of any configured class was exported; repeat the warning three times.
            for _ in range(3):
                print("在数据集中并未获得任何目标,请注意修改classes_path对应自己的数据集,并且保证标签名字正确,否则训练将会没有任何效果!")
            print("(重要的事情说三遍)。")
--------------------------------------------------------------------------------