├── .gitignore
├── Data_preproces
│   ├── data_expansion.py
│   ├── order_name.py
│   └── voc_annotation.py
├── LICENSE
├── README.md
├── data_expansion.py
├── frcnn_predict.py
├── get_map.py
├── nets
│   ├── FasterRCNN_train.py
│   ├── Suggestion_box.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── FasterRCNN_train.cpython-37.pyc
│   │   ├── FasterRCNN_train.cpython-39.pyc
│   │   ├── Suggestion_box.cpython-37.pyc
│   │   ├── Suggestion_box.cpython-39.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── classifier.cpython-37.pyc
│   │   ├── classifier.cpython-39.pyc
│   │   ├── faster_rcnn_feature_extraction.cpython-39.pyc
│   │   ├── feature_extraction.cpython-39.pyc
│   │   ├── feature_pyramid_network.cpython-39.pyc
│   │   ├── frcnn.cpython-39.pyc
│   │   ├── frcnn_training.cpython-39.pyc
│   │   ├── resnet101.cpython-37.pyc
│   │   ├── resnet101.cpython-39.pyc
│   │   ├── resnet50.cpython-37.pyc
│   │   ├── resnet50.cpython-39.pyc
│   │   ├── resnet50_FPN.cpython-37.pyc
│   │   ├── resnet50_FPN.cpython-39.pyc
│   │   ├── rpn.cpython-37.pyc
│   │   ├── rpn.cpython-39.pyc
│   │   ├── vgg16.cpython-37.pyc
│   │   └── vgg16.cpython-39.pyc
│   ├── classifier.py
│   ├── resnet101.py
│   ├── resnet50.py
│   ├── resnet50_ECA_FPN.py
│   ├── resnet50_FPN.py
│   ├── rpn.py
│   └── vgg16.py
├── order_name.py
├── predict.py
├── qa.md
├── requirements.txt
├── summary.py
├── train.py
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── anchors.cpython-37.pyc
│   │   ├── anchors.cpython-39.pyc
│   │   ├── callbacks.cpython-37.pyc
│   │   ├── callbacks.cpython-39.pyc
│   │   ├── dataloader.cpython-37.pyc
│   │   ├── dataloader.cpython-39.pyc
│   │   ├── utils.cpython-37.pyc
│   │   ├── utils.cpython-39.pyc
│   │   ├── utils_bbox.cpython-37.pyc
│   │   ├── utils_bbox.cpython-39.pyc
│   │   ├── utils_fit.cpython-37.pyc
│   │   ├── utils_fit.cpython-39.pyc
│   │   ├── utils_map.cpython-37.pyc
│   │   └── utils_map.cpython-39.pyc
│   ├── anchors.py
│   ├── callbacks.py
│   ├── dataloader.py
│   ├── kmeans_anchors
│   │   ├── Bikmeans_anchors.py
│   │   ├── main.py
│   │   ├── plot_kmeans.py
│   │   ├── read_voc.py
│   │   └── yolo_kmeans.py
│   ├── soft_nms.py
│   ├── utils.py
│   ├── utils_bbox.py
│   ├── utils_fit.py
│   └── utils_map.py
└── voc_annotation.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# ignore map, miou, datasets
map_out/
miou_out/
VOCdevkit/
datasets/
Medical_Datasets/
lfw/
logs/
model_data/
.temp_map_out/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/Data_preproces/data_expansion.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
"""Data augmentation
1.  flip
2.  crop
3.  color jittering
4.  shift
5.  scale
6.  contrast
7.  noise perturbation
8.  rotation / reflection
9.  histogram equalization
10. Laplacian operator
11. log transform
12. gamma transform
13. contrast-limited adaptive histogram equalization (CLAHE)
14. Retinex SSR
15. Retinex MSR
"""
import logging
import os
import random
import threading
import time

import cv2
import numpy as np
from PIL import Image, ImageEnhance, ImageFile


# Image cropping
def read_path(file_pathname):
    for filename in os.listdir(file_pathname):
        img_filename = os.path.join(file_pathname, filename)  # join the directory path and the file name
        img = cv2.imread(img_filename)  # img_filename is the full path of the image
        crop_img = img[0:3585, 0:3629]  # crop region is img[y0:y1, x0:x1]; (x0, y0) is the top-left, (x1, y1) the bottom-right corner

        # save the cropped figure
        # cv2.imwrite(r'date_set\data_source1' + "/" + filename, crop_img)
        cv2.imwrite(r'jixing\polarity' + "/" + filename, crop_img)


logger = logging.getLogger(__name__)
ImageFile.LOAD_TRUNCATED_IMAGES = True


class DataAugmentation:
    """
    Collects the data-augmentation methods used in this project.
    """

    def __init__(self):
        pass

    @staticmethod
    def openImage(image):
        img = cv2.imread(image)
        return img

    @staticmethod
    def randomRotation(image, center=None, scale=1.0):  # mode=Image.BICUBIC
        """
        Rotate the image by a random angle drawn from [-180, 180) degrees.
        :return: the rotated image
        """
        random_angle = np.random.randint(-180, 180)
        (h, w) = image.shape[:2]
        # If no rotation center is specified, the center of the image is used
        if center is None:
            center = (w / 2, h / 2)
        m = cv2.getRotationMatrix2D(center, random_angle, scale)  # center: rotation center; angle: positive = counter-clockwise; scale: 1.0 keeps the original size
        rotated = cv2.warpAffine(image, m, (w, h))
        return rotated

    @staticmethod
    def transpose(image):
        """
        Random horizontal / vertical / combined flip.
        :return: the flipped image
        """
        random_angle = np.random.randint(-1, 2)  # random flip code in [-1, 1]: -1 both axes, 0 vertical, 1 horizontal
        img_filp = cv2.flip(image, random_angle)
        return img_filp

    '''Color jitter'''

    @staticmethod
    def randomColor(image):
        """
        Apply random color jitter to the image.
        :param image: OpenCV image (BGR)
        :return: the color-jittered image
        """
        saturation = random.randint(0, 1)
        brightness = random.randint(0, 1)
        contrast = random.randint(0, 1)
        sharpness = random.randint(0, 1)
        # Convert to a PIL.Image object so that ImageEnhance can be used
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if random.random() < saturation:
            random_factor = np.random.randint(0, 31) / 10.  # random factor
            image = ImageEnhance.Color(image).enhance(random_factor)  # adjust saturation
        if random.random() < brightness:
            random_factor = np.random.randint(10, 21) / 10.  # random factor
            image = ImageEnhance.Brightness(image).enhance(random_factor)  # adjust brightness
        if random.random() < contrast:
            random_factor = np.random.randint(10, 21) / 10.  # random factor
            image = ImageEnhance.Contrast(image).enhance(random_factor)  # adjust contrast
        if random.random() < sharpness:
            random_factor = np.random.randint(0, 31) / 10.  # random factor
            image = ImageEnhance.Sharpness(image).enhance(random_factor)  # adjust sharpness
        image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)  # convert back to OpenCV (BGR) format
        return image
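
    # Note on the random factors above: ImageEnhance.enhance(1.0) returns the
    # image unchanged; factors below 1.0 weaken and above 1.0 strengthen the
    # corresponding property, so draws from [0, 3) can both fade and boost it.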
    @staticmethod
    def randomGaussian(image, mean=0.2, sigma=0.04):
        """
        Add Gaussian noise to the image.
        mean: mean of the Gaussian distribution
        sigma: standard deviation of the Gaussian distribution; larger sigma means more noise

        Returns:
        gaussian_out: the noised image
        """
        # Normalize the image to [0, 1]
        img = image / 255
        # Generate Gaussian noise
        noise = np.random.normal(mean, sigma, img.shape)
        # Add the noise to the image
        gaussian_out = img + noise
        # Clip values above 1 to 1 and below 0 to 0
        gaussian_out = np.clip(gaussian_out, 0, 1)
        # Restore the intensity range to 0-255
        gaussian_out = np.uint8(gaussian_out * 255)
        # Restore the noise range to 0-255
        # noise = np.uint8(noise * 255)
        return gaussian_out

    @staticmethod
    def Pepper_noise(image):
        '''
        Salt-and-pepper noise
        '''
        # Ratio of salt to pepper noise
        s_vs_p = 0.04
        # Fraction of image pixels to corrupt
        amount = 0.03
        noisy_img = np.copy(image)
        # Add salt noise
        num_salt = np.ceil(amount * image.size * s_vs_p)
        # Pick random coordinates for the noise
        coords = [np.random.randint(0, i - 1, int(num_salt)) for i in image.shape]
        noisy_img[tuple(coords)] = 255
        # Add pepper noise
        num_pepper = np.ceil(amount * image.size * (1. - s_vs_p))
        # Pick random coordinates for the noise
        coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in image.shape]
        noisy_img[tuple(coords)] = 0
        return noisy_img

    @staticmethod
    def Poisson_noise(image):
        '''Poisson noise'''

        # Number of distinct pixel values, rounded up to a power of two
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        # Add Poisson noise to the image
        noisy_img = np.random.poisson(image * vals) / float(vals)
        return noisy_img

    '''Image enhancement algorithms'''

    @staticmethod
    def hist(image):
        '''Histogram equalization'''
        r, g, b = cv2.split(image)
        r1 = cv2.equalizeHist(r)
        g1 = cv2.equalizeHist(g)
        b1 = cv2.equalizeHist(b)
        image_equal_clo = cv2.merge([r1, g1, b1])
        return image_equal_clo

    @staticmethod
    def laplacian(image):
        '''Laplacian sharpening'''
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        image_lap = cv2.filter2D(image, cv2.CV_8UC3, kernel)
        return image_lap

    @staticmethod
    def log(image):
        '''Log transform'''
        image_log = np.uint8(np.log(np.array(image) + 1))
        cv2.normalize(image_log, image_log, 0, 255, cv2.NORM_MINMAX)
        # Convert to an 8-bit image for display
        cv2.convertScaleAbs(image_log, image_log)
        return image_log

    @staticmethod
    def gamma(image):
        '''Gamma transform'''
        fgamma = 0.5  # the larger the value, the darker the output image
        image_gamma = np.uint8(np.power((np.array(image) / 255.0), fgamma) * 255.0)
        cv2.normalize(image_gamma, image_gamma, 0, 255, cv2.NORM_MINMAX)
        cv2.convertScaleAbs(image_gamma, image_gamma)
        return image_gamma

    @staticmethod
    def clahe(image):
        '''Contrast-limited adaptive histogram equalization (CLAHE)'''
        b, g, r = cv2.split(image)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        b = clahe.apply(b)
        g = clahe.apply(g)
        r = clahe.apply(r)
        image_clahe = cv2.merge([b, g, r])
        return image_clahe
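
    # clipLimit caps how much each local histogram may amplify contrast (which
    # limits noise blow-up); tileGridSize is the grid of tiles (8x8 here) that
    # are equalized independently before being blended together.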
    def __replaceZeroes(data):
        min_nonzero = min(data[np.nonzero(data)])
        data[data == 0] = min_nonzero
        return data

    def __SSR(src_img, size):
        L_blur = cv2.GaussianBlur(src_img, (size, size), 0)
        img = DataAugmentation.__replaceZeroes(src_img)
        L_blur = DataAugmentation.__replaceZeroes(L_blur)

        dst_Img = cv2.log(img / 255.0)
        dst_Lblur = cv2.log(L_blur / 255.0)
        dst_IxL = cv2.multiply(dst_Img, dst_Lblur)
        log_R = cv2.subtract(dst_Img, dst_IxL)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        log_uint8 = cv2.convertScaleAbs(dst_R)
        return log_uint8

    @staticmethod
    def SSR_image(image):
        '''Retinex SSR, applied per channel'''
        size = 3
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__SSR(b_gray, size)
        g_gray = DataAugmentation.__SSR(g_gray, size)
        r_gray = DataAugmentation.__SSR(r_gray, size)
        result = cv2.merge([b_gray, g_gray, r_gray])
        return result

    # Retinex MSR
    def __MSR(img, scales):
        weight = 2 / 3.0
        scales_size = len(scales)
        h, w = img.shape[:2]
        log_R = np.zeros((h, w), dtype=np.float32)

        for i in range(scales_size):
            img = DataAugmentation.__replaceZeroes(img)
            L_blur = cv2.GaussianBlur(img, (scales[i], scales[i]), 0)
            L_blur = DataAugmentation.__replaceZeroes(L_blur)
            dst_Img = cv2.log(img / 255.0)
            dst_Lblur = cv2.log(L_blur / 255.0)
            dst_Ixl = cv2.multiply(dst_Img, dst_Lblur)
            log_R += weight * cv2.subtract(dst_Img, dst_Ixl)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        log_uint8 = cv2.convertScaleAbs(dst_R)
        return log_uint8

    @staticmethod
    def MSR_image(image):
        '''Retinex MSR, applied per channel'''
        scales = [15, 101, 301]  # e.g. [3, 5, 9]
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__MSR(b_gray, scales)
        g_gray = DataAugmentation.__MSR(g_gray, scales)
        r_gray = DataAugmentation.__MSR(r_gray, scales)
        result = cv2.merge([b_gray, g_gray, r_gray])
        return result
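
# Usage sketch for the class above (the paths are illustrative):
#   img = DataAugmentation.openImage(r"F:\some_folder\board.jpg")
#   out = DataAugmentation.clahe(img)           # or any other method above
#   cv2.imwrite(r"F:\some_folder\board_clahe.jpg", out)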


def imageOps(func_name, image1, img_des_path, img_file_name, times=1):  # times=1: run each method once per image
    funcMap = {  # "randomRotation": DataAugmentation.randomRotation,
        "randomcolor": DataAugmentation.randomColor, "transpose": DataAugmentation.transpose,
        "randomGaussian": DataAugmentation.randomGaussian, "pepper_noise": DataAugmentation.Pepper_noise,
        "Poisson_noise": DataAugmentation.Poisson_noise, "hist": DataAugmentation.hist,
        "laplacian": DataAugmentation.laplacian, "log": DataAugmentation.log,
        "gamma": DataAugmentation.gamma, "clahe": DataAugmentation.clahe,
        "SSR_image": DataAugmentation.SSR_image, "MSR_image": DataAugmentation.MSR_image
    }
    if funcMap.get(func_name) is None:
        logger.error("%s does not exist", func_name)
        return -1

    for _i in range(0, times, 1):
        new_image = funcMap[func_name](image1)  # the transformed image
        # path = os.path.join(img_des_path, func_name + str(_i) + img_file_name)  # new name for the saved image
        path = os.path.join(img_des_path, img_file_name)
        cv2.imwrite(path, new_image)


# opsList = {"transpose", 'randomcolor', "gamma", "MSR_image", "pepper_noise", "hist", "log", "clahe",
#            'randomGaussian', 'Poisson_noise', 'laplacian', 'SSR_image'}
opsList = {"clahe"}  # CLAHE gives the best enhancement results here

def threadOPS(img_path, new_img_path):
    """
    Process the images in worker threads.
    :param img_path: source directory (or a single file)
    :param new_img_path: destination directory
    :return:
    """
    # Collect the image names
    if os.path.isdir(img_path):
        img_names = os.listdir(img_path)
    else:
        img_names = [img_path]

    img_num = 0

    # Count the images
    for img_name in img_names:
        tmp_img_name = os.path.join(img_path, img_name)
        if os.path.isdir(tmp_img_name):
            print('contains a sub-folder')
            exit()
        else:
            img_num = img_num + 1
    num = img_num


    for i in range(num):
        img_name = img_names[i]
        tmp_img_name = os.path.join(img_path, img_name)
        # Read the file and process it
        image1 = DataAugmentation.openImage(tmp_img_name)

        # threadImage = [0] * 12  # a list of length 12 could be used instead
        threadImage = {}  # empty dict holding the worker threads
        _index = 0
        for ops_name in opsList:
            # Spawn a new thread per operation
            threadImage[_index] = threading.Thread(target=imageOps,
                                                   args=(ops_name, image1, new_img_path, img_name))
            print('threadImage[{}]:{}'.format(_index, threadImage))
            threadImage[_index].start()  # start the thread
            _index += 1
            time.sleep(0.2)  # brief pause between thread launches


if __name__ == '__main__':
    threadOPS(  # r"F:\Desktop\PCB_code\date_set\1shujuchuli",
        # r"F:\Desktop\PCB_code\date_set\2shujucunfang"
        r'F:\Desktop\PCB_code\data_set1\data_shiyan',
        r'F:\Desktop\PCB_code\data_set1\data_shiyan_kuochong')

    # read_path(r'F:\Desktop\PCB_code\data_set1\data_shiyan')  # image cropping


'''
Note on Windows paths:
In a path such as \table\name\rain, the sequences \t, \n and \r are easily
interpreted as escape characters. There are three common fixes:
#1
path = r"C:\data\table\name\rain"
# prefix with r to disable escaping

#2
path = "C:\\data\\table\\name\\rain"
# use \\ instead of \

#3
path = "C:/data/table/name/rain"
# use / instead of \
'''

--------------------------------------------------------------------------------
/Data_preproces/order_name.py:
--------------------------------------------------------------------------------
#...........................#
# Rename the files in a folder
#...........................#
import os
import xml
from xml.dom import minidom
import xml.etree.cElementTree as ET

def myrename(file_path):
    file_list = os.listdir(file_path)
    for i, fi in enumerate(file_list):
        old_dir = os.path.join(file_path, fi)
        print('file name:', old_dir)
        # Replace spaces in the name with underscores
        new_name = fi.replace(" ", "_")
        print("new name:", new_name)

        # # Sequential numbering instead:
        # # new_name = str(i + 1) + "." + str(fi.split(".")[-1])
        new_dir = os.path.join(file_path, new_name)
        try:
            os.rename(old_dir, new_dir)
        except Exception as e:
            print(e)
            print("Failed!")
        else:
            print("Success!")


#...........................#
# Rewrite the filename and path fields inside the XML files
#...........................#

def xml_name(xmlpath):
    files = os.listdir(xmlpath)  # all file names in the folder
    count = 0
    for xmlFile in files:  # iterate over the folder
        if not os.path.isdir(xmlFile):  # only open regular files, not folders
            name1 = xmlFile.split('.')[0]
            dom = xml.dom.minidom.parse(xmlpath + '/' + xmlFile)
            root = dom.documentElement
            # rewrite filename
            newfilename = root.getElementsByTagName('filename')
            t = newfilename[0].firstChild.data = name1 + '.jpg'
            print('t:', t)
            # rewrite path
            newpath = root.getElementsByTagName('path')
            t1 = newpath[0].firstChild.data = xmlpath + '\\' + name1 + '.jpg'
            print('t1:', t1)

            with open(os.path.join(xmlpath, xmlFile), 'w',) as fh:
                print('fh:', fh)
                dom.writexml(fh)
                print('filename/path written OK!')
            count = count + 1
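
# Example effect of xml_name (illustrative file name): for 0001.xml, <filename>
# is rewritten to 0001.jpg and <path> to "<xmlpath>\0001.jpg", keeping the label
# files and image files in sync after renaming.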

# Strip the XML declaration that minidom writes at the top of each file
def delete_xmlversion(xmlpath, savedir):

    files = os.listdir(xmlpath)
    for ml in files:
        if '.xml' in ml:
            fo = open(savedir + '/' + '{}'.format(ml), 'w', encoding='utf-8')
            print('{}'.format(ml))
            fi = open(xmlpath + '/' + '{}'.format(ml), 'r')
            content = fi.readlines()
            for line in content:
                # line = line.replace('a', 'b')  # e.g. replace a with b
                line = line.replace('<?xml version="1.0" ?>', '')  # drop the declaration written by minidom
                # line = line.replace('测试图片', '车辆图片')
                # line = line.replace('class1', 'class2')
                fo.write(line)
            fo.close()
            print('Replacement done')


# Remove unwanted object labels from the XML files
def Delete_part_information_xml(path_root, xy_classes):
    for anno_path in path_root:
        xml_list = os.listdir(anno_path)
        print("Opening {}".format(xml_list))
        for annoxml in xml_list:
            path_xml = os.path.join(anno_path, annoxml)
            print('File path is {}'.format(path_xml))
            tree = ET.parse(path_xml)
            root = tree.getroot()

            for child in root.findall('object'):
                name = child.find('name').text
                if not name in xy_classes:
                    root.remove(child)
            print(annoxml)
            tree.write(os.path.join(r'F:\Desktop\PCB_code\PCB_DataSet\Annotations—new', annoxml))  # where the processed files are saved




if __name__ == "__main__":
    file_path = r"F:\Desktop\PCB_code\date_set\new_data"  # full path + folder name
    # xmlpath = "F:\\桌面\\PCB_code\\date_set\\Image_label_source"
    # savedir = r'F:\桌面\PCB_code\date_set\3'  # where files go after the XML declaration is stripped
    # xmlpath = r'F:\桌面\PCB_code\date_set\label'
    myrename(file_path)  # rename the image files

    # Rewrite the names inside the XML files
    # myrename(xmlpath)                    # 1. rename the XML files themselves
    # xml_name(xmlpath)                    # 2. rewrite filename and path inside the XML files
    # delete_xmlversion(xmlpath, savedir)  # strip the XML declaration after the rewrite

    # Remove unwanted object labels from the XML files
    path_root = r'F:\Desktop\PCB_code\PCB_DataSet\Annotations'
    xy_classes = ['Speaker', "Bat", "2USB", "Rj45+2USB", "Cap_cross", "Cap_blue_black", "Jumper04p",
                  "Jumper10p", "HDD", "Power08p", "Power04p", "Power24p"]
    Delete_part_information_xml([path_root], xy_classes)  # wrapped in a list: the function iterates over directories
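
delete_xmlversion strips the declaration with a plain string replace. A sketch of the same cleanup done with ElementTree instead (paths and the function name are illustrative); ET simply never writes the declaration when xml_declaration=False:

```python
import os
import xml.etree.ElementTree as ET

def strip_declaration(xmlpath, savedir):
    for name in os.listdir(xmlpath):
        if name.endswith('.xml'):
            tree = ET.parse(os.path.join(xmlpath, name))
            # write the tree back without the <?xml ...?> declaration line
            tree.write(os.path.join(savedir, name), encoding='utf-8', xml_declaration=False)
```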

--------------------------------------------------------------------------------
/Data_preproces/voc_annotation.py:
--------------------------------------------------------------------------------
import os
import random
import xml.etree.ElementTree as ET

import numpy as np

from utils.utils import get_classes

#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script computes when run:
#   0 - the whole labeling pipeline: the txt files in VOCdevkit/VOC2007/ImageSets plus the 2007_train.txt / 2007_val.txt used for training
#   1 - only the txt files in VOCdevkit/VOC2007/ImageSets
#   2 - only the 2007_train.txt / 2007_val.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0
#-------------------------------------------------------------------#
#   Must be modified: used to generate the object information in 2007_train.txt / 2007_val.txt.
#   Keep it identical to the classes_path used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes are not set correctly.
#   Only effective when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = r'F:\Desktop\PCB_code\PCB_DataSet\cls_classes.txt'
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val):test = 9:1.
#   train_percent sets the ratio of train to val inside (train + val); by default train:val = 9:1.
#   Only effective when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Points to the folder containing the VOC-style dataset;
#   defaults to the dataset in the repository root.
#-------------------------------------------------------#
PCB_DataSet_path = r'PCB_DataSet'

PCB_Data_Sets = ['trainval', 'test']
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Count the objects per class
#-------------------------------------------------------#
photo_nums = np.zeros(len(PCB_Data_Sets))
nums = np.zeros(len(classes))
def convert_annotation(image_id, list_file):
    in_file = open(os.path.join(PCB_DataSet_path, 'Annotations/%s.xml'%(image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult')!=None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1
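
# Each image contributes one line to the generated trainval/test txt, e.g.
# (the coordinates and class ids below are illustrative):
#   F:/Desktop/PCB_code/PCB_DataSet/JPEGImages/0001.jpg 48,240,195,371,3 12,30,52,98,7
# i.e. the absolute image path followed by one "xmin,ymin,xmax,ymax,class_id"
# group per ground-truth box.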

if __name__ == "__main__":
    random.seed(0)
    if " " in os.path.abspath(PCB_DataSet_path):
        raise ValueError("The dataset folder path and the image names must not contain spaces, otherwise training will be affected; please rename them.")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(PCB_DataSet_path, 'Annotations')
        saveBasePath = os.path.join(PCB_DataSet_path, 'ImageSets')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        indices = range(num)
        tv = int(num*trainval_percent)
        tr = int(tv*train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in indices:
            name = total_xml[i][:-4]+'\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate PCB_train.txt and PCB_val.txt for train.")
        type_index = 0
        for image_set in PCB_Data_Sets:
            image_ids = open(os.path.join(PCB_DataSet_path, 'ImageSets/%s.txt'%(image_set)), encoding='utf-8').read().strip().split()
            list_file = open(os.path.join(PCB_DataSet_path, '%s.txt'%(image_set)), 'w', encoding='utf-8')  # holds the train / test lists
            for image_id in image_ids:
                list_file.write('%s/JPEGImages/%s.jpg'%(os.path.abspath(PCB_DataSet_path), image_id))  # write the image path into the list

                convert_annotation(image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate PCB_train.txt and PCB_val.txt for train done.")

        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0]*len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is quite small; use a larger number of epochs to get enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset; make sure classes_path matches your dataset and the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and the label names are correct, otherwise training will have no effect!")
            print("(Repeated three times because it matters.)")

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 JiaQi Xu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Faster R-CNN: a PCB-component-defect-detection object detection model implemented in PyTorch
---

## Contents
1. [Top News](#top-news)
2. [Performance](#performance)
3. [Environment](#environment)
4. [Download](#download)
5. [How2predict](#how2predict)
6. [How2train](#how2train)
7. [How2eval](#how2eval)
8. [Reference](#reference)

## Top News
**This project supports step and cos learning-rate schedules, a choice of Adam or SGD optimizers, learning-rate scaling with batch_size, and adds image cropping.**
**It also adds extensive comments, many tunable parameters, a reorganized module layout, and features such as FPS measurement, video prediction and batch prediction.**

## Performance
| training dataset | weights file | test dataset | input size | mAP 0.5:0.95 | mAP 0.5 |
| :-----: | :-----: | :------: | :------: | :------: | :-----: |
| VOC07+12 | [voc_weights_resnet.pth](https://github.com/bubbliiiing/faster-rcnn-pytorch/releases/download/v1.0/voc_weights_resnet.pth) | VOC-Test07 | - | - | 80.36
| VOC07+12 | [voc_weights_vgg.pth](https://github.com/bubbliiiing/faster-rcnn-pytorch/releases/download/v1.0/voc_weights_vgg.pth) | VOC-Test07 | - | - | 77.46

**This repository also ships trained weights, so downloading them is optional.**

## Environment
torch == 1.2.0

## Download
voc_weights_resnet.pth or voc_weights_vgg.pth needed for training, as well as the backbone weights, can be downloaded from Baidu Netdisk.
voc_weights_resnet.pth is used when ResNet is the backbone feature extractor;
voc_weights_vgg.pth is used when VGG is the backbone feature extractor.
Link: https://pan.baidu.com/s/1S6wG8sEXBeoSec95NZxmlQ
Code: 8mgp

The VOC dataset can be downloaded below; it already contains the training, test and validation sets (validation equals test), so no further split is needed:
Link: https://pan.baidu.com/s/1YuBbBKxm2FGgTU5OfaeC5A
Code: uack
**The author used PCB images captured on a production line, which cannot be uploaded; readers should prepare their own dataset or download the one linked above.**
**In what follows, the public VOC07+12 dataset is used for the walkthrough.**

## How2train
### a. Training on VOC07+12
1. Dataset preparation
**Training uses the VOC format; download VOC07+12 and unzip it into the repository root before training.**

2. Dataset preprocessing
Set annotation_mode=2 in voc_annotation.py and run it to generate 2007_train.txt and 2007_val.txt in the root directory.

3. Start training
The default parameters of train.py are set up for the VOC dataset; simply run train.py to start training.

4. Predicting with the trained model
Prediction uses two files, frcnn_predict.py and predict.py. First edit model_path and classes_path in frcnn_predict.py; both parameters must be changed.
**model_path points to the trained weights file in the logs folder.
classes_path points to the txt listing the detection classes.**
Then run predict.py and type an image path to run detection.

### b. Training on your own dataset
1. Dataset preparation
**Training uses the VOC format; build your own dataset before training.**
Put the label files into the Annotation folder under VOCdevkit/VOC2007.
Put the image files into the JPEGImages folder under VOCdevkit/VOC2007.

2. Dataset preprocessing
After arranging the dataset, run voc_annotation.py to produce the 2007_train.txt and 2007_val.txt used for training.
Edit the parameters in voc_annotation.py. For a first run only classes_path needs changing; it points to the txt listing the detection classes.
When training on your own dataset, create a cls_classes.txt listing the classes you want to distinguish.
The content of model_data/cls_classes.txt is, for example:
```python
cat
dog
...
```
Edit classes_path in voc_annotation.py so it points to cls_classes.txt, then run voc_annotation.py.

3. Start training
**There are many training parameters, all in train.py; read the comments carefully after downloading the repository. The most important one is still classes_path in train.py.**
**classes_path points to the txt of detection classes, the same txt as in voc_annotation.py! It must be changed when training your own dataset!**
After editing classes_path, run train.py to start training; after several epochs the weights appear in the logs folder.

4. Predicting with the trained model
Prediction uses two files, frcnn_predict.py and predict.py. Edit model_path and classes_path in frcnn_predict.py.
**model_path points to the trained weights file in the logs folder.
classes_path points to the txt listing the detection classes.**
Then run predict.py and type an image path to run detection.

## How2predict
### a. Using pretrained weights
1. After downloading and unzipping the repository, download frcnn_weights.pth from Baidu Netdisk, put it into model_data, run predict.py and type
```python
img/street.jpg
```
2. Settings in predict.py enable FPS testing and video detection.
### b. Using your own trained weights
1. Train as described above.
2. In frcnn_predict.py, edit model_path and classes_path to match your trained files; **model_path points to the weights file under the logs folder, and classes_path lists the classes model_path was trained on**.


3. Run predict.py and type
```python
img/street.jpg
```
4. Settings in predict.py enable FPS testing and video detection.
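
The full pipeline above can be summarized as the following command sequence (a sketch; the file names follow the defaults used in this repository):
```python
# 1) generate the ImageSets txt files and the training lists
#    python voc_annotation.py
# 2) train (weights are written to logs/)
#    python train.py
# 3) predict: edit model_path / classes_path in frcnn_predict.py first
#    python predict.py      # then type an image path such as img/street.jpg
```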

## How2eval
### a. Evaluating the VOC07+12 test set
1. Evaluation uses the VOC format. VOC07+12 already provides a test split, so there is no need to run voc_annotation.py to generate the txt files under ImageSets.
2. Edit model_path and classes_path in frcnn.py. **model_path points to the trained weights file in the logs folder. classes_path points to the txt listing the detection classes.**
3. Run get_map.py; the evaluation results are saved in the map_out folder.

### b. Evaluating your own dataset
1. Evaluation uses the VOC format.
2. If voc_annotation.py was already run before training, the code has split the dataset into training, validation and test sets. To change the test-set ratio, edit trainval_percent in voc_annotation.py: trainval_percent sets (train + val):test, 9:1 by default; train_percent sets train:val inside (train + val), 9:1 by default.
3. After splitting the test set with voc_annotation.py, edit classes_path in get_map.py so it points to the same class txt used for training. It must be changed when evaluating your own dataset.
4. Edit model_path and classes_path in frcnn_predict.py. **model_path points to the trained weights file in the logs folder. classes_path points to the txt listing the detection classes.**
5. Run get_map.py; the evaluation results are saved in the map_out folder.
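
Once the paths are set, evaluation likewise reduces to one command (a sketch):
```python
# python get_map.py   # writes ground-truth/, detection-results/ and the mAP report into map_out/
```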

--------------------------------------------------------------------------------
/get_map.py:
--------------------------------------------------------------------------------
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm

from utils.utils import get_classes
from utils.utils_map import get_coco_map, get_map
from frcnn_predict import FRCNN

if __name__ == "__main__":
    '''
    Unlike AP, Recall and Precision are not areas under a curve, so their values change with the
    confidence threshold. By default this script reports the Recall and Precision at a confidence
    threshold of 0.5.

    Because of how mAP is computed, the network must output nearly all of its candidate boxes so
    that Recall and Precision can be evaluated at every threshold. The txt files in
    map_out/detection-results/ therefore usually contain more boxes than a plain predict run:
    they list every possible prediction.
    '''
    #------------------------------------------------------------------------------------------------------------------#
    #   map_mode selects what this script computes when run:
    #   0 - the whole pipeline: prediction results, ground truth, and the VOC mAP
    #   1 - prediction results only
    #   2 - ground truth only
    #   3 - VOC mAP only
    #   4 - the 0.50:0.95 mAP of the current dataset via the COCO toolbox; requires the prediction
    #       results and the ground truth, plus an installed pycocotools
    #-------------------------------------------------------------------------------------------------------------------#
    map_mode = 0
    #--------------------------------------------------------------------------------------#
    #   classes_path points to the classes whose VOC mAP is measured;
    #   normally identical to the classes_path used for training and prediction.
    #--------------------------------------------------------------------------------------#
    classes_path = 'PCB_DataSet/cls_classes.txt'
    #--------------------------------------------------------------------------------------#
    #   MINOVERLAP selects the mAP0.x to compute, e.g. set MINOVERLAP = 0.75 for mAP0.75.
    #
    #   A prediction counts as a positive only if its overlap with a ground-truth box exceeds
    #   MINOVERLAP, otherwise it is a negative. The larger the value, the more precise the
    #   predicted boxes must be, and the lower the resulting mAP.
    #--------------------------------------------------------------------------------------#
    MINOVERLAP = 0.5
    #--------------------------------------------------------------------------------------#
    #   Because of how mAP is computed, the network must output nearly all candidate boxes,
    #   so confidence should be set very low to keep them all.
    #
    #   This value is normally left unchanged; computing mAP needs nearly every candidate box.
    #   To inspect Recall and Precision at other thresholds, change score_threhold below instead.
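    #
    #   Illustration: with confidence = 0.02 nearly every candidate box enters the PR curve,
    #   which is what mAP needs; the single Recall/Precision pair that gets reported is then
    #   read off that curve at score_threhold = 0.5.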
    #--------------------------------------------------------------------------------------#
    confidence = 0.02
    #--------------------------------------------------------------------------------------#
    #   NMS IoU used at prediction time; larger values make non-maximum suppression less strict.
    #
    #   This value is normally left unchanged.
    #--------------------------------------------------------------------------------------#
    nms_iou = 0.5
    #---------------------------------------------------------------------------------------------------------------#
    #   Unlike AP, Recall and Precision depend on the threshold, so their values change with it.
    #
    #   By default this script reports them at a threshold of 0.5 (defined here as score_threhold).
    #   Since confidence above must stay tiny for the mAP computation, score_threhold is the knob
    #   that picks the threshold at which the reported Recall and Precision are read off.
    #---------------------------------------------------------------------------------------------------------------#
    score_threhold = 0.5
    #-------------------------------------------------------#
    #   map_vis toggles visualization during the VOC mAP computation
    #-------------------------------------------------------#
    map_vis = False
    #-------------------------------------------------------#
    #   Points to the folder containing the VOC-style dataset;
    #   defaults to the dataset in the repository root.
    #-------------------------------------------------------#
    VOCdevkit_path = r'F:\Desktop\PCB_code\PCB_DataSet'
    #-------------------------------------------------------#
    #   Output folder for the results, map_out by default
    #-------------------------------------------------------#
    map_out_path = 'faster-rcnn-pytorch-master/map_out'

    image_ids = open(os.path.join(VOCdevkit_path, "ImageSets/test.txt")).read().strip().split()

    if not os.path.exists(map_out_path):
        os.makedirs(map_out_path)
    if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
        os.makedirs(os.path.join(map_out_path, 'ground-truth'))
    if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
        os.makedirs(os.path.join(map_out_path, 'detection-results'))
    if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
        os.makedirs(os.path.join(map_out_path, 'images-optional'))

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        frcnn = FRCNN(confidence = confidence, nms_iou = nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "JPEGImages/"+image_id+".jpg")
            image = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            frcnn.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")
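
    # File formats written under map_out (the example values are illustrative):
    #   detection-results/0001.txt : one "class_name confidence left top right bottom" per box
    #   ground-truth/0001.txt      : one "class_name left top right bottom [difficult]" per box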
    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "Annotations/"+image_id+".xml")).getroot()
                for obj in root.findall('object'):
                    difficult_flag = False
                    if obj.find('difficult')!=None:
                        difficult = obj.find('difficult').text
                        if int(difficult)==1:
                            difficult_flag = True
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, score_threhold = score_threhold, path = map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names = class_names, path = map_out_path)
        print("Get map done.")

--------------------------------------------------------------------------------
/nets/FasterRCNN_train.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch

from nets.classifier import Resnet50RoIHead, Resnet101RoIHead, VGG16RoIHead, Resnet50_FPNRoIHead
from nets.vgg16 import decom_vgg16
from nets.resnet50 import resnet50
from nets.resnet101 import resnet101
from nets.resnet50_FPN import resnet50_FPN
from nets.rpn import RegionProposalNetwork, resnet50_fpn_RPNhead



class FasterRCNN(nn.Module):
    def __init__(self, num_classes,
                mode = "training",
                feat_stride = 16,
                anchor_scales = [4, 16, 32],
                ratios = [0.5, 1, 2],
                backbone = 'vgg',
                pretrained = False):
        super(FasterRCNN, self).__init__()  # initialize the attributes inherited from the parent class
        self.feat_stride = feat_stride
        #---------------------------------#
        #   Backbones: vgg, resnet50, resnet101, resnet50_FPN
        #---------------------------------#
        if backbone == 'vgg':
            self.extractor, classifier = decom_vgg16(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                512, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = VGG16RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 7,
                spatial_scale   = 1,
                classifier      = classifier
            )

        elif backbone == 'resnet50':
            # Feature-extraction and classification layers of the backbone
            self.extractor, classifier = resnet50(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = Resnet50RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier
            )

        elif backbone == 'resnet101':
            self.extractor, classifier = resnet101(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = Resnet101RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier)

        elif
 backbone == 'resnet50_FPN':
            self.extractor, classifier = resnet50_FPN(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            ratios = ratios * len(anchor_scales)
            self.rpn = resnet50_fpn_RPNhead(
                256, 256,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = Resnet50_FPNRoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier)

    # x = [base_feature, img_size], produced in Suggestion_box.FasterRCNNTrainer.forward()
    def forward(self, x, scale=1., mode="forward"):
        if mode == "forward":
            #---------------------------------#
            #   Size of the input image
            #---------------------------------#
            img_size = x.shape[2:]
            #---------------------------------#
            #   Extract features with the backbone
            #---------------------------------#
            base_feature = self.extractor.forward(x)

            #---------------------------------#
            #   Get the region proposals
            #---------------------------------#
            _, _, rois, roi_indices, _ = self.rpn.forward(base_feature, img_size, scale)
            #---------------------------------------#
            #   Classification and regression results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores, rois, roi_indices
        elif mode == "extractor":
            #---------------------------------#
            #   Extract features with the backbone (e.g. resnet50)
            #---------------------------------#
            base_feature = self.extractor.forward(x)
            return base_feature
        elif mode == "rpn":
            base_feature, img_size = x
            #---------------------------------#
            #   Get the region proposals
            #---------------------------------#
            rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn.forward(base_feature, img_size, scale)
            return rpn_locs, rpn_scores, rois, roi_indices, anchor
        elif mode == "head":
            base_feature, rois, roi_indices, img_size = x
            #---------------------------------------#
            #   Classification and regression results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores
        elif mode == "fpn_head":
            base_feature, rois, roi_indices, img_size = x
            #---------------------------------------#
            #   Classification and regression results of the classifier head,
            #   predicted on pyramid levels p2~p5
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature[:4], rois[:4], roi_indices[:4], img_size)
            return roi_cls_locs, roi_scores

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
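
The mode argument turns forward() into a small dispatcher over the detection stages. A minimal sketch of driving the stages separately (the class count and input size below are arbitrary placeholders):

```python
import torch
from nets.FasterRCNN_train import FasterRCNN

model = FasterRCNN(num_classes=12, backbone="resnet50")   # 12 is a placeholder
model.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 600, 600)                       # dummy input batch
    feat = model(x, mode="extractor")                     # backbone feature map
    rpn_locs, rpn_scores, rois, roi_indices, anchor = model(
        [feat, x.shape[2:]], mode="rpn")                  # region proposals
    roi_cls_locs, roi_scores = model(
        [feat, rois, roi_indices, x.shape[2:]], mode="head")  # per-RoI results
```

This is the same sequence that mode="forward" runs end to end; the split modes exist so the trainer can interleave loss computation between the stages.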
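[Editor's note — illustrative addition, not a repository file] Every plain RoI head in nets/classifier.py below feeds torchvision's RoIPool with boxes packed as [batch_index, x1, y1, x2, y2]. A minimal self-contained sketch of that call convention (all shapes are made up for illustration):

import torch
from torchvision.ops import RoIPool

pool = RoIPool(output_size=(7, 7), spatial_scale=1.0)
feat = torch.randn(1, 512, 38, 38)               # one shared feature map
rois = torch.tensor([[0., 4., 4., 20., 24.]])    # [batch_index, x1, y1, x2, y2]
out = pool(feat, rois)                           # -> [1, 512, 7, 7]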
--------------------------------------------------------------------------------
/nets/classifier.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | import torch
4 | from torch import nn
5 | from torchvision.ops import RoIPool, MultiScaleRoIAlign, RoIAlign
6 | from collections import OrderedDict
7 | 
8 | warnings.filterwarnings("ignore")
9 | 
10 | class VGG16RoIHead(nn.Module):
11 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
12 |         super(VGG16RoIHead, self).__init__()
13 |         self.classifier = classifier
14 |         #--------------------------------------#
15 |         #   bounding-box regression on the RoI-pooled features
16 |         #--------------------------------------#
17 |         self.cls_loc = nn.Linear(4096, n_class * 4)
18 |         #-----------------------------------#
19 |         #   classification of the RoI-pooled features
20 |         #-----------------------------------#
21 |         self.score = nn.Linear(4096, n_class)
22 |         #-----------------------------------#
23 |         #   weight initialization
24 |         #-----------------------------------#
25 |         normal_init(self.cls_loc, 0, 0.001)
26 |         normal_init(self.score, 0, 0.01)
27 | 
28 |         self.roi = RoIPool((roi_size, roi_size), spatial_scale)
29 | 
30 |     def forward(self, x, rois, roi_indices, img_size):
31 |         n, _, _, _ = x.shape
32 |         if x.is_cuda:
33 |             roi_indices = roi_indices.cuda()
34 |             rois = rois.cuda()
35 |         rois = torch.flatten(rois, 0, 1)
36 |         roi_indices = torch.flatten(roi_indices, 0, 1)
37 | 
38 |         rois_feature_map = torch.zeros_like(rois)
39 |         rois_feature_map[:, [0,2]] = rois[:, [0,2]] / img_size[1] * x.size()[3]
40 |         rois_feature_map[:, [1,3]] = rois[:, [1,3]] / img_size[0] * x.size()[2]
41 | 
42 |         indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
43 |         #-----------------------------------#
44 |         #   crop the shared feature map with the proposal boxes
45 |         #-----------------------------------#
46 |         pool = self.roi(x, indices_and_rois)
47 |         #-----------------------------------#
48 |         #   feature extraction with the classifier network
49 |         #-----------------------------------#
50 |         pool = pool.view(pool.size(0), -1)
51 |         #--------------------------------------------------------------#
52 |         #   for a single input image, fc7 has shape [300, 4096]
53 |         #--------------------------------------------------------------#
54 |         fc7 = self.classifier(pool)
55 | 
56 |         roi_cls_locs = self.cls_loc(fc7)
57 |         roi_scores = self.score(fc7)
58 | 
59 |         roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
60 |         roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
61 |         return roi_cls_locs, roi_scores
62 | 
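# [Editor's note, not part of the repository file] The rescaling above maps image-space
# boxes to feature-map coordinates: with a 600x600 input and a 38x38 shared feature map,
# an x-coordinate of 300 becomes 300 / 600 * 38 = 19. This is equivalent to constructing
# RoIPool with spatial_scale = 38/600 (about 1/16) instead of the hard-coded
# spatial_scale = 1 used here.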
63 | # classification head of the Faster R-CNN network
64 | class Resnet50RoIHead(nn.Module):
65 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
66 |         super(Resnet50RoIHead, self).__init__()
67 |         self.classifier = classifier
68 |         #--------------------------------------#
69 |         #   bounding-box regression on the RoI-pooled features
70 |         #   in_features: 2048 -> out_features: n_class * 4
71 |         #--------------------------------------#
72 |         self.cls_loc = nn.Linear(2048, n_class * 4)
73 |         #-----------------------------------#
74 |         #   classification of the RoI-pooled features
75 |         #-----------------------------------#
76 |         self.score = nn.Linear(2048, n_class)
77 |         #-----------------------------------#
78 |         #   weight initialization
79 |         #-----------------------------------#
80 |         normal_init(self.cls_loc, 0, 0.001)
81 |         normal_init(self.score, 0, 0.01)
82 | 
83 |         self.roi = RoIPool((roi_size, roi_size), spatial_scale)
84 | 
85 |     def forward(self, x, rois, roi_indices, img_size):
86 |         n, _, _, _ = x.shape
87 |         if x.is_cuda:
88 |             roi_indices = roi_indices.cuda()
89 |             rois = rois.cuda()
90 |         rois = torch.flatten(rois, 0, 1)
91 |         roi_indices = torch.flatten(roi_indices, 0, 1)
92 | 
93 |         # rescale the proposals to feature-map coordinates
94 |         rois_feature_map = torch.zeros_like(rois)
95 |         rois_feature_map[:, [0,2]] = rois[:, [0,2]] / img_size[1] * x.size()[3]
96 |         rois_feature_map[:, [1,3]] = rois[:, [1,3]] / img_size[0] * x.size()[2]
97 | 
98 |         indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
99 |         #-----------------------------------#
100 |         #   crop the shared feature map with the proposal boxes,
101 |         #   mapping each proposal onto the prediction feature layer
102 |         #-----------------------------------#
103 |         pool = self.roi(x, indices_and_rois)
104 |         #-----------------------------------#
105 |         #   feature extraction with the classifier network
106 |         #-----------------------------------#
107 |         fc = self.classifier(pool)
108 |         #--------------------------------------------------------------#
109 |         #   for a single input image, fc7 has shape [300, 2048]
110 |         #--------------------------------------------------------------#
111 |         fc7 = fc.view(fc.size(0), -1)
112 | 
113 |         roi_cls_locs = self.cls_loc(fc7)
114 |         roi_scores = self.score(fc7)
115 |         roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
116 |         roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
117 |         return roi_cls_locs, roi_scores
118 | 
119 | 
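# [Editor's note — illustrative sketch, not part of the repository file]
# Resnet50_FPNRoIHead below pools from several FPN levels at once through torchvision's
# MultiScaleRoIAlign, which assigns each box to a pyramid level based on its area.
# A self-contained sketch of that API (level names, sizes and the box are made up):
#
#     import torch
#     from torchvision.ops import MultiScaleRoIAlign
#
#     m = MultiScaleRoIAlign(featmap_names=['p2', 'p3'], output_size=7, sampling_ratio=2)
#     feats = {'p2': torch.randn(1, 256, 150, 150), 'p3': torch.randn(1, 256, 75, 75)}
#     boxes = [torch.tensor([[20., 20., 220., 220.]])]   # xyxy boxes in image coordinates
#     out = m(feats, boxes, [(600, 600)])                # -> [1, 256, 7, 7]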
120 | class Resnet50_FPNRoIHead(nn.Module):
121 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
122 |         super(Resnet50_FPNRoIHead, self).__init__()
123 |         self.classifier = classifier
124 |         #--------------------------------------#
125 |         #   fully connected layers applied after RoI pooling
126 |         #   max-pooling layer
127 |         #--------------------------------------#
128 |         self.maxpool = nn.AdaptiveMaxPool2d(7)  # output size = (7, 7)
129 |         self.fc = nn.Linear(12544, 1024)
130 |         #--------------------------------------#
131 |         #   bounding-box regression on the RoI-pooled features
132 |         #   in_features: 1024 -> out_features: n_class * 4
133 |         #--------------------------------------#
134 |         self.cls_loc = nn.Linear(1024, n_class * 4)
135 |         #-----------------------------------#
136 |         #   classification of the RoI-pooled features
137 |         #-----------------------------------#
138 |         self.score = nn.Linear(1024, n_class)
139 |         #-----------------------------------#
140 |         #   weight initialization
141 |         #-----------------------------------#
142 |         normal_init(self.fc, 0, 0.001)
143 |         normal_init(self.cls_loc, 0, 0.001)
144 |         normal_init(self.score, 0, 0.01)
145 | 
146 |         self.roi = MultiScaleRoIAlign(featmap_names=['p2', 'p3', 'p4', 'p5'], output_size=7, sampling_ratio=2, canonical_scale=600, canonical_level=4)
147 |         # self.roi = RoIAlign(output_size=7, sampling_ratio=2, spatial_scale=1)
148 | 
149 |     def forward(self, x, rois, roi_indices, img_size):
150 |         # import time
151 |         # start = time.time()
152 |         # convert the list of pyramid levels into an OrderedDict, as expected by MultiScaleRoIAlign()
153 |         Ordered_x = OrderedDict(p2=x[0], p3=x[1], p4=x[2], p5=x[3])
154 |         # img_size_p1 = [(img_size[0], img_size[1])]
155 |         rois_p = []
156 |         # merge the (foreground and background) proposals of every pyramid level
157 |         for p in range(len(x)):
158 |             n, _, _, _ = x[p].shape
159 |             if x[p].is_cuda:
160 |                 roi = rois[p].cuda()
161 |                 rois_p.append(torch.flatten(roi, 0, 1))
162 |             else:
163 |                 rois_p.append(torch.flatten(rois[p], 0, 1))
164 | 
165 | 
166 |         # concatenate the p2~p5 proposal boxes into a single [tensor]
167 |         rois_x = [torch.cat(rois_p, dim=0)]
168 |         # run MultiScaleRoIAlign() over the multi-scale feature levels
169 |         pool = self.roi(Ordered_x, rois_x, [img_size])
170 |         #--------------------------------------------------------------#
171 |         #   flatten the pooled RoIs along the first dimension;
172 |         #   each RoI yields a 256*7*7 = 12544-dim vector
173 |         #--------------------------------------------------------------#
174 |         fc = self.maxpool(pool)
175 |         fc = fc.view(fc.size(0), -1)
176 |         # fully connected layer; outputs a 1024-dim feature vector
177 |         fc7 = self.fc(fc)
178 | 
179 |         roi_cls_locs = self.cls_loc(fc7)
180 |         roi_scores = self.score(fc7)
181 |         roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
182 |         roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
183 | 
184 |         # end = time.time() - start
185 |         # print("elapsed time:", end)
186 |         return roi_cls_locs, roi_scores
187 | 
188 | 
189 | class Resnet101RoIHead(nn.Module):
190 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
191 |         super(Resnet101RoIHead, self).__init__()
192 |         self.classifier = classifier
193 |         #--------------------------------------#
194 |         #   bounding-box regression on the RoI-pooled features
195 |         #--------------------------------------#
196 |         self.cls_loc = nn.Linear(2048, n_class * 4)
197 |         #-----------------------------------#
198 |         #   classification of the RoI-pooled features
199 |         #-----------------------------------#
200 |         self.score = nn.Linear(2048, n_class)
201 |         #-----------------------------------#
202 |         #   weight initialization
203 |         #-----------------------------------#
204 |         normal_init(self.cls_loc, 0, 0.001)
205 |         normal_init(self.score, 0, 0.01)
206 | 
207 |         self.roi = RoIPool((roi_size, roi_size), spatial_scale)
208 | 
209 |     def forward(self, x, rois, roi_indices, img_size):
210 |         n, _, _, _ = x.shape
211 |         if x.is_cuda:
212 |             roi_indices = roi_indices.cuda()
213 |             rois = rois.cuda()
214 |         rois = torch.flatten(rois, 0, 1)
215 |         roi_indices = torch.flatten(roi_indices, 0, 1)
216 | 
217 |         rois_feature_map = torch.zeros_like(rois)
218 |         rois_feature_map[:, [0,2]] = rois[:, [0,2]] / img_size[1] * x.size()[3]
219 |         rois_feature_map[:, [1,3]] = rois[:, [1,3]] / img_size[0] * x.size()[2]
220 | 
221 |         indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
222 |         #-----------------------------------#
223 |         #   crop the shared feature map with the proposal boxes
224 |         #-----------------------------------#
225 |         pool = self.roi(x, indices_and_rois)
226 |         #-----------------------------------#
227 |         #   feature extraction with the classifier network
228 |         #-----------------------------------#
229 |         fc7 = self.classifier(pool)
230 |         #--------------------------------------------------------------#
231 |         #   for a single input image, fc7 has shape [300, 2048]
232 |         #--------------------------------------------------------------#
233 |         fc7 = fc7.view(fc7.size(0), -1)
234 | 
235 |         roi_cls_locs = self.cls_loc(fc7)
236 |         roi_scores = self.score(fc7)
237 |         roi_cls_locs = roi_cls_locs.view(n, -1,
roi_cls_locs.size(1)) 238 | roi_scores = roi_scores.view(n, -1, roi_scores.size(1)) 239 | return roi_cls_locs, roi_scores 240 | 241 | 242 | def normal_init(m, mean, stddev, truncated=False): 243 | if truncated: 244 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 245 | else: 246 | m.weight.data.normal_(mean, stddev) 247 | m.bias.data.zero_() 248 | -------------------------------------------------------------------------------- /nets/resnet101.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | from torch.hub import load_state_dict_from_url 4 | 5 | class Bottleneck(nn.Module): 6 | """ 7 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 8 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 9 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 10 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 11 | """ 12 | expansion = 4 13 | def __init__(self, inplanes, planes, stride=1, downsample=None): 14 | super(Bottleneck, self).__init__() 15 | #1*1的卷积压缩通道数 16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 17 | self.bn1 = nn.BatchNorm2d(planes) 18 | #3*3卷积特征提取 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | #1*1复原通道数 22 | 23 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 24 | self.bn3 = nn.BatchNorm2d(planes * 4) 25 | 26 | self.relu = nn.ReLU(inplace=True) 27 | self.downsample = downsample 28 | self.stride = stride 29 | 30 | def forward(self, x): 31 | residual = x 32 | 33 | out = self.conv1(x) 34 | out = self.bn1(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv2(out) 38 | out = self.bn2(out) 39 | out = self.relu(out) 40 | 41 | out = self.conv3(out) 42 | out = self.bn3(out) 43 | if self.downsample is not None: 44 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 45 | #无残差边:输入维度=输出维度,对应identity block 46 | 47 | out += residual 48 | out = self.relu(out) 49 | 50 | return out 51 | 52 | class ResNet101(nn.Module): 53 | def __init__(self, block, layers, num_classes=1000): 54 | #-----------------------------------# 55 | # 假设输入进来的图片是600,600,3 56 | #-----------------------------------# 57 | self.inplanes = 64 58 | super(ResNet101, self).__init__() 59 | 60 | # input(600,600,3) -> conv2d stride(300,300,64) 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 62 | #步长stride=2,输出通道数=64,bias偏移量 63 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 64 | self.relu = nn.ReLU(inplace=True) #激活函数 65 | 66 | # 300,300,64 -> 150,150,64 最大池化 67 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 68 | 69 | # 150,150,64 -> 150,150,256 70 | self.layer1 = self._make_layer(block, 64, layers[0]) 71 | # 150,150,256 -> 75,75,512 72 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 73 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 74 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 75 | # self.layer4被用在classifier模型中 76 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 77 | 78 | self.avgpool = nn.AvgPool2d(7) 79 | self.fc = nn.Linear(512 * block.expansion, num_classes) 80 | 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 84 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 85 | elif isinstance(m, nn.BatchNorm2d): 86 | m.weight.data.fill_(1) 87 | m.bias.data.zero_() 88 | 89 | # 构建resnet残差结构layer1.。。。layer5 90 | def _make_layer(self, block, planes, blocks, stride=1): 91 | downsample = None 92 | #-------------------------------------------------------------------# 93 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 94 | #-------------------------------------------------------------------# 95 | if stride != 1 or self.inplanes != planes * block.expansion: 96 | downsample = nn.Sequential( 97 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 98 | nn.BatchNorm2d(planes * block.expansion), 99 | ) 100 | layers = [] 101 | layers.append(block(self.inplanes, planes, stride, downsample)) 102 | self.inplanes = planes * block.expansion 103 | for i in range(1, blocks): 104 | layers.append(block(self.inplanes, planes)) 105 | return nn.Sequential(*layers) 106 | 107 | def forward(self, x): 108 | x = self.conv1(x) 109 | x = self.bn1(x) 110 | x = self.relu(x) 111 | x = self.maxpool(x) 112 | 113 | x = self.layer1(x) 114 | x = self.layer2(x) 115 | x = self.layer3(x) 116 | x = self.layer4(x) 117 | 118 | x = self.avgpool(x) 119 | x = x.view(x.size(0), -1) #维度变化 120 | x = self.fc(x) #全连接层 121 | return x 122 | 123 | def resnet101(pretrained = False): 124 | model = ResNet101(Bottleneck, [3, 4, 23, 3]) #对应resnet101的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 125 | if pretrained: 126 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", model_dir="./model_data") 127 | model.load_state_dict(state_dict) 128 | #----------------------------------------------------------------------------# 129 | # 获取特征提取部分,从conv1到model.layer3,最终获得一个38,38,1024的特征层 130 | #----------------------------------------------------------------------------# 131 | features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]) 132 | #----------------------------------------------------------------------------# 133 | # 获取分类部分,从model.layer4到model.avgpool 134 | #----------------------------------------------------------------------------# 135 | classifier = list([model.layer4, model.avgpool]) 136 | 137 | features = nn.Sequential(*features) 138 | classifier = nn.Sequential(*classifier) 139 | return features, classifier 140 | -------------------------------------------------------------------------------- /nets/resnet50.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | from torch.hub import load_state_dict_from_url 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | """ 9 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 10 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 11 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 12 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 13 | """ 14 | expansion = 4 15 | def __init__(self, inplanes, planes, stride=1, downsample=None): 16 | super(Bottleneck, self).__init__() 17 | #1*1的卷积压缩通道数 18 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | #3*3卷积特征提取 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | #1*1复原通道数 24 | 25 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 26 | self.bn3 = nn.BatchNorm2d(planes * 4) 27 | 28 | self.relu = nn.ReLU(inplace=True) 29 | 
self.downsample = downsample 30 | self.stride = stride 31 | 32 | def forward(self, x): 33 | residual = x 34 | 35 | out = self.conv1(x) 36 | out = self.bn1(out) 37 | out = self.relu(out) 38 | 39 | out = self.conv2(out) 40 | out = self.bn2(out) 41 | out = self.relu(out) 42 | 43 | out = self.conv3(out) 44 | out = self.bn3(out) 45 | if self.downsample is not None: 46 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 47 | #无残差边:输入维度=输出维度,对应identity block 48 | 49 | out += residual 50 | out = self.relu(out) 51 | 52 | return out 53 | 54 | class ResNet(nn.Module): 55 | def __init__(self, block, layers, include_top=True,num_classes=1000): 56 | #-----------------------------------# 57 | # 假设输入进来的图片是600,600,3 58 | #-----------------------------------# 59 | self.include_top = include_top 60 | self.inplanes = 64 61 | super(ResNet, self).__init__() 62 | 63 | # input(600,600,3) -> conv2d stride(300,300,64) 64 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 65 | #步长stride=2,输出通道数=64,bias偏移量 66 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 67 | self.relu = nn.ReLU(inplace=True) #激活函数 68 | 69 | # 300,300,64 -> 150,150,64 最大池化 70 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 71 | 72 | # 150,150,64 -> 150,150,256 73 | self.layer1 = self._make_layer(block, 64, layers[0]) 74 | # 150,150,256 -> 75,75,512 75 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 76 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 77 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 78 | # self.layer4被用在classifier模型中 79 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 80 | 81 | if self.include_top: 82 | self.avgpool = nn.AvgPool2d(7) # output size = (1, 1) 83 | self.fc = nn.Linear(512 * block.expansion, num_classes) 84 | 85 | #resnet模型每层进行参数学习,如:layer1中每层进行模型训练 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d): 88 | new_var = 1 89 | n = m.kernel_size[0] * m.kernel_size[new_var] * m.out_channels #通道数的改变(如:256->64) 90 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 91 | elif isinstance(m, nn.BatchNorm2d): 92 | m.weight.data.fill_(1) #更改resnet50网络中每层中权重数据 93 | m.bias.data.zero_() 94 | 95 | def _make_layer(self, block, planes, blocks, stride=1): 96 | downsample = None 97 | #-------------------------------------------------------------------# 98 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 99 | #-------------------------------------------------------------------# 100 | if stride != 1 or self.inplanes != planes * block.expansion: 101 | downsample = nn.Sequential( 102 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 103 | nn.BatchNorm2d(planes * block.expansion), 104 | ) 105 | layers = [] 106 | layers.append(block(self.inplanes, planes, stride, downsample)) 107 | self.inplanes = planes * block.expansion 108 | # resnet50网络层数堆积,layer=[3, 4, 6, 3] 109 | for i in range(1, blocks): 110 | layers.append(block(self.inplanes, planes)) 111 | return nn.Sequential(*layers) 112 | 113 | def forward(self, x): 114 | x = self.conv1(x) 115 | x = self.bn1(x) 116 | x = self.relu(x) 117 | x = self.maxpool(x) 118 | 119 | x = self.layer1(x) 120 | x = self.layer2(x) 121 | x = self.layer3(x) 122 | x = self.layer4(x) 123 | 124 | if self.include_top: 125 | x = self.avgpool(x) 126 | # x = torch.flatten(x, 1) 127 | x = x.view(x.size(0), -1) # 传入神经网络之前将tensor变形, 128 | x = self.fc(x) # 输入全连接层,神经网络输入准备 129 | 130 | return x 131 | 132 | def resnet50(pretrained = False): 133 | model = ResNet(Bottleneck, [3, 4, 6, 3]) #对应resnet50的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 134 | if pretrained: 135 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/resnet50-19c8e357.pth", model_dir="./model_data") 136 | model.load_state_dict(state_dict) 137 | #----------------------------------------------------------------------------# 138 | # 获取特征提取部分,从conv1到model.layer3,最终获得一个38,38,1024的特征层 139 | #----------------------------------------------------------------------------# 140 | features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]) 141 | print('features:', features) 142 | #----------------------------------------------------------------------------# 143 | # 获取分类部分,从model.layer4到model.avgpool 144 | #----------------------------------------------------------------------------# 145 | classifier = list([model.layer4, model.avgpool]) 146 | print('classifier:', classifier) 147 | 148 | features = nn.Sequential(*features) 149 | print('features:', features) 150 | classifier = nn.Sequential(*classifier) 151 | print('classifier:', classifier) 152 | return features, classifier 153 | 154 | 155 | # net = ResNet(Bottleneck, [3, 4, 6, 3]) 156 | # print(net) 157 | 158 | -------------------------------------------------------------------------------- /nets/resnet50_ECA_FPN.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | class eca_layer(nn.Module): 6 | """Constructs a ECA module. 
7 | 8 | Args: 9 | channel: Number of channels of the input feature map 10 | k_size: Adaptive selection of kernel size 11 | """ 12 | def __init__(self, channel, k_size=3): 13 | super(eca_layer, self).__init__() 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | # feature descriptor on the global spatial information 20 | y = self.avg_pool(x) 21 | 22 | # Two different branches of ECA module 23 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 24 | 25 | # Multi-scale information fusion 26 | y = self.sigmoid(y) 27 | 28 | return x * y.expand_as(x) 29 | 30 | 31 | class Bottleneck(nn.Module): 32 | """ 33 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 34 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 35 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 36 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 37 | """ 38 | expansion = 4 #通道倍增数 39 | 40 | def __init__(self, inplanes, planes, stride=1, downsample=None,k_size=3): 41 | super(Bottleneck, self).__init__() 42 | 43 | #1*1的卷积压缩通道数 44 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | #3*3卷积特征提取 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | #1*1复原通道数 50 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(planes * 4) 52 | 53 | # 激活+下采样 54 | self.relu = nn.ReLU(inplace=True) 55 | # 加入ECA模型 56 | self.eca = eca_layer(planes * 4, k_size) 57 | 58 | self.downsample = downsample 59 | self.stride = stride 60 | 61 | def forward(self, x): 62 | residual = x 63 | 64 | out = self.conv1(x) 65 | out = self.bn1(out) 66 | out = self.relu(out) 67 | 68 | out = self.conv2(out) 69 | out = self.bn2(out) 70 | out = self.relu(out) 71 | 72 | out = self.conv3(out) 73 | out = self.bn3(out) 74 | out = self.eca(out) 75 | 76 | if self.downsample is not None: 77 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 78 | #无残差边:输入维度=输出维度,对应identity block 79 | out += residual 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | class ResNet50_ECA_FPN(nn.Module): 85 | def __init__(self, block, layers, num_classes=100,k_size=[3, 3, 3, 3]): 86 | #-----------------------------------# 87 | # 假设输入进来的图片是600,600,3 88 | #-----------------------------------# 89 | super(ResNet50_ECA_FPN, self).__init__() 90 | self.inplanes = 64 91 | 92 | #处理输入的C1模块(C1代表了RestNet的前几个卷积与池化层) 93 | # input(600,600,3) -> conv2d stride(300,300,64) 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 95 | #步长stride=2,输出通道数=64,bias偏移量 96 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 97 | self.relu = nn.ReLU(inplace=True) #激活函数 98 | 99 | # 300,300,64 -> 150,150,64 最大池化 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 101 | 102 | ''' Bottom-up layers ,搭建自下而上的C2,C3,C4,C5''' 103 | # 150,150,64 -> 150,150,256 104 | self.layer1 = self._make_layer(block, 64, layers[0],int(k_size[0])) 105 | # 150,150,256 -> 75,75,512 106 | self.layer2 = self._make_layer(block, 128, layers[1],int(k_size[1]), stride=2) 107 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 108 | self.layer3 = self._make_layer(block, 256, layers[2],int(k_size[2]), stride=2) 109 | # 38,38,1024 -> 19,19,2048 110 | 
self.layer4 = self._make_layer(block, 512, layers[3],int(k_size[3]), stride=2) 111 | 112 | # 对C5减少通道数,得到P5 113 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 114 | 115 | # Smooth layers,3x3卷积融合特征 116 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 117 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 118 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 119 | self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 120 | 121 | # Lateral layers,横向连接,保证通道数相同 122 | self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 123 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 124 | self.latlayer1 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) 125 | 126 | # 19,19,p5 ->10,10, p6 最大池化 127 | self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=True) 128 | 129 | # 平均池化层和全连接层 130 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size = (1, 1) 131 | self.fc = nn.Linear(256, 256) 132 | 133 | #resnet模型每层进行参数学习,如:layer1中每层进行模型训练 134 | for m in self.modules(): 135 | if isinstance(m, nn.Conv2d): 136 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 137 | m.weight.data.normal_(0, math.sqrt(2. / n)) 138 | elif isinstance(m, nn.BatchNorm2d): 139 | m.weight.data.fill_(1) 140 | m.bias.data.zero_() 141 | 142 | def _make_layer(self, block, planes, blocks, k_size, stride=1): 143 | downsample = None 144 | #-------------------------------------------------------------------# 145 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 146 | # 将输入的downsample(x)自动按照Sequential()里面的布局,顺序执行, 147 | # 目的:优化类似于这种结构:x = self.bn1(x),x = self.relu(x),降低运行内存。 148 | #-------------------------------------------------------------------# 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 152 | nn.BatchNorm2d(planes * block.expansion), 153 | ) 154 | layers = [] 155 | layers.append(block(self.inplanes, planes, stride, downsample,k_size)) 156 | self.inplanes = planes * block.expansion 157 | # resnet50网络层数堆积,layer=[3, 4, 6, 3] 158 | for i in range(1, blocks): 159 | layers.append(block(self.inplanes, planes,k_size=k_size)) 160 | return nn.Sequential(*layers) 161 | 162 | # 通过上采样后,进行特征融合 163 | def _upsample_add(self, x, y): 164 | _,_,H,W = y.size() 165 | return nn.functional.upsample(x, size=(H,W), mode='bilinear') + y 166 | 167 | def forward(self, x): 168 | # Bottom-up 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | c1 = self.maxpool(x) 173 | 174 | # 自己构建的fpn网络,c1~c4层搭建 175 | c2 = self.layer1(c1) 176 | c3 = self.layer2(c2) 177 | c4 = self.layer3(c3) 178 | c5 = self.layer4(c4) 179 | 180 | # Top-down 降通道数 181 | p5 = self.toplayer(c5) 182 | # upsample 183 | p4 = self._upsample_add(p5, self.latlayer3(c4)) 184 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 185 | p2 = self._upsample_add(p3, self.latlayer1(c2)) 186 | 187 | # Smooth,特征提取,卷积的融合,平滑处理 188 | p5 = self.smooth4(p5) 189 | # 19,19,256->10,10,256 经过maxpool得到p6,用于rpn网络中 190 | p6 = self.maxpool_p6(p5) 191 | p4 = self.smooth3(p4) 192 | p3 = self.smooth2(p3) 193 | p2 = self.smooth1(p2) 194 | 195 | x = [p2, p3, p4,p5, p6] 196 | # 对fpn的特征层进行全连接层 197 | # for key,value in x.items() : 198 | # value = self.avgpool(value) 199 | # # view()函数的功能根reshape类似,用来转换size大小。x = x.view(batchsize, 
-1)中batchsize指转换后有几行,而-1指在不告诉函数有多少列的情况下,根据原tensor数据和batchsize自动分配列数。 200 | # value = value.view(value.size(0), -1) 201 | # # value = torch.flatten(value, 1) #flatten(x,1)是按照x的第1个维度拼接(按照列来拼接,横向拼接);flatten(x,0)是按照x的第0个维度拼接(按照行来拼接,纵向拼接) 202 | # value = self.fc(value) 203 | # # value = value.view(-1) 204 | # x.update(key,value) 205 | 206 | return x 207 | 208 | # test 209 | # FPN = ResNet50_ECA_FPN(Bottleneck, [3, 4, 6, 3]) 210 | # print('FPN:',FPN) 211 | 212 | 213 | def resnet50_ECA_FPN(pretrained=False): 214 | # 对应resnet50的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 215 | model = ResNet50_ECA_FPN(Bottleneck, [3, 4, 6, 3]) 216 | # print('ResNet50_FPN:',model) 217 | #----------------------------------------------------------------------------# 218 | # 获取特征提取部分,从conv1到model.smooth1(p4层),获得多个p2, p3, p4, p5,p6不同尺度的特征层 219 | #----------------------------------------------------------------------------# 220 | # features = list([model.conv1, model.bn1, model.relu,model.maxpool, model.layer1, model.layer2, model.layer3,model.layer4, 221 | # model.toplayer,model.smooth4, model.smooth3, model.smooth2, model.smooth1]) 222 | 223 | # features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, 224 | # model.layer2, model.layer3, model.layer4, ]) 225 | #----------------------------------------------------------------------------# 226 | # 获取分类部分,从model.smooth3(p2)到model.toplayer(p5)特征层 227 | #----------------------------------------------------------------------------# 228 | classifier = list([model.smooth1, model.smooth2, model.smooth3, model.smooth4, 229 | model.avgpool]) 230 | 231 | # 特征提取(feature map) 232 | features = model 233 | # features = nn.Sequential(*features) # 函数参数(位置参数,*可变参数(以tuple/list形式传递),**关键字参数(以字典形式传递), 234 | # 默认参数(需要放在参数中最右端,避免传参是歧义)) 235 | print('features:', features) 236 | classifier = nn.Sequential(*classifier) #在进行完roipool层后,进行回归和分类预测 237 | print('classifier:', classifier) 238 | return features, classifier 239 | 240 | -------------------------------------------------------------------------------- /nets/resnet50_FPN.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | class eca_layer(nn.Module): 6 | """Constructs a ECA module. 
7 | 8 | Args: 9 | channel: Number of channels of the input feature map 10 | k_size: Adaptive selection of kernel size 11 | """ 12 | def __init__(self, channel, k_size=3): 13 | super(eca_layer, self).__init__() 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | # feature descriptor on the global spatial information 20 | y = self.avg_pool(x) 21 | 22 | # Two different branches of ECA module 23 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 24 | 25 | # Multi-scale information fusion 26 | y = self.sigmoid(y) 27 | 28 | return x * y.expand_as(x) 29 | 30 | 31 | class Bottleneck(nn.Module): 32 | """ 33 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 34 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 35 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 36 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 37 | """ 38 | expansion = 4 #通道倍增数 39 | 40 | def __init__(self, inplanes, planes, stride=1, downsample=None,k_size=3): 41 | super(Bottleneck, self).__init__() 42 | 43 | #1*1的卷积压缩通道数 44 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | #3*3卷积特征提取 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | #1*1复原通道数 50 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(planes * 4) 52 | 53 | # 激活+下采样 54 | self.relu = nn.ReLU(inplace=True) 55 | # 加入ECA模型 56 | self.eca = eca_layer(planes * 4, k_size) 57 | 58 | self.downsample = downsample 59 | self.stride = stride 60 | 61 | def forward(self, x): 62 | residual = x 63 | 64 | out = self.conv1(x) 65 | out = self.bn1(out) 66 | out = self.relu(out) 67 | 68 | out = self.conv2(out) 69 | out = self.bn2(out) 70 | out = self.relu(out) 71 | 72 | out = self.conv3(out) 73 | out = self.bn3(out) 74 | out = self.eca(out) 75 | 76 | if self.downsample is not None: 77 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 78 | #无残差边:输入维度=输出维度,对应identity block 79 | out += residual 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | class ResNet50_FPN(nn.Module): 85 | def __init__(self, block, layers, num_classes=100,k_size=[1, 1, 1, 1]): 86 | #-----------------------------------# 87 | # 假设输入进来的图片是600,600,3 88 | #-----------------------------------# 89 | super(ResNet50_FPN, self).__init__() 90 | self.inplanes = 64 91 | 92 | #处理输入的C1模块(C1代表了RestNet的前几个卷积与池化层) 93 | # input(600,600,3) -> conv2d stride(300,300,64) 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 95 | #步长stride=2,输出通道数=64,bias偏移量 96 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 97 | self.relu = nn.ReLU(inplace=True) #激活函数 98 | 99 | # 300,300,64 -> 150,150,64 最大池化 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 101 | 102 | ''' Bottom-up layers ,搭建自下而上的C2,C3,C4,C5''' 103 | # 150,150,64 -> 150,150,256 104 | self.layer1 = self._make_layer(block, 64, layers[0],int(k_size[0])) 105 | # 150,150,256 -> 75,75,512 106 | self.layer2 = self._make_layer(block, 128, layers[1],int(k_size[1]), stride=2) 107 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 108 | self.layer3 = self._make_layer(block, 256, layers[2],int(k_size[2]), stride=2) 109 | # 38,38,1024 -> 19,19,2048 110 | 
self.layer4 = self._make_layer(block, 512, layers[3],int(k_size[3]), stride=2) 111 | 112 | # 对C5减少通道数,得到P5 113 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 114 | 115 | # Smooth layers,3x3卷积融合特征 116 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 117 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 118 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 119 | self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 120 | 121 | # Lateral layers,横向连接,保证通道数相同 122 | self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 123 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 124 | self.latlayer1 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) 125 | 126 | # 19,19,p5 ->10,10, p6 最大池化 127 | self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=True) 128 | 129 | # 最池化层和全连接层 130 | self.maxpool1 = nn.AdaptiveMaxPool2d(7) # output size = (1, 1) 131 | # self.fc = nn.Linear(256, 256) 132 | 133 | #resnet模型每层进行参数学习,如:layer1中每层进行模型训练 134 | for m in self.modules(): 135 | if isinstance(m, nn.Conv2d): 136 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 137 | m.weight.data.normal_(0, math.sqrt(2. / n)) 138 | elif isinstance(m, nn.BatchNorm2d): 139 | m.weight.data.fill_(1) 140 | m.bias.data.zero_() 141 | 142 | def _make_layer(self, block, planes, blocks,k_size, stride=1): 143 | downsample = None 144 | #-------------------------------------------------------------------# 145 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 146 | # 将输入的downsample(x)自动按照Sequential()里面的布局,顺序执行, 147 | # 目的:优化类似于这种结构:x = self.bn1(x),x = self.relu(x),降低运行内存。 148 | #-------------------------------------------------------------------# 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 152 | nn.BatchNorm2d(planes * block.expansion), 153 | ) 154 | layers = [] 155 | layers.append(block(self.inplanes, planes, stride,downsample,k_size)) 156 | self.inplanes = planes * block.expansion 157 | # resnet50网络层数堆积,layer=[3, 4, 6, 3] 158 | for i in range(1, blocks): 159 | layers.append(block(self.inplanes, planes,k_size)) 160 | return nn.Sequential(*layers) 161 | 162 | # 通过上采样后,进行特征融合 163 | def _upsample_add(self, x, y): 164 | _,_,H,W = y.size() 165 | return nn.functional.upsample(x, size=(H,W), mode='bilinear') + y 166 | 167 | def forward(self, x): 168 | # Bottom-up 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | c1 = self.maxpool(x) 173 | 174 | # 自己构建的fpn网络,c1~c4层搭建 175 | c2 = self.layer1(c1) 176 | c3 = self.layer2(c2) 177 | c4 = self.layer3(c3) 178 | c5 = self.layer4(c4) 179 | 180 | # Top-down 降通道数 181 | p5 = self.toplayer(c5) 182 | # upsample 183 | p4 = self._upsample_add(p5, self.latlayer3(c4)) 184 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 185 | p2 = self._upsample_add(p3, self.latlayer1(c2)) 186 | 187 | # Smooth,特征提取,卷积的融合,平滑处理 188 | p5 = self.smooth4(p5) 189 | # 19,19,256->10,10,256 经过maxpool得到p6,用于rpn网络中 190 | p6 = self.maxpool_p6(p5) 191 | p4 = self.smooth3(p4) 192 | p3 = self.smooth2(p3) 193 | p2 = self.smooth1(p2) 194 | 195 | x = [p2, p3, p4,p5, p6] 196 | # 对fpn的特征层进行全连接层 197 | # for key,value in x.items() : 198 | # value = self.avgpool(value) 199 | # # view()函数的功能根reshape类似,用来转换size大小。x = x.view(batchsize, 
-1)中batchsize指转换后有几行,而-1指在不告诉函数有多少列的情况下,根据原tensor数据和batchsize自动分配列数。 200 | # value = value.view(value.size(0), -1) 201 | # # value = torch.flatten(value, 1) #flatten(x,1)是按照x的第1个维度拼接(按照列来拼接,横向拼接);flatten(x,0)是按照x的第0个维度拼接(按照行来拼接,纵向拼接) 202 | # value = self.fc(value) 203 | # # value = value.view(-1) 204 | # x.update(key,value) 205 | 206 | return x 207 | 208 | # test 209 | # FPN = ResNet50_FPN(Bottleneck, [3, 4, 6, 3]) 210 | # print('FPN:',FPN) 211 | 212 | 213 | def resnet50_FPN(pretrained=False): 214 | # 对应resnet50的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 215 | model = ResNet50_FPN(Bottleneck, [3, 4, 6, 3]) 216 | # print('ResNet50_FPN:',model) 217 | #----------------------------------------------------------------------------# 218 | # 获取特征提取部分,从conv1到model.smooth1(p4层),获得多个p2, p3, p4, p5,p6不同尺度的特征层 219 | #----------------------------------------------------------------------------# 220 | # features = list([model.conv1, model.bn1, model.relu,model.maxpool, model.layer1, model.layer2, model.layer3,model.layer4, 221 | # model.toplayer,model.smooth4, model.smooth3, model.smooth2, model.smooth1]) 222 | 223 | # features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, 224 | # model.layer2, model.layer3, model.layer4, ]) 225 | #----------------------------------------------------------------------------# 226 | # 获取分类部分,从model.smooth3(p2)到model.toplayer(p5)特征层 227 | #----------------------------------------------------------------------------# 228 | classifier = list([model.smooth1, model.smooth2, model.smooth3, model.smooth4, 229 | model.maxpool1]) 230 | 231 | # 特征提取(feature map) 232 | features = model 233 | # features = nn.Sequential(*features) # 函数参数(位置参数,*可变参数(以tuple/list形式传递),**关键字参数(以字典形式传递), 234 | # 默认参数(需要放在参数中最右端,避免传参是歧义)) 235 | print('features:', features) 236 | classifier = nn.Sequential(*classifier) #在进行完roipool层后,进行回归和分类预测 237 | print('classifier:', classifier) 238 | return features, classifier 239 | 240 | -------------------------------------------------------------------------------- /nets/rpn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torchvision.ops import nms 7 | from utils.anchors import _enumerate_shifted_anchor, generate_anchor_base 8 | from utils.utils_bbox import loc2bbox 9 | from collections import OrderedDict 10 | 11 | class ProposalCreator(): 12 | def __init__( 13 | self, 14 | mode, 15 | nms_iou = 0.7, 16 | n_train_pre_nms = 12000, 17 | n_train_post_nms = 1000, 18 | n_test_pre_nms = 3000, 19 | n_test_post_nms = 1000, 20 | min_size = 16 21 | 22 | ): 23 | #-----------------------------------# 24 | # 设置预测还是训练 25 | #-----------------------------------# 26 | self.mode = mode 27 | #-----------------------------------# 28 | # 建议框非极大抑制的iou大小 29 | #-----------------------------------# 30 | self.nms_iou = nms_iou 31 | #-----------------------------------# 32 | # 训练用到的建议框数量 33 | #-----------------------------------# 34 | self.n_train_pre_nms = n_train_pre_nms 35 | self.n_train_post_nms = n_train_post_nms 36 | #-----------------------------------# 37 | # 预测用到的建议框数量 38 | #-----------------------------------# 39 | self.n_test_pre_nms = n_test_pre_nms 40 | self.n_test_post_nms = n_test_post_nms 41 | self.min_size = min_size 42 | 43 | def __call__(self, loc, score, anchor, img_size, scale=1.): 44 | if self.mode == "training": 45 | n_pre_nms = self.n_train_pre_nms 46 | n_post_nms = self.n_train_post_nms 47 | 
else: 48 | n_pre_nms = self.n_test_pre_nms 49 | n_post_nms = self.n_test_post_nms 50 | 51 | #-----------------------------------# 52 | # 将先验框转换成tensor 53 | #-----------------------------------# 54 | anchor = torch.from_numpy(anchor).type_as(loc) 55 | #-----------------------------------# 56 | # 将RPN网络预测结果转化成建议框 57 | #-----------------------------------# 58 | roi = loc2bbox(anchor, loc) 59 | #-----------------------------------# 60 | # 防止建议框超出图像边缘 61 | #-----------------------------------# 62 | roi[:, [0, 2]] = torch.clamp(roi[:, [0, 2]], min = 0, max = img_size[1]) 63 | roi[:, [1, 3]] = torch.clamp(roi[:, [1, 3]], min = 0, max = img_size[0]) 64 | 65 | #-----------------------------------# 66 | # 建议框的宽高的最小值不可以小于16 67 | #-----------------------------------# 68 | min_size = self.min_size * scale 69 | keep = torch.where(((roi[:, 2] - roi[:, 0]) >= min_size) & ((roi[:, 3] - roi[:, 1]) >= min_size))[0] 70 | #-----------------------------------# 71 | # 将对应的建议框保留下来 72 | #-----------------------------------# 73 | roi = roi[keep, :] 74 | score = score[keep] 75 | 76 | #-----------------------------------# 77 | # 根据得分进行排序,取出建议框 78 | #-----------------------------------# 79 | order = torch.argsort(score, descending=True) 80 | if n_pre_nms > 0: 81 | order = order[:n_pre_nms] 82 | roi = roi[order, :] 83 | score = score[order] 84 | 85 | #-----------------------------------# 86 | # 对建议框进行非极大抑制 87 | # 使用官方的非极大抑制会快非常多 88 | #-----------------------------------# 89 | keep = nms(roi, score, self.nms_iou) 90 | if len(keep) < n_post_nms: 91 | index_extra = np.random.choice(range(len(keep)), size=(n_post_nms - len(keep)), replace=True) 92 | keep = torch.cat([keep, keep[index_extra]]) 93 | keep = keep[:n_post_nms] 94 | roi = roi[keep] 95 | return roi 96 | 97 | 98 | class resnet50_fpn_RPNhead(nn.Module): 99 | def __init__( 100 | self, 101 | in_channels=512, 102 | mid_channels=512, 103 | ratios=[0.5, 1, 2], 104 | anchor_scales=[4, 16, 32], 105 | feat_stride=16, 106 | mode="training", 107 | ): 108 | super(resnet50_fpn_RPNhead, self).__init__() 109 | #-----------------------------------------# 110 | # 生成基础先验框,shape为[9, 4] 111 | #-----------------------------------------# 112 | self.anchor_base = generate_anchor_base( 113 | anchor_scales=anchor_scales, ratios=ratios) 114 | #每个网格上默认的先验框数量 115 | n_anchor = self.anchor_base.shape[0] 116 | 117 | #-----------------------------------------# 118 | # 先进行一个3x3的卷积,可理解为特征整合 119 | #-----------------------------------------# 120 | self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1) 121 | #-----------------------------------------# 122 | # 分类预测先验框内部是否包含物体,score为带有18通道的conv1*1卷积, 123 | #-----------------------------------------# 124 | self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0) 125 | #-----------------------------------------# 126 | # 回归预测对先验框进行调整,loc带有36通道的conv1*1卷积 127 | #-----------------------------------------# 128 | self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0) 129 | 130 | #-----------------------------------------# 131 | # 特征点间距步长 132 | #-----------------------------------------# 133 | self.feat_stride = feat_stride 134 | #-----------------------------------------# 135 | # 用于对建议框解码并进行非极大抑制 136 | #-----------------------------------------# 137 | self.proposal_layer = ProposalCreator(mode) 138 | #--------------------------------------# 139 | # 对FPN的网络部分进行权值初始化 140 | #--------------------------------------# 141 | normal_init(self.conv1, 0, 0.01) 142 | normal_init(self.score, 0, 0.01) 143 | normal_init(self.loc, 0, 0.01) 144 | 145 | #输入的x为feature 
map共享特征p2~p6层, 146 | def forward(self, x, img_size, scale=1.): 147 | rois = [] 148 | roi_indices =[] 149 | rpn_locs =[] 150 | rpn_scores=[] 151 | anchor = [] 152 | #对p2~p5层分别进行建议框生成 153 | for p in x: 154 | n, _, h, w = p.shape 155 | #-----------------------------------------# 156 | # 先进行一个3x3的卷积,可理解为特征整合 157 | #-----------------------------------------# 158 | p = F.relu(self.conv1(p)) # 激活函数 159 | #-----------------------------------------# 160 | # 回归预测对先验框进行调整 161 | # view(n, -1, 4):n个(m/(4*n))行4列的新tensor形状。 162 | # 交换后n(第0维度)=batch_size(这里为2,表示背景和物体形状), 163 | # -1(1维度):表示每个先验框(自行计算), 164 | # 4(第2维度)=调整先验框位置的四个参数。 165 | #-----------------------------------------# 166 | rpn_locs_k = self.loc(p) 167 | rpn_locs_k = rpn_locs_k.permute(0, 2, 3, 1).contiguous().view(n, -1, 4) 168 | #-----------------------------------------# 169 | # torch.transpose():交换指定的两个维度的内容 170 | # torch.permute():一次性交换多个维度。 171 | # contiguous():相当于是在permute(0, 2, 3, 1)tensor中复制一份,在用于view()中的tensor进行结构改变,而不影响前面的数据内容和结构。 172 | # torch.view():首先,view()函数会将Tensor所有维度拉平成一维(m),然后再根据传入的的维度信息重构出一个Tensor。 173 | # 174 | # Tensor与ndarray数组一样, 175 | # 176 | # 分类预测先验框内部是否包含物体 177 | # 178 | #-----------------------------------------# 179 | rpn_scores_k = self.score(p) 180 | rpn_scores_k = rpn_scores_k.permute(0, 2, 3, 1).contiguous().view(n, -1, 2) 181 | 182 | #--------------------------------------------------------------------------------------# 183 | # 进行softmax概率计算,每个先验框只有两个判别结果 184 | # 内部包含物体或者内部不包含物体,rpn_softmax_scores[:, :, 1]的内容为包含物体的概率 185 | #--------------------------------------------------------------------------------------# 186 | rpn_softmax_scores = F.softmax(rpn_scores_k, dim=-1) 187 | rpn_fg_scores = rpn_softmax_scores[:, :, 1].contiguous() 188 | rpn_fg_scores = rpn_fg_scores.view(n, -1) 189 | 190 | #------------------------------------------------------------------------------------------------# 191 | # 生成先验框,此时获得的anchor是布满网格点的,当输入图片为600,600,3的时候,shape为(12996, 4) 192 | #------------------------------------------------------------------------------------------------# 193 | anchor_k = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w) 194 | rois_k = [] 195 | roi_indices_k = [] 196 | #分离开背景和前景 197 | for i in range(n): 198 | roi = self.proposal_layer(rpn_locs_k[i], rpn_fg_scores[i], anchor_k, img_size, scale=scale) 199 | batch_index = i * torch.ones((len(roi),)) 200 | rois_k.append(roi.unsqueeze(0)) 201 | roi_indices_k.append(batch_index.unsqueeze(0)) 202 | 203 | #------------------------------------------------------------------# 204 | # 获得RPN网络的预测结果,进行格式调整,把五个特征层的结果进行堆叠 205 | #------------------------------------------------------------------# 206 | rois.append(torch.cat(rois_k, dim=0).type_as(p)) 207 | roi_indices.append(torch.cat(roi_indices_k, dim=0).type_as(p)) 208 | anchor.append(torch.from_numpy(anchor_k).unsqueeze(0).float().to(p.device)) 209 | rpn_locs.append(rpn_locs_k) 210 | rpn_scores.append(rpn_scores_k) 211 | 212 | return rpn_locs, rpn_scores, rois, roi_indices, anchor 213 | 214 | 215 | class RegionProposalNetwork(nn.Module): 216 | def __init__( 217 | self, 218 | in_channels = 512, 219 | mid_channels = 512, 220 | ratios = [0.5, 1, 2], 221 | anchor_scales = [4, 16, 32], 222 | feat_stride = 16, 223 | mode = "training", 224 | ): 225 | super(RegionProposalNetwork, self).__init__() 226 | #-----------------------------------------# 227 | # 生成基础先验框,shape为[9, 4] 228 | #-----------------------------------------# 229 | self.anchor_base = generate_anchor_base(anchor_scales = anchor_scales, 
215 | class RegionProposalNetwork(nn.Module):
216 |     def __init__(
217 |         self, 
218 |         in_channels     = 512, 
219 |         mid_channels    = 512, 
220 |         ratios          = [0.5, 1, 2],
221 |         anchor_scales   = [4, 16, 32], 
222 |         feat_stride     = 16,
223 |         mode            = "training",
224 |     ):
225 |         super(RegionProposalNetwork, self).__init__()
226 |         #-----------------------------------------#
227 |         #   Generate the base anchors; shape is [9, 4]
228 |         #-----------------------------------------#
229 |         self.anchor_base    = generate_anchor_base(anchor_scales = anchor_scales, ratios = ratios)
230 |         # default number of anchors on each grid point
231 |         n_anchor            = self.anchor_base.shape[0]
232 | 
233 |         #-----------------------------------------#
234 |         #   A 3x3 convolution first, which can be understood as feature integration
235 |         #-----------------------------------------#
236 |         self.conv1  = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
237 |         #-----------------------------------------#
238 |         #   Classification branch: a 1x1 conv with n_anchor * 2 channels (18 here) that predicts whether each anchor contains an object
239 |         #-----------------------------------------#
240 |         self.score  = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
241 |         #-----------------------------------------#
242 |         #   Regression branch: a 1x1 conv with n_anchor * 4 channels (36 here) that predicts the anchor adjustments
243 |         #-----------------------------------------#
244 |         self.loc    = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
245 | 
246 |         #-----------------------------------------#
247 |         #   Stride between feature points on the input image
248 |         #-----------------------------------------#
249 |         self.feat_stride    = feat_stride
250 |         #-----------------------------------------#
251 |         #   Decodes the proposals and applies non-maximum suppression
252 |         #-----------------------------------------#
253 |         self.proposal_layer = ProposalCreator(mode)
254 |         #--------------------------------------#
255 |         #   Weight initialisation of the RPN layers
256 |         #--------------------------------------#
257 |         normal_init(self.conv1, 0, 0.01)
258 |         normal_init(self.score, 0, 0.01)
259 |         normal_init(self.loc, 0, 0.01)
260 | 
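# Quick dimension check for the head above (illustrative, assuming the default
# 3 ratios x 3 scales = 9 anchors; run it e.g. at the bottom of this file):
#
#     rpn = RegionProposalNetwork()
#     print(rpn.anchor_base.shape)      # (9, 4)
#     print(rpn.score.out_channels)     # 18 = 9 anchors * 2 classes
#     print(rpn.loc.out_channels)       # 36 = 9 anchors * 4 offsets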
261 |     # the input x is the shared feature map
262 |     def forward(self, x, img_size, scale=1.):
263 |         n, _, h, w = x.shape
264 |         #-----------------------------------------#
265 |         #   A 3x3 convolution first, which can be understood as feature integration
266 |         #-----------------------------------------#
267 |         x = F.relu(self.conv1(x))   # activation
268 |         #-----------------------------------------#
269 |         #   Regression branch: predicts the offsets that adjust the anchors.
270 |         #   view(n, -1, 4) reshapes the map into n batches of anchors with 4 values each:
271 |         #   dim 0 = n = batch_size,
272 |         #   dim 1 = -1 = number of anchors (inferred automatically),
273 |         #   dim 2 = 4  = the four box-regression parameters per anchor.
274 |         #-----------------------------------------#
275 |         rpn_locs = self.loc(x)
276 |         rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
277 |         #-----------------------------------------#
278 |         #   torch.transpose(): swaps the contents of two given dimensions.
279 |         #   torch.permute():   reorders several dimensions at once.
280 |         #   contiguous():      copies the permuted tensor into contiguous memory, so that view() can reshape it without disturbing the original data.
281 |         #   torch.view():      flattens the tensor to 1-D, then rebuilds it with the requested shape.
282 |         #
283 |         #   (A Tensor behaves like an ndarray here.)
284 |         #
285 |         #   Classification branch: predicts whether each anchor contains an object.
286 |         #
287 |         #-----------------------------------------#
288 |         rpn_scores = self.score(x)
289 |         rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous().view(n, -1, 2)
290 | 
291 |         #--------------------------------------------------------------------------------------#
292 |         #   Softmax over the two classes of each anchor: contains an object / background;
293 |         #   rpn_softmax_scores[:, :, 1] holds the probability that the anchor contains an object.
294 |         #--------------------------------------------------------------------------------------#
295 |         rpn_softmax_scores = F.softmax(rpn_scores, dim=-1)
296 |         rpn_fg_scores = rpn_softmax_scores[:, :, 1].contiguous()
297 |         rpn_fg_scores = rpn_fg_scores.view(n, -1)
298 | 
299 |         #------------------------------------------------------------------------------------------------#
300 |         #   Generate the anchors tiled over every grid point; for a 600,600,3 input the shape is (12996, 4)
301 |         #------------------------------------------------------------------------------------------------#
302 |         anchor = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w)
303 |         rois = list()
304 |         roi_indices = list()
305 |         for i in range(n):
306 |             roi = self.proposal_layer(rpn_locs[i], rpn_fg_scores[i], anchor, img_size, scale = scale)
307 |             batch_index = i * torch.ones((len(roi),))
308 |             rois.append(roi.unsqueeze(0))
309 |             roi_indices.append(batch_index.unsqueeze(0))
310 | 
311 |         rois        = torch.cat(rois, dim=0).type_as(x)
312 |         roi_indices = torch.cat(roi_indices, dim=0).type_as(x)
313 |         anchor      = torch.from_numpy(anchor).unsqueeze(0).float().to(x.device)
314 | 
315 |         return rpn_locs, rpn_scores, rois, roi_indices, anchor
316 | 
317 | 
318 | def normal_init(m, mean, stddev, truncated=False):
319 |     if truncated:
320 |         m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)   # not a perfect approximation of a truncated normal
321 |     else:
322 |         m.weight.data.normal_(mean, stddev)
323 |         m.bias.data.zero_()
324 | 
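For reference, the `ProposalCreator` used above decodes the RPN offsets onto the anchors and then applies non-maximum suppression. A minimal sketch of the standard Faster R-CNN decode step it relies on; `loc2bbox_sketch` is a hypothetical name for illustration, not this project's actual helper:

import numpy as np

def loc2bbox_sketch(anchors, locs):
    # anchors: (N, 4) boxes; locs: (N, 4) predicted (dy, dx, dh, dw) offsets
    h  = anchors[:, 2] - anchors[:, 0]
    w  = anchors[:, 3] - anchors[:, 1]
    cy = anchors[:, 0] + 0.5 * h
    cx = anchors[:, 1] + 0.5 * w
    # standard parameterisation: shift the centre linearly, scale the size exponentially
    cy = locs[:, 0] * h + cy
    cx = locs[:, 1] * w + cx
    h  = np.exp(locs[:, 2]) * h
    w  = np.exp(locs[:, 3]) * w
    return np.stack([cy - 0.5 * h, cx - 0.5 * w, cy + 0.5 * h, cx + 0.5 * w], axis=1)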
--------------------------------------------------------------------------------
/nets/vgg16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.hub import load_state_dict_from_url
4 | 
5 | 
6 | #--------------------------------------#
7 | #   The VGG16 architecture
8 | #--------------------------------------#
9 | class VGG(nn.Module):
10 |     def __init__(self, features, num_classes=1000, init_weights=True):
11 |         super(VGG, self).__init__()
12 |         self.features = features
13 |         #--------------------------------------#
14 |         #   Average-pool down to 7x7
15 |         #--------------------------------------#
16 |         self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
17 |         #--------------------------------------#
18 |         #   Classification head
19 |         #--------------------------------------#
20 |         self.classifier = nn.Sequential(
21 |             nn.Linear(512 * 7 * 7, 4096),
22 |             nn.ReLU(True),
23 |             nn.Dropout(),
24 |             nn.Linear(4096, 4096),
25 |             nn.ReLU(True),
26 |             nn.Dropout(),
27 |             nn.Linear(4096, num_classes),
28 |         )
29 |         if init_weights:
30 |             self._initialize_weights()
31 | 
32 |     def forward(self, x):
33 |         #--------------------------------------#
34 |         #   Feature extraction
35 |         #--------------------------------------#
36 |         x = self.features(x)
37 |         #--------------------------------------#
38 |         #   Average pooling
39 |         #--------------------------------------#
40 |         x = self.avgpool(x)
41 |         #--------------------------------------#
42 |         #   Flatten
43 |         #--------------------------------------#
44 |         x = torch.flatten(x, 1)
45 |         #--------------------------------------#
46 |         #   Classification head
47 |         #--------------------------------------#
48 |         x = self.classifier(x)
49 |         return x
50 | 
51 |     def _initialize_weights(self):
52 |         for m in self.modules():
53 |             if isinstance(m, nn.Conv2d):
54 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
55 |                 if m.bias is not None:
56 |                     nn.init.constant_(m.bias, 0)
57 |             elif isinstance(m, nn.BatchNorm2d):
58 |                 nn.init.constant_(m.weight, 1)
59 |                 nn.init.constant_(m.bias, 0)
60 |             elif isinstance(m, nn.Linear):
61 |                 nn.init.normal_(m.weight, 0, 0.01)
62 |                 nn.init.constant_(m.bias, 0)
63 | 
64 | '''
65 | For a (600, 600, 3) input, the feature map changes through cfg as follows:
66 | 600,600,3 -> 600,600,64 -> 600,600,64 -> 300,300,64 -> 300,300,128 -> 300,300,128 -> 150,150,128 -> 150,150,256 -> 150,150,256 -> 150,150,256
67 | -> 75,75,256 -> 75,75,512 -> 75,75,512 -> 75,75,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 -> 37,37,512
68 | By the end of cfg we have obtained a 37,37,512 feature map
69 | '''
70 | 
71 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
72 | 
73 | #--------------------------------------#
74 | #   Feature-extraction part
75 | #--------------------------------------#
76 | def make_layers(cfg, batch_norm=False):
77 |     layers = []
78 |     in_channels = 3
79 |     for v in cfg:
80 |         if v == 'M':
81 |             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
82 |         else:
83 |             conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
84 |             if batch_norm:
85 |                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
86 |             else:
87 |                 layers += [conv2d, nn.ReLU(inplace=True)]
88 |             in_channels = v
89 |     return nn.Sequential(*layers)
90 | 
91 | def decom_vgg16(pretrained = False):
92 |     model = VGG(make_layers(cfg))
93 |     if pretrained:
94 |         state_dict = load_state_dict_from_url("https://download.pytorch.org/models/vgg16-397923af.pth", model_dir="./model_data")
95 |         model.load_state_dict(state_dict)
96 |     #----------------------------------------------------------------------------#
97 |     #   Take the feature-extraction part, which finally yields a 37,37,512 feature map
98 |     #----------------------------------------------------------------------------#
99 |     features    = list(model.features)[:30]
100 |     #----------------------------------------------------------------------------#
101 |     #   Take the classification part, with the Dropout layers removed
102 |     #----------------------------------------------------------------------------#
103 |     classifier  = list(model.classifier)
104 |     del classifier[6]
105 |     del classifier[5]
106 |     del classifier[2]
107 | 
108 |     features    = nn.Sequential(*features)
109 |     classifier  = nn.Sequential(*classifier)
110 |     return features, classifier
111 | 
--------------------------------------------------------------------------------
/order_name.py:
--------------------------------------------------------------------------------
1 | #...........................#
2 | # rename the files inside a folder
3 | #...........................#
4 | import os
5 | import xml
6 | from xml.dom import minidom
7 | import xml.etree.cElementTree as ET
8 | 
9 | def myrename(file_path):
10 |     file_list = os.listdir(file_path)
11 |     for i, fi in enumerate(file_list):
12 |         old_dir = os.path.join(file_path, fi)
13 |         print('old name:', old_dir)
14 |         # remove spaces from the name
15 |         new_name = fi.replace(" ", "_")
16 |         print("new name:", new_name)
17 | 
18 |         # # sequential renaming
19 |         # # new_name=str(i+1)+"."+str(fi.split(".")[-1])
20 |         new_dir = os.path.join(file_path, new_name)
21 |         try:
22 |             os.rename(old_dir, new_dir)
23 |         except Exception as e:
24 |             print(e)
25 |             print("Failed!")
26 |         else:
27 |             print("Success!")
28 | 
29 | 
30 | #...........................#
31 | # rewrite the filename and path fields inside each xml file
32 | #...........................#
33 | 
34 | def xml_name(xmlpath):
35 |     files = os.listdir(xmlpath)          # all file names in the folder
36 |     count = 0
37 |     for xmlFile in files:                # walk the folder
38 |         if not os.path.isdir(xmlFile):   # only open entries that are not folders
39 |             name1 = xmlFile.split('.')[0]
40 |             dom = xml.dom.minidom.parse(xmlpath + '/' + xmlFile)
41 |             root = dom.documentElement
42 |             # rewrite filename
43 |             newfilename = root.getElementsByTagName('filename')
44 |             t = newfilename[0].firstChild.data = name1 + '.jpg'
45 |             print('t:', t)
46 |             # rewrite path
47 |             newpath = root.getElementsByTagName('path')
48 |             t1 = newpath[0].firstChild.data = xmlpath + '\\' + name1 + '.jpg'
49 |             print('t1:', t1)
50 | 
51 |             with open(os.path.join(xmlpath, xmlFile), 'w',) as fh:
52 |                 print('fh:', fh)
53 |                 dom.writexml(fh)
54 |                 print('filename/path written OK!')
55 |             count = count + 1
56 | 
57 | 
58 | # remove the XML declaration that shows up in each rewritten xml file
59 | def delete_xmlversion(xmlpath, savedir):
60 | 
61 |     files = os.listdir(xmlpath)
62 |     for ml in files:
63 |         if '.xml' in ml:
64 |             fo = open(savedir + '/' + '{}'.format(ml), 'w', encoding='utf-8')
65 |             print('{}'.format(ml))
66 |             fi = open(xmlpath + '/' + '{}'.format(ml), 'r')
67 |             content = fi.readlines()
68 |             for line in content:
69 |                 # line = line.replace('a', 'b')   # e.g. replace a with b
70 |                 line = line.replace('<?xml version="1.0" ?>', '')   # the declaration minidom writes by default
71 |                 # line = line.replace('测试图片', '车辆图片')
72 |                 # line = line.replace('class1', 'class2')
73 |                 fo.write(line)
74 |             fo.close()
75 |             print('Replacement done')
76 | 
77 | 
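# Example call (hypothetical paths): strip the declaration from every annotation
# under ./label and write the cleaned copies to ./label_clean:
#
#     delete_xmlversion(r'./label', r'./label_clean')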
78 | # filter unwanted object labels out of the xml files
79 | def Delete_part_information_xml(path_root, xy_classes):
80 |     for anno_path in path_root:
81 |         xml_list = os.listdir(anno_path)
82 |         print("opening {}".format(xml_list))
83 |         for annoxml in xml_list:
84 |             path_xml = os.path.join(anno_path, annoxml)
85 |             print('file path: {}'.format(path_xml))
86 |             tree = ET.parse(path_xml)
87 |             root = tree.getroot()
88 | 
89 |             for child in root.findall('object'):
90 |                 name = child.find('name').text
91 |                 if name not in xy_classes:
92 |                     root.remove(child)
93 |                     print(annoxml)
94 |             tree.write(os.path.join(r'F:\Desktop\PCB_code\PCB_DataSet\Annotations—new', annoxml))   # where the processed files are saved
95 | 
96 | 
97 | 
98 | 
99 | if __name__ == "__main__":
100 |     file_path = r"F:\Desktop\PCB_code\date_set\new_data"   # full path to the folder
101 |     # xmlpath="F:\\桌面\\PCB_code\\date_set\\Image_label_source"
102 |     # savedir = r'F:\桌面\PCB_code\date_set\3'   # where files go after the xml declaration is removed
103 |     # xmlpath=r'F:\桌面\PCB_code\date_set\label'
104 |     myrename(file_path)   # rename the image files
105 | 
106 |     # rewrite the names inside the xml files
107 |     # myrename(xmlpath)                     # 1. rename the xml files themselves
108 |     # xml_name(xmlpath)                     # 2. rewrite filename and path inside each xml
109 |     # delete_xmlversion(xmlpath, savedir)   # strip the xml declaration after renaming
110 | 
111 |     # filter unwanted object labels out of the xml files
112 |     path_root = [r'F:\Desktop\PCB_code\PCB_DataSet\Annotations']   # the function iterates over a list of annotation folders
113 |     xy_classes = ['Speaker', "Bat", "2USB", "Rj45+2USB", "Cap_cross", "Cap_blue_black", "Jumper04p",
114 |                   "Jumper10p", "HDD", "Power08p", "Power04p", "Power24p"]
115 |     Delete_part_information_xml(path_root, xy_classes)
116 | 
117 | 
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | #----------------------------------------------------#
2 | #   Single-image prediction, camera/video detection and FPS
3 | #   testing are combined in this one script; switch between them with `mode`.
4 | #----------------------------------------------------#
5 | import time
6 | 
7 | import cv2
8 | import numpy as np
9 | from PIL import Image
10 | 
11 | from frcnn_predict import FRCNN
12 | 
13 | if __name__ == "__main__":
14 |     frcnn = FRCNN()
15 |     #----------------------------------------------------------------------------------------------------------#
16 |     #   mode selects the kind of test:
17 |     #   'predict'       single-image prediction. To customise it (saving images, cropping objects, ...), read the detailed notes below first.
18 |     #   'video'         video detection from a camera or a video file; see the notes below.
19 |     #   'fps'           FPS test, using img/street.jpg; see the notes below.
20 |     #   'dir_predict'   detect every image in a folder and save the results; by default walks img and saves to img_out, see the notes below.
21 |     #----------------------------------------------------------------------------------------------------------#
22 |     mode = "dir_predict"
23 |     #-------------------------------------------------------------------------#
24 |     #   crop    whether to crop out the detected objects after a single-image prediction
25 |     #   count   whether to count the detected objects
26 |     #   crop and count only take effect when mode='predict'
27 |     #-------------------------------------------------------------------------#
28 |     crop    = False
29 |     count   = False
30 |     #----------------------------------------------------------------------------------------------------------#
31 |     #   video_path       path of the video; video_path=0 means detect from the camera.
32 |     #                    To detect a file, set e.g. video_path = "xxx.mp4" to read xxx.mp4 in the root directory.
33 |     #   video_save_path  where the output video is saved; video_save_path="" means don't save.
34 |     #                    To save, set e.g. video_save_path = "yyy.mp4" to write yyy.mp4 in the root directory.
35 |     #   video_fps        fps of the saved video.
36 |     #
37 |     #   video_path, video_save_path and video_fps only take effect when mode='video'.
38 |     #   When saving, exit with ctrl+c or let the video run to the last frame to complete the save properly.
39 |     #----------------------------------------------------------------------------------------------------------#
40 |     video_path      = 0
41 |     video_save_path = ""
42 |     video_fps       = 25.0
43 |     #----------------------------------------------------------------------------------------------------------#
44 |     #   test_interval    how many times each image is detected when measuring fps; in theory, the larger it is, the more accurate the fps.
45 |     #   fps_image_path   the image used for the fps test.
46 |     #
47 |     #   test_interval and fps_image_path only take effect when mode='fps'
48 |     #----------------------------------------------------------------------------------------------------------#
49 |     test_interval   = 100
50 |     fps_image_path  = "img/street.jpg"
51 |     #-------------------------------------------------------------------------#
52 |     #   dir_origin_path   folder holding the images to detect
53 |     #   dir_save_path     folder where the detected images are saved
54 |     #
55 |     #   dir_origin_path and dir_save_path only take effect when mode='dir_predict'
56 |     #-------------------------------------------------------------------------#
57 |     dir_origin_path = "faster-rcnn-pytorch-master/prediction_img"
58 |     dir_save_path   = "faster-rcnn-pytorch-master/prediction_img_out"
59 | 
60 |     if mode == "predict":
61 |         '''
62 |         1. This script cannot run batch prediction directly; to batch-predict, walk a folder with os.listdir()
63 |            and open each image with Image.open(). See get_dr_txt.py, which both walks a folder and saves the detections.
64 |         2. To save a detected image, call r_image.save("img.jpg"); edit it right here in predict.py.
65 |         3. To get the box coordinates, go into frcnn.detect_image and read top, left, bottom, right in the drawing part.
66 |         4. To crop out the detected objects, go into frcnn.detect_image and slice the original image
67 |            with the obtained top, left, bottom, right values.
68 |         5. To draw extra text on the result, e.g. the number of a particular class, test predicted_class in the
69 |            drawing part (e.g. if predicted_class == 'car':), keep a counter, and write it with draw.text.
70 |         '''
71 |         while True:
72 |             img = input('Input image filename:')
73 |             try:
74 |                 image = Image.open(img)
75 |             except:
76 |                 print('Open Error! Try again!')
77 |                 continue
78 |             else:
79 |                 r_image = frcnn.detect_image(image, crop = crop, count = count)
80 |                 r_image.show()
81 | 
82 |     elif mode == "video":
83 |         capture = cv2.VideoCapture(video_path)
84 |         if video_save_path != "":
85 |             fourcc  = cv2.VideoWriter_fourcc(*'XVID')
86 |             size    = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
87 |             out     = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
88 | 
89 |         fps = 0.0
90 |         while(True):
91 |             t1 = time.time()
92 |             # read one frame
93 |             ref, frame = capture.read()
94 |             if not ref: break   # stop once the stream ends, instead of crashing on an empty frame
95 |             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)   # BGR -> RGB
96 |             # convert to a PIL Image
97 |             frame = Image.fromarray(np.uint8(frame))
98 |             # run detection
99 |             frame = np.array(frcnn.detect_image(frame))
100 |             # RGB -> BGR, the format opencv displays
101 |             frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
102 | 
103 |             fps = ( fps + (1./(time.time()-t1)) ) / 2
104 |             print("fps= %.2f"%(fps))
105 |             frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
106 | 
107 |             cv2.imshow("video", frame)
108 |             c = cv2.waitKey(1) & 0xff
109 |             if video_save_path != "":
110 |                 out.write(frame)
111 | 
112 |             if c == 27:   # ESC
113 |                 capture.release()
114 |                 break
115 |         capture.release()
116 |         if video_save_path != "": out.release()   # `out` only exists when saving
117 |         cv2.destroyAllWindows()
118 | 
119 |     elif mode == "fps":
120 |         img = Image.open(fps_image_path)
121 |         tact_time = frcnn.get_FPS(img, test_interval)
122 |         print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
123 | 
124 |     elif mode == "dir_predict":
125 |         import os
126 |         from tqdm import tqdm
127 | 
128 |         img_names = os.listdir(dir_origin_path)
129 |         for img_name in tqdm(img_names):
130 |             if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
131 |                 image_path  = os.path.join(dir_origin_path, img_name)
132 |                 image       = Image.open(image_path)
133 |                 r_image     = frcnn.detect_image(image)
134 |                 if not os.path.exists(dir_save_path):
135 |                     os.makedirs(dir_save_path)
136 |                 r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
137 | 
138 |     else:
139 |         raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
140 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.2.1
2 | numpy==1.17.0
3 | matplotlib==3.1.2
4 | opencv_python==4.1.2.30
5 | torch==1.2.0
6 | torchvision==0.4.0
7 | tqdm==4.60.0
8 | Pillow==8.2.0
9 | h5py==2.10.0
--------------------------------------------------------------------------------
/summary.py:
--------------------------------------------------------------------------------
1 | #--------------------------------------------#
2 | #   This script prints the network structure
3 | #--------------------------------------------#
4 | import torch
5 | from thop import clever_format, profile
6 | from torchsummary import summary
7 | 
8 | from nets.faster_rcnn_feature_extraction import FasterRCNN
9 | 
10 | if __name__ == "__main__":
11 |     input_shape = [600, 600]
12 |     num_classes = 21
13 | 
14 |     device  = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15 |     model   = FasterRCNN(num_classes, backbone = 'vgg').to(device)
16 |     summary(model, (3, input_shape[0], input_shape[1]))
17 | 
18 |     dummy_input     = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device)
19 |     flops, params   = profile(model.to(device), (dummy_input, ), verbose=False)
20 |     #--------------------------------------------------------#
21 |     #   flops * 2 because profile does not count a convolution
22 |     #   as two operations. Some papers count a conv as both a
23 |     #   multiply and an add, and multiply by 2; others count
24 |     #   only the multiplies and skip the factor. This code
25 |     #   multiplies by 2, following YOLOX.
26 |     #--------------------------------------------------------#
27 |     flops           = flops * 2
28 |     flops, params   = clever_format([flops, params], "%.3f")
29 |     print('Total GFLOPS: %s' % (flops))
30 |     print('Total params: %s' % (params))
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #
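The factor-of-two convention used in summary.py above is easy to verify by hand. A quick sketch for a single 3x3 convolution with hypothetical sizes (none of these numbers come from this repository):

# MACs of one Conv2d: k*k*C_in*C_out*H_out*W_out multiply-accumulates
k, c_in, c_out, h, w = 3, 64, 128, 150, 150
macs  = k * k * c_in * c_out * h * w
flops = 2 * macs   # count the multiply and the add separately, as summary.py does
print("%.2f GMACs -> %.2f GFLOPs" % (macs / 1e9, flops / 1e9))   # 1.66 GMACs -> 3.32 GFLOPs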
--------------------------------------------------------------------------------
/utils/anchors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | #--------------------------------------------#
5 | #   Generate the base anchors
6 | #--------------------------------------------#
7 | def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]):
8 |     anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), dtype=np.float32)
9 |     for i in range(len(ratios)):
10 |         for j in range(len(anchor_scales)):
11 |             h = base_size * anchor_scales[j] * np.sqrt(ratios[i])
12 |             w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i])
13 | 
14 |             index = i * len(anchor_scales) + j
15 |             anchor_base[index, 0] = - h / 2.
16 |             anchor_base[index, 1] = - w / 2.
17 |             anchor_base[index, 2] = h / 2.
18 |             anchor_base[index, 3] = w / 2.
19 |     return anchor_base
20 | 
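# The nine base anchors produced above, for the defaults (base_size=16,
# ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]), are centred on (0, 0) with
# side length 16 * scale * sqrt(ratio) (illustrative values, rounded):
#
#   ratio 0.5 -> (h, w) ~ ( 90.5, 181.0), (181.0, 362.0), (362.0, 724.1)
#   ratio 1.0 -> (h, w) =  (128, 128),    (256, 256),     (512, 512)
#   ratio 2.0 -> the transposes of the ratio-0.5 boxes
#
#   e.g. print(generate_anchor_base()[3])   # -> [-64. -64.  64.  64.], the 128x128 box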
21 | #--------------------------------------------#
22 | #   Tile the base anchors over every feature point
23 | #--------------------------------------------#
24 | def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
25 |     #---------------------------------#
26 |     #   compute the grid centres
27 |     #---------------------------------#
28 |     shift_x             = np.arange(0, width * feat_stride, feat_stride)
29 |     shift_y             = np.arange(0, height * feat_stride, feat_stride)
30 |     shift_x, shift_y    = np.meshgrid(shift_x, shift_y)
31 |     shift               = np.stack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel(),), axis=1)
32 | 
33 |     #---------------------------------#
34 |     #   the 9 anchors on every grid point
35 |     #---------------------------------#
36 |     A       = anchor_base.shape[0]
37 |     K       = shift.shape[0]
38 |     anchor  = anchor_base.reshape((1, A, 4)) + shift.reshape((K, 1, 4))
39 |     #---------------------------------#
40 |     #   all the anchors
41 |     #---------------------------------#
42 |     anchor  = anchor.reshape((K * A, 4)).astype(np.float32)
43 |     return anchor
44 | 
45 | if __name__ == "__main__":
46 |     import matplotlib.pyplot as plt
47 |     nine_anchors = generate_anchor_base()
48 |     print(nine_anchors)
49 | 
50 |     height, width, feat_stride = 38, 38, 16
51 |     anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width)
52 |     print(np.shape(anchors_all))
53 | 
54 |     fig = plt.figure()
55 |     ax  = fig.add_subplot(111)
56 |     plt.ylim(-300, 900)
57 |     plt.xlim(-300, 900)
58 |     shift_x = np.arange(0, width * feat_stride, feat_stride)
59 |     shift_y = np.arange(0, height * feat_stride, feat_stride)
60 |     shift_x, shift_y = np.meshgrid(shift_x, shift_y)
61 |     plt.scatter(shift_x, shift_y)
62 |     box_widths  = anchors_all[:, 2] - anchors_all[:, 0]
63 |     box_heights = anchors_all[:, 3] - anchors_all[:, 1]
64 | 
65 |     for i in [108, 109, 110, 111, 112, 113, 114, 115, 116]:
66 |         rect = plt.Rectangle([anchors_all[i, 0], anchors_all[i, 1]], box_widths[i], box_heights[i], color="r", fill=False)
67 |         ax.add_patch(rect)
68 |     plt.show()
69 | 
70 | 
--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import matplotlib
4 | import torch
5 | 
6 | matplotlib.use('Agg')
7 | import shutil
8 | 
9 | import numpy as np
10 | from matplotlib import pyplot as plt
11 | from PIL import Image
12 | from scipy import signal
13 | from torch.utils.tensorboard import SummaryWriter
14 | from tqdm import tqdm   # the eval loop below calls tqdm(...) directly, so import the function, not the bare module
15 | # import tqdm
16 | 
17 | from .utils import cvtColor, get_new_img_size, preprocess_input, resize_image
18 | from .utils_bbox import DecodeBox
19 | from .utils_map import get_coco_map, get_map
20 | 
21 | 
22 | class LossHistory():
23 |     def __init__(self, log_dir, model, input_shape):
24 |         self.log_dir    = log_dir
25 |         self.losses     = []
26 |         self.val_loss   = []
27 | 
28 |         os.makedirs(self.log_dir)
29 |         self.writer     = SummaryWriter(self.log_dir)
30 |         # try:
31 |         #     dummy_input     = torch.randn(2, 3, input_shape[0], input_shape[1])
32 |         #     self.writer.add_graph(model, dummy_input)
33 |         # except:
34 |         #     pass
35 | 
36 |     def append_loss(self, epoch, loss, val_loss):
37 |         if not os.path.exists(self.log_dir):
38 |             os.makedirs(self.log_dir)
39 | 
40 |         self.losses.append(loss)
41 |         self.val_loss.append(val_loss)
42 | 
43 |         with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
44 |             f.write(str(loss))
45 |             f.write("\n")
46 |         with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
47 |             f.write(str(val_loss))
48 |             f.write("\n")
49 | 
50 | 
self.writer.add_scalar('loss', loss, epoch) 51 | self.writer.add_scalar('val_loss', val_loss, epoch) 52 | self.loss_plot() 53 | 54 | def loss_plot(self): 55 | iters = range(len(self.losses)) 56 | 57 | plt.figure() 58 | plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') 59 | plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') 60 | try: 61 | if len(self.losses) < 25: 62 | num = 5 63 | else: 64 | num = 15 65 | 66 | # plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 67 | plt.plot(iters, signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 68 | # plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 69 | plt.plot(iters, signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 70 | except: 71 | pass 72 | 73 | plt.grid(True) 74 | plt.xlabel('Epoch') 75 | plt.ylabel('Loss') 76 | plt.legend(loc="upper right") 77 | 78 | plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) 79 | 80 | plt.cla() 81 | plt.close("all") 82 | 83 | class EvalCallback(): 84 | def __init__(self, net, input_shape, class_names, num_classes, val_lines, log_dir, cuda, \ 85 | map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1): 86 | super(EvalCallback, self).__init__() 87 | 88 | self.net = net 89 | self.input_shape = input_shape 90 | self.class_names = class_names 91 | self.num_classes = num_classes 92 | self.val_lines = val_lines 93 | self.log_dir = log_dir 94 | self.cuda = cuda 95 | self.map_out_path = map_out_path 96 | self.max_boxes = max_boxes 97 | self.confidence = confidence 98 | self.nms_iou = nms_iou 99 | self.letterbox_image = letterbox_image 100 | self.MINOVERLAP = MINOVERLAP 101 | self.eval_flag = eval_flag 102 | self.period = period 103 | 104 | self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None] 105 | if self.cuda: 106 | self.std = self.std.cuda() 107 | self.bbox_util = DecodeBox(self.std, self.num_classes) 108 | 109 | self.maps = [0] 110 | self.epoches = [0] 111 | if self.eval_flag: 112 | with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 113 | f.write(str(0)) 114 | f.write("\n") 115 | 116 | #---------------------------------------------------# 117 | # 检测图片 118 | #---------------------------------------------------# 119 | def get_map_txt(self, image_id, image, class_names, map_out_path): 120 | f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 121 | #---------------------------------------------------# 122 | # 计算输入图片的高和宽 123 | #---------------------------------------------------# 124 | image_shape = np.array(np.shape(image)[0:2]) 125 | input_shape = get_new_img_size(image_shape[0], image_shape[1]) 126 | #---------------------------------------------------------# 127 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 128 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 129 | #---------------------------------------------------------# 130 | image = cvtColor(image) 131 | 132 | #---------------------------------------------------------# 133 | # 给原图像进行resize,resize到短边为600的大小上 134 | #---------------------------------------------------------# 135 | image_data = resize_image(image, [input_shape[1], input_shape[0]]) 136 | 
#---------------------------------------------------------# 137 | # 添加上batch_size维度 138 | #---------------------------------------------------------# 139 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 140 | 141 | with torch.no_grad(): 142 | images = torch.from_numpy(image_data) 143 | if self.cuda: 144 | images = images.cuda() 145 | 146 | roi_cls_locs, roi_scores, rois, _ = self.net(images) 147 | #-------------------------------------------------------------# 148 | # 利用classifier的预测结果对建议框进行解码,获得预测框 149 | #-------------------------------------------------------------# 150 | results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape, 151 | nms_iou = self.nms_iou, confidence = self.confidence) 152 | #--------------------------------------# 153 | # 如果没有检测到物体,则返回原图 154 | #--------------------------------------# 155 | if len(results[0]) <= 0: 156 | return 157 | 158 | top_label = np.array(results[0][:, 5], dtype = 'int32') 159 | top_conf = results[0][:, 4] 160 | top_boxes = results[0][:, :4] 161 | 162 | top_100 = np.argsort(top_conf)[::-1][:self.max_boxes] 163 | top_boxes = top_boxes[top_100] 164 | top_conf = top_conf[top_100] 165 | top_label = top_label[top_100] 166 | 167 | for i, c in list(enumerate(top_label)): 168 | predicted_class = self.class_names[int(c)] 169 | box = top_boxes[i] 170 | score = str(top_conf[i]) 171 | 172 | top, left, bottom, right = box 173 | if predicted_class not in class_names: 174 | continue 175 | 176 | f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) 177 | 178 | f.close() 179 | return 180 | 181 | def on_epoch_end(self, epoch): 182 | if epoch % self.period == 0 and self.eval_flag: 183 | if not os.path.exists(self.map_out_path): 184 | os.makedirs(self.map_out_path) 185 | if not os.path.exists(os.path.join(self.map_out_path, "ground-truth")): 186 | os.makedirs(os.path.join(self.map_out_path, "ground-truth")) 187 | if not os.path.exists(os.path.join(self.map_out_path, "detection-results")): 188 | os.makedirs(os.path.join(self.map_out_path, "detection-results")) 189 | print("Get map.") 190 | for annotation_line in tqdm(self.val_lines): 191 | line = annotation_line.split() 192 | image_id = os.path.basename(line[0]).split('.')[0] 193 | #------------------------------# 194 | # 读取图像并转换成RGB图像 195 | #------------------------------# 196 | image = Image.open(line[0]) 197 | #------------------------------# 198 | # 获得预测框 199 | #------------------------------# 200 | gt_boxes = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 201 | #------------------------------# 202 | # 获得预测txt 203 | #------------------------------# 204 | self.get_map_txt(image_id, image, self.class_names, self.map_out_path) 205 | 206 | #------------------------------# 207 | # 获得真实框txt 208 | #------------------------------# 209 | with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: 210 | for box in gt_boxes: 211 | left, top, right, bottom, obj = box 212 | obj_name = self.class_names[obj] 213 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 214 | 215 | print("Calculate Map.") 216 | try: 217 | temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1] 218 | except: 219 | temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path) 220 | self.maps.append(temp_map) 221 | self.epoches.append(epoch) 222 | 223 | with 
open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 224 | f.write(str(temp_map)) 225 | f.write("\n") 226 | 227 | plt.figure() 228 | plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map') 229 | 230 | plt.grid(True) 231 | plt.xlabel('Epoch') 232 | plt.ylabel('Map %s'%str(self.MINOVERLAP)) 233 | plt.title('A Map Curve') 234 | plt.legend(loc="upper right") 235 | 236 | plt.savefig(os.path.join(self.log_dir, "epoch_map.png")) 237 | plt.cla() 238 | plt.close("all") 239 | 240 | print("Get map done.") 241 | shutil.rmtree(self.map_out_path) 242 | -------------------------------------------------------------------------------- /utils/dataloader.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | from torch.utils.data.dataset import Dataset 6 | 7 | from utils.utils import cvtColor, preprocess_input 8 | 9 | 10 | class FRCNNDataset(Dataset): 11 | def __init__(self, annotation_lines, input_shape = [600, 600], train = True): 12 | self.annotation_lines = annotation_lines 13 | self.length = len(annotation_lines) 14 | self.input_shape = input_shape 15 | self.train = train 16 | 17 | def __len__(self): 18 | return self.length 19 | 20 | def __getitem__(self, index): 21 | index = index % self.length 22 | #---------------------------------------------------# 23 | # 训练时进行数据的随机增强 24 | # 验证时不进行数据的随机增强 25 | #---------------------------------------------------# 26 | image, y = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], random = self.train) 27 | image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) 28 | box_data = np.zeros((len(y), 5)) 29 | if len(y) > 0: 30 | box_data[:len(y)] = y 31 | 32 | box = box_data[:, :4] 33 | label = box_data[:, -1] 34 | return image, box, label 35 | 36 | def rand(self, a=0, b=1): 37 | return np.random.rand()*(b-a) + a 38 | 39 | def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): 40 | line = annotation_line.split() 41 | #------------------------------# 42 | # 读取图像并转换成RGB图像 43 | #------------------------------# 44 | # image = Image.open('./PCB_DataSet/JPEGImages/'+line[0]+'.jpg') 45 | image = Image.open(line[0]) 46 | image = cvtColor(image) 47 | #------------------------------# 48 | # 获得图像的高宽与目标高宽 49 | #------------------------------# 50 | iw, ih = image.size 51 | h, w = input_shape 52 | #------------------------------# 53 | # 获得预测框 54 | #------------------------------# 55 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 56 | 57 | if not random: 58 | scale = min(w/iw, h/ih) 59 | nw = int(iw*scale) 60 | nh = int(ih*scale) 61 | dx = (w-nw)//2 62 | dy = (h-nh)//2 63 | 64 | #---------------------------------# 65 | # 将图像多余的部分加上灰条 66 | #---------------------------------# 67 | image = image.resize((nw,nh), Image.BICUBIC) 68 | new_image = Image.new('RGB', (w,h), (128,128,128)) 69 | new_image.paste(image, (dx, dy)) 70 | image_data = np.array(new_image, np.float32) 71 | 72 | #---------------------------------# 73 | # 对真实框进行调整 74 | #---------------------------------# 75 | if len(box)>0: 76 | np.random.shuffle(box) 77 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 78 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 79 | box[:, 0:2][box[:, 0:2]<0] = 0 80 | box[:, 2][box[:, 2]>w] = w 81 | box[:, 3][box[:, 3]>h] = h 82 | box_w = box[:, 2] - box[:, 0] 83 | box_h = box[:, 3] - box[:, 1] 84 | box = box[np.logical_and(box_w>1, 
box_h>1)] # discard invalid box 85 | 86 | return image_data, box 87 | 88 | #------------------------------------------# 89 | # 对图像进行缩放并且进行长和宽的扭曲 90 | #------------------------------------------# 91 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 92 | scale = self.rand(.25, 2) 93 | if new_ar < 1: 94 | nh = int(scale*h) 95 | nw = int(nh*new_ar) 96 | else: 97 | nw = int(scale*w) 98 | nh = int(nw/new_ar) 99 | image = image.resize((nw,nh), Image.BICUBIC) 100 | 101 | #------------------------------------------# 102 | # 将图像多余的部分加上灰条 103 | #------------------------------------------# 104 | dx = int(self.rand(0, w-nw)) 105 | dy = int(self.rand(0, h-nh)) 106 | new_image = Image.new('RGB', (w,h), (128,128,128)) 107 | new_image.paste(image, (dx, dy)) 108 | image = new_image 109 | 110 | #------------------------------------------# 111 | # 翻转图像 112 | #------------------------------------------# 113 | flip = self.rand()<.5 114 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 115 | 116 | image_data = np.array(image, np.uint8) 117 | #---------------------------------# 118 | # 对图像进行色域变换 119 | # 计算色域变换的参数 120 | #---------------------------------# 121 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 122 | #---------------------------------# 123 | # 将图像转到HSV上 124 | #---------------------------------# 125 | hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) 126 | dtype = image_data.dtype 127 | #---------------------------------# 128 | # 应用变换 129 | #---------------------------------# 130 | x = np.arange(0, 256, dtype=r.dtype) 131 | lut_hue = ((x * r[0]) % 180).astype(dtype) 132 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 133 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 134 | 135 | image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 136 | image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) 137 | 138 | #---------------------------------# 139 | # 对真实框进行调整 140 | #---------------------------------# 141 | if len(box)>0: 142 | np.random.shuffle(box) 143 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 144 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 145 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 146 | box[:, 0:2][box[:, 0:2]<0] = 0 147 | box[:, 2][box[:, 2]>w] = w 148 | box[:, 3][box[:, 3]>h] = h 149 | box_w = box[:, 2] - box[:, 0] 150 | box_h = box[:, 3] - box[:, 1] 151 | box = box[np.logical_and(box_w>1, box_h>1)] 152 | 153 | return image_data, box 154 | 155 | # DataLoader中collate_fn使用 156 | def frcnn_dataset_collate(batch): 157 | images = [] 158 | bboxes = [] 159 | labels = [] 160 | for img, box, label in batch: 161 | images.append(img) 162 | bboxes.append(box) 163 | labels.append(label) 164 | images = torch.from_numpy(np.array(images)) 165 | return images, bboxes, labels 166 | 167 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/Bikmeans_anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from read_voc import VOCDataSet 3 | 4 | # bik-means算法 5 | """ 6 | Args: 7 | boxes: 需要聚类的bboxes 8 | k: 簇数(聚成几类) 9 | dist: 更新簇坐标的方法(默认使用中位数,比均值效果略好) 10 | """ 11 | 12 | def load_data_set(fileName): 13 | """加载数据集""" 14 | dataSet = [] # 初始化一个空列表 15 | fr = open(fileName) 16 | for line in fr.readlines(): 17 | # 按tab分割字段,将每行元素分割为list的元素 18 | curLine = line.strip().split('\t') 19 | # 用list函数把map函数返回的迭代器遍历展开成一个列表 20 | # 其中map(float, curLine)表示把列表的每个值用float函数转成float型,并返回迭代器 21 | fltLine = 
list(map(float, curLine)) 22 | dataSet.append(fltLine) 23 | return dataSet 24 | 25 | 26 | def distance_euclidean(vector1, vector2): 27 | """计算欧氏距离""" 28 | return np.sqrt(sum(np.power(vector1-vector2, 2))) # 返回两个向量的距离 29 | 30 | 31 | def rand_center(dataSet, k): 32 | """构建一个包含K个随机质心的集合""" 33 | n = np.shape(dataSet)[1] # 获取样本特征值 34 | 35 | # 初始化质心,创建(k,n)个以0填充的矩阵 36 | centroids = np.mat(np.zeros((k, n))) # 每个质心有n个坐标值,总共要k个质心 37 | # 遍历特征值 38 | for j in range(n): 39 | # 计算每一列的最小值 40 | minJ = min(dataSet[:, j]) 41 | # 计算每一列的范围值 42 | rangeJ = float(max(dataSet[:, j]) - minJ) 43 | # 计算每一列的质心,并将其赋给centroids 44 | centroids[:, j] = minJ + rangeJ * np.random.rand(k, 1) 45 | 46 | # 返回质心 47 | return centroids 48 | 49 | 50 | def k_means(dataSet, k, distMeas=distance_euclidean, creatCent=rand_center): 51 | """K-means聚类算法""" 52 | m = np.shape(dataSet)[0] # 行数 53 | # 建立簇分配结果矩阵,第一列存放该数据所属中心点,第二列是该数据到中心点的距离 54 | clusterAssment = np.mat(np.zeros((m, 2))) 55 | centroids = creatCent(dataSet, k) # 质心,即聚类点 56 | # 用来判定聚类是否收敛 57 | clusterChanged = True 58 | while clusterChanged: 59 | clusterChanged = False 60 | for i in range(m): # 把每一个数据划分到离他最近的中心点 61 | minDist = np.inf # 无穷大 62 | minIndex = -1 # 初始化 63 | for j in range(k): 64 | # 计算各点与新的聚类中心的距离 65 | distJI = distMeas(centroids[j, :], dataSet[i, :]) 66 | if distJI < minDist: 67 | # 如果第i个数据点到第j中心点更近,则将i归属为j 68 | minDist = distJI 69 | minIndex = j 70 | # 如果分配发生变化,则需要继续迭代 71 | if clusterAssment[i, 0] != minIndex: 72 | clusterChanged = True 73 | # 并将第i个数据点的分配情况存入字典 74 | clusterAssment[i, :] = minIndex, minDist**2 75 | # print(centroids) 76 | for cent in range(k): # 重新计算中心点 77 | # 去第一列等于cent的所有列 78 | ptsInClust = dataSet[np.nonzero(clusterAssment[:, 0].A == cent)[0]] 79 | # 算出这些数据的中心点 80 | centroids[cent, :] = np.mean(ptsInClust, axis=0) 81 | return centroids, clusterAssment 82 | 83 | 84 | def biKmeans(dataMat, k, distMeas=distance_euclidean): 85 | """二分k-means算法""" 86 | m = np.shape(dataMat)[0] 87 | # 创建一个矩阵来存储数据集中每个点的簇分配结果及平方误差 88 | clusterAssment = np.mat(np.zeros((m, 2))) 89 | # 根据数据集均值获取第一个质心 90 | centroid0 = np.mean(dataMat, axis=0).tolist()[0] 91 | # 用一个列表来保留所有的质心 92 | centList = [centroid0] 93 | # 遍历数据集中所有点来计算每个点到质心的距离 94 | for j in range(m): 95 | clusterAssment[j, 1] = distMeas(np.mat(centroid0), dataMat[j, :]) ** 2 96 | # 对簇不停的进行划分,直到得到想要的簇数目为止 97 | while (len(centList) < k): 98 | # 初始化最小SSE为无穷大,用于比较划分前后的SSE 99 | lowestSSE = np.inf # 无穷大 100 | # 通过考察簇列表中的值来获得当前簇的数目,遍历所有的簇来决定最佳的簇进行划分 101 | for i in range(len(centList)): 102 | # 对每一个簇,将该簇中的所有点看成一个小的数据集 103 | ptsInCurrCluster = dataMat[np.nonzero( 104 | clusterAssment[:, 0].A == i)[0], :] 105 | # 将ptsInCurrCluster输入到函数kMeans中进行处理,k=2, 106 | # kMeans会生成两个质心(簇),同时给出每个簇的误差值 107 | centroidMat, splitClustAss = k_means(ptsInCurrCluster, 2, distMeas) 108 | # 划分数据的SSE与未划分的之和作为本次划分的总误差 109 | sseSplit = sum(splitClustAss[:, 1]) # 划分数据集的SSE 110 | sseNotSplit = sum(clusterAssment[np.nonzero(clusterAssment[:, 0].A != i)[0], 1]) # 未划分数据集的SSE 111 | print('划分数据集的SSE, and 未划分的SSE: ', sseSplit, sseNotSplit) 112 | # 将划分与未划分的SSE求和与最小SSE相比较 确定是否划分 113 | if (sseSplit + sseNotSplit) < lowestSSE: 114 | bestCentToSplit = i # 当前最适合做划分的中心点 115 | bestNewCents = centroidMat # 划分后的两个新中心点 116 | bestClustAss = splitClustAss.copy() # 划分点的聚类信息 117 | lowestSSE = sseSplit + sseNotSplit 118 | # 找出最好的簇分配结果 119 | # 调用kmeans函数并且指定簇数为2时,会得到两个编号分别为0和1的结果簇 120 | bestClustAss[np.nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList) 121 | # 更新为最佳质心 122 | bestClustAss[np.nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit 123 | print('本次最适合划分的质心: 
', bestCentToSplit) 124 | print('被划分数据集样本数量: ', len(bestClustAss)) 125 | # 更新质心列表 126 | # 更新原质心list中的第i个质心为使用二分kMeans后bestNewCents的第一个质心 127 | centList[bestCentToSplit] = bestNewCents[0, :].tolist()[0] 128 | # 添加bestNewCents的第二个质心 129 | centList.append(bestNewCents[1, :].tolist()[0]) 130 | # 重新分配最好簇下的数据(质心)以及SSE 131 | clusterAssment[np.nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss 132 | 133 | return np.mat(centList), clusterAssment 134 | 135 | def main(img_size=600, k=9, thr=0.25, gen=1000): 136 | # 从数据集中读取所有图片的wh以及对应bboxes的wh 137 | dataset = VOCDataSet(voc_root="/data", year="2012", txt_name="train.txt") 138 | im_wh, boxes_wh = dataset.get_info() 139 | 140 | 141 | if __name__ == "__main__": 142 | import matplotlib.pyplot as plt 143 | # nine_anchors = generate_anchor_base() 144 | # print(nine_anchors) 145 | 146 | # height, width, feat_stride = 38,38,16 147 | # anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width) 148 | # print(np.shape(anchors_all)) 149 | 150 | # fig = plt.figure() 151 | # ax = fig.add_subplot(111) 152 | # plt.ylim(-300,900) 153 | # plt.xlim(-300,900) 154 | # shift_x = np.arange(0, width * feat_stride, feat_stride) 155 | # shift_y = np.arange(0, height * feat_stride, feat_stride) 156 | # shift_x, shift_y = np.meshgrid(shift_x, shift_y) 157 | # plt.scatter(shift_x,shift_y) 158 | # box_widths = anchors_all[:,2]-anchors_all[:,0] 159 | # box_heights = anchors_all[:,3]-anchors_all[:,1] 160 | 161 | # for i in [108, 109, 110, 111, 112, 113, 114, 115, 116]: 162 | # rect = plt.Rectangle([anchors_all[i, 0],anchors_all[i, 1]],box_widths[i],box_heights[i],color="r",fill=False) 163 | # ax.add_patch(rect) 164 | # plt.show() 165 | 166 | # 测试biKmeans算法 167 | datMat = np.mat(load_data_set(r'F:\Desktop\PCB_code\PCB_DataSet\trainval.txt')) 168 | # 5个anchor框, 169 | centList, clusterAssment = biKmeans(datMat, 5) 170 | print("质心结果:", centList) 171 | print("聚类结果:", clusterAssment) 172 | # 可视化 173 | plt.scatter(np.array(datMat)[:, 0], np.array(datMat)[:, 1], c=np.array(clusterAssment)[:, 0].T) 174 | plt.scatter(centList[:, 0].tolist(), centList[:, 1].tolist(), c="r") 175 | plt.show() 176 | 177 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/main.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from tqdm import tqdm 4 | from scipy.cluster.vq import kmeans 5 | 6 | from read_voc import VOCDataSet 7 | from yolo_kmeans import k_means, wh_iou 8 | 9 | 10 | def anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float): # mutation fitness 11 | r = wh[:, None] / k[None] 12 | x = np.minimum(r, 1. / r).min(2) # ratio metric 13 | # x = wh_iou(wh, k) # iou metric 14 | best = x.max(1) 15 | f = (best * (best > thr).astype(np.float32)).mean() # fitness 16 | bpr = (best > thr).astype(np.float32).mean() # best possible recall 17 | return f, bpr 18 | 19 | 20 | def main(img_size=512, n=9, thr=0.25, gen=1000): 21 | # 从数据集中读取所有图片的wh以及对应bboxes的wh 22 | dataset = VOCDataSet(voc_root="/data", year="2012", txt_name="train.txt") 23 | im_wh, boxes_wh = dataset.get_info() 24 | 25 | # 最大边缩放到img_size 26 | im_wh = np.array(im_wh, dtype=np.float32) 27 | shapes = img_size * im_wh / im_wh.max(1, keepdims=True) 28 | wh0 = np.concatenate([l * s for s, l in zip(shapes, boxes_wh)]) # wh 29 | 30 | # Filter 过滤掉小目标 31 | i = (wh0 < 3.0).any(1).sum() 32 | if i: 33 | print(f'WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 34 | wh = wh0[(wh0 >= 2.0).any(1)] # 只保留wh都大于等于2个像素的box 35 | 36 | # Kmeans calculation 37 | # print(f'Running kmeans for {n} anchors on {len(wh)} points...') 38 | # s = wh.std(0) # sigmas for whitening 39 | # k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 40 | # assert len(k) == n, print(f'ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 41 | # k *= s 42 | k = k_means(wh, n) 43 | 44 | # 按面积排序 45 | k = k[np.argsort(k.prod(1))] # sort small to large 46 | f, bpr = anchor_fitness(k, wh, thr) 47 | print("kmeans: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k])) 48 | print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}") 49 | 50 | # Evolve 51 | # 遗传算法(在kmeans的结果基础上变异mutation) 52 | npr = np.random 53 | f, sh, mp, s = anchor_fitness(k, wh, thr)[0], k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 54 | pbar = tqdm(range(gen), desc=f'Evolving anchors with Genetic Algorithm:') # progress bar 55 | for _ in pbar: 56 | v = np.ones(sh) 57 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 58 | v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 59 | kg = (k.copy() * v).clip(min=2.0) 60 | fg, bpr = anchor_fitness(kg, wh, thr) 61 | if fg > f: 62 | f, k = fg, kg.copy() 63 | pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 64 | 65 | # 按面积排序 66 | k = k[np.argsort(k.prod(1))] # sort small to large 67 | print("genetic: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k])) 68 | print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}") 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/plot_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | np.random.seed(0) 4 | 5 | colors = np.array(['blue', 'black']) 6 | 7 | 8 | def plot_clusters(data, cls, clusters, title=""): 9 | if cls is None: 10 | c = [colors[0]] * data.shape[0] 11 | else: 12 | c = colors[cls].tolist() 13 | 14 | plt.scatter(data[:, 0], data[:, 1], c=c) 15 | for i, clus in enumerate(clusters): 16 | plt.scatter(clus[0], clus[1], c='gold', marker='*', s=150) 17 | plt.title(title) 18 | plt.show() 19 | plt.close() 20 | 21 | 22 | def distances(data, clusters): 23 | xy1 = data[:, None] # [N,1,2] 24 | xy2 = clusters[None] # [1,M,2] 25 | d = np.sum(np.power(xy2 - xy1, 2), axis=-1) 26 | return d 27 | 28 | 29 | def k_means(data, k, dist=np.mean): 30 | """ 31 | k-means methods 32 | Args: 33 | data: 需要聚类的data 34 | k: 簇数(聚成几类) 35 | dist: 更新簇坐标的方法 36 | """ 37 | data_number = data.shape[0] 38 | last_nearest = np.zeros((data_number,)) 39 | 40 | # init k clusters 41 | clusters = data[np.random.choice(data_number, k, replace=False)] 42 | print(f"random cluster: \n {clusters}") 43 | # plot 44 | plot_clusters(data, None, clusters, "random clusters") 45 | 46 | step = 0 47 | while True: 48 | d = distances(data, clusters) 49 | current_nearest = np.argmin(d, axis=1) 50 | 51 | # plot 52 | plot_clusters(data, current_nearest, clusters, f"step {step}") 53 | 54 | if (last_nearest == current_nearest).all(): 55 | break # clusters won't change 56 | for cluster in range(k): 57 | # update clusters 58 | clusters[cluster] = dist(data[current_nearest == cluster], axis=0) 59 | last_nearest = current_nearest 60 | step += 1 61 | 62 | return clusters 63 | 64 | 65 | def 
main(): 66 | x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)] 67 | x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)] 68 | 69 | x = np.concatenate([x1, x2]) 70 | y = np.concatenate([y1, y2]) 71 | 72 | plt.scatter(x, y, c='blue') 73 | plt.title("initial data") 74 | plt.show() 75 | plt.close() 76 | 77 | clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2) 78 | print(f"k-means fluster: \n {clusters}") 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/read_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import tqdm 3 | from lxml import etree 4 | 5 | 6 | class VOCDataSet(object): 7 | def __init__(self, voc_root, txt_name: str = "train.txt"): 8 | self.root = voc_root 9 | self.annotations_root = os.path.join(self.root, "Annotations") 10 | 11 | # read train.txt or val.txt file 12 | txt_path = os.path.join(self.root, "ImageSets", txt_name) 13 | assert os.path.exists(txt_path), "not found {} file.".format(txt_name) 14 | 15 | with open(txt_path) as read: 16 | self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml") 17 | for line in read.readlines() if len(line.strip()) > 0] 18 | 19 | # check file 20 | assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path) 21 | for xml_path in self.xml_list: 22 | assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path) 23 | 24 | def __len__(self): 25 | return len(self.xml_list) 26 | 27 | def parse_xml_to_dict(self, xml): 28 | """ 29 | 将xml文件解析成字典形式,参考tensorflow的recursive_parse_xml_to_dict 30 | Args: 31 | xml: xml tree obtained by parsing XML file contents using lxml.etree 32 | 33 | Returns: 34 | Python dictionary holding XML contents. 35 | """ 36 | 37 | if len(xml) == 0: # 遍历到底层,直接返回tag对应的信息 38 | return {xml.tag: xml.text} 39 | 40 | result = {} 41 | for child in xml: 42 | child_result = self.parse_xml_to_dict(child) # 递归遍历标签信息 43 | if child.tag != 'object': 44 | result[child.tag] = child_result[child.tag] 45 | else: 46 | if child.tag not in result: # 因为object可能有多个,所以需要放入列表里 47 | result[child.tag] = [] 48 | result[child.tag].append(child_result[child.tag]) 49 | return {xml.tag: result} 50 | 51 | def get_info(self): 52 | im_wh_list = [] 53 | boxes_wh_list = [] 54 | for xml_path in tqdm(self.xml_list, desc="read data info."): 55 | # read xml 56 | with open(xml_path) as fid: 57 | xml_str = fid.read() 58 | xml = etree.fromstring(xml_str) 59 | data = self.parse_xml_to_dict(xml)["annotation"] 60 | im_height = int(data["size"]["height"]) 61 | im_width = int(data["size"]["width"]) 62 | 63 | wh = [] 64 | for obj in data["object"]: 65 | xmin = float(obj["bndbox"]["xmin"]) 66 | xmax = float(obj["bndbox"]["xmax"]) 67 | ymin = float(obj["bndbox"]["ymin"]) 68 | ymax = float(obj["bndbox"]["ymax"]) 69 | wh.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height]) 70 | 71 | if len(wh) == 0: 72 | continue 73 | 74 | im_wh_list.append([im_width, im_height]) 75 | boxes_wh_list.append(wh) 76 | 77 | return im_wh_list, boxes_wh_list 78 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/yolo_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def wh_iou(wh1, wh2): 5 | # Returns the nxm IoU matrix. 
--------------------------------------------------------------------------------
/utils/kmeans_anchors/yolo_kmeans.py:
--------------------------------------------------------------------------------

import numpy as np


def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]     # [1,M,2]
    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)


def k_means(boxes, k, dist=np.median):
    """
    YOLO k-means clustering.
    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py
    Args:
        boxes: bboxes to cluster
        k: number of clusters
        dist: method used to update the cluster centres (the median by default, slightly better than the mean)
    """
    box_number = boxes.shape[0]
    last_nearest = np.zeros((box_number,))
    # np.random.seed(0)  # fix the random seed for reproducibility

    # init k clusters
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        distances = 1 - wh_iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            # update clusters
            clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0)

        last_nearest = current_nearest

    return clusters

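# Editor's sketch (not part of the original file): the clustering metric above
# is 1 - IoU between (w, h) pairs rather than Euclidean distance, so box shape
# matters instead of absolute coordinates. A tiny deterministic check:
if __name__ == "__main__":
    boxes = np.array([[0.1, 0.2], [0.4, 0.4], [0.8, 0.5]])
    anchors = np.array([[0.1, 0.2], [0.5, 0.5]])
    print(wh_iou(boxes, anchors))  # [3, 2] matrix; the first row is [1.0, 0.08]
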
--------------------------------------------------------------------------------
/utils/soft_nms.py:
--------------------------------------------------------------------------------

import torch

# Note: the two functions below are written as methods (they take `self` and
# call self.bbox_iou / self.yolo_correct_boxes), so they are meant to be
# attached to a detector class that provides yolo_correct_boxes.

def bbox_iou(self, box1, box2, x1y1x2y2=True):
    """
    Compute the IoU between two sets of boxes.
    """
    if not x1y1x2y2:
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, min=0) * \
                 torch.clamp(inter_rect_y2 - inter_rect_y1, min=0)

    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    iou = inter_area / torch.clamp(b1_area + b2_area - inter_area, min=1e-6)

    return iou

def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4, sigma=0.5):
    #----------------------------------------------------------#
    #   Convert predictions from centre/width-height format to
    #   top-left / bottom-right corners.
    #   prediction  [batch_size, num_anchors, 5 + num_classes] (85 for COCO)
    #----------------------------------------------------------#
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):
        #----------------------------------------------------------#
        #   Take the max over the class predictions.
        #   class_conf  [num_anchors, 1]  class confidence
        #   class_pred  [num_anchors, 1]  class index
        #----------------------------------------------------------#
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        #----------------------------------------------------------#
        #   First round of filtering by confidence.
        #----------------------------------------------------------#
        conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()

        #----------------------------------------------------------#
        #   Apply the confidence mask to the predictions.
        #----------------------------------------------------------#
        image_pred = image_pred[conf_mask]
        class_conf = class_conf[conf_mask]
        class_pred = class_pred[conf_mask]
        if not image_pred.size(0):
            continue
        #-------------------------------------------------------------------------#
        #   detections  [num_anchors, 7]
        #   the 7 values are: x1, y1, x2, y2, obj_conf, class_conf, class_pred
        #-------------------------------------------------------------------------#
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

        #------------------------------------------#
        #   Collect all classes present in the predictions.
        #------------------------------------------#
        unique_labels = detections[:, -1].cpu().unique()

        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
            detections = detections.cuda()

        for c in unique_labels:
            #------------------------------------------#
            #   Get all score-filtered detections of this class.
            #------------------------------------------#
            detections_class = detections[detections[:, -1] == c]

            # #------------------------------------------#
            # #   torchvision's built-in NMS would be faster!
            # #------------------------------------------#
            # keep = nms(
            #     detections_class[:, :4],
            #     detections_class[:, 4] * detections_class[:, 5],
            #     nms_thres
            # )
            # max_detections = detections_class[keep]

            # Sort by objectness * class confidence.
            _, conf_sort_index = torch.sort(detections_class[:, 4] * detections_class[:, 5], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Soft non-maximum suppression.
            max_detections = []
            while detections_class.size(0):
                # Keep the highest-scoring box of this class, then decay the
                # scores of the remaining boxes according to their overlap with it.
                max_detections.append(detections_class[0].unsqueeze(0))
                if len(detections_class) == 1:
                    break
                ious = self.bbox_iou(max_detections[-1], detections_class[1:])
                # Soft-NMS re-weighting: multiply the original score by a Gaussian of the IoU.
                detections_class[1:, 4] = torch.exp(-(ious * ious) / sigma) * detections_class[1:, 4]
                detections_class = detections_class[1:]
                # Drop boxes that fell below the threshold and re-sort by the updated scores.
                detections_class = detections_class[detections_class[:, 4] >= conf_thres]
                arg_sort = torch.argsort(detections_class[:, 4], descending=True)
                detections_class = detections_class[arg_sort]
            # Stack the kept detections.
            max_detections = torch.cat(max_detections).data

            # Add max detections to outputs.
            output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections))

        if output[i] is not None:
            output[i] = output[i].cpu().numpy()
            box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4]) / 2, output[i][:, 2:4] - output[i][:, 0:2]
            output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)

    return output

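# Editor's sketch (not part of the original file): the Gaussian re-weighting
# above in isolation. With sigma = 0.5, an overlap of IoU = 0.8 decays a score
# of 0.9 to 0.9 * exp(-0.64 / 0.5) ≈ 0.25, while IoU = 0.3 only decays it to
# ≈ 0.75: heavily overlapping boxes are suppressed softly instead of being
# removed outright.
def soft_nms_weight(iou, score, sigma=0.5):
    return torch.exp(-(iou * iou) / sigma) * score
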
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------

import numpy as np
from PIL import Image

#---------------------------------------------------------#
#   Convert the image to RGB so grayscale images do not
#   crash prediction. The code only supports prediction on
#   RGB images; every other mode is converted to RGB.
#---------------------------------------------------------#
def cvtColor(image):
    if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
        return image
    else:
        image = image.convert('RGB')
        return image

#---------------------------------------------------#
#   Resize the input image.
#---------------------------------------------------#
def resize_image(image, size):
    w, h = size
    new_image = image.resize((w, h), Image.BICUBIC)
    return new_image

#---------------------------------------------------#
#   Read the class names.
#---------------------------------------------------#
def get_classes(classes_path):
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)

#---------------------------------------------------#
#   Get the current learning rate.
#---------------------------------------------------#
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def preprocess_input(image):
    image /= 255.0
    return image

def show_config(**kwargs):
    print('Configurations:')
    print('-' * 70)
    print('|%25s | %40s|' % ('keys', 'values'))
    print('-' * 70)
    for key, value in kwargs.items():
        print('|%25s | %40s|' % (str(key), str(value)))
    print('-' * 70)

def get_new_img_size(height, width, img_min_side=600):
    if width <= height:
        f = float(img_min_side) / width
        resized_height = int(f * height)
        resized_width = int(img_min_side)
    else:
        f = float(img_min_side) / height
        resized_width = int(f * width)
        resized_height = int(img_min_side)

    return resized_height, resized_width

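# Editor's sketch (not part of the original file): get_new_img_size fixes the
# short side of the image to img_min_side and scales the long side by the
# same factor, preserving the aspect ratio.
if __name__ == "__main__":
    # A 1000x500 (h x w) image: the short side 500 is scaled to 600 and the
    # long side follows with the same factor of 1.2.
    assert get_new_img_size(1000, 500) == (1200, 600)
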
--------------------------------------------------------------------------------
/utils/utils_bbox.py:
--------------------------------------------------------------------------------

import numpy as np
import torch
from torch.nn import functional as F
from torchvision.ops import nms


# src_bbox: prior boxes (anchors/proposals); loc: predicted regression offsets
def loc2bbox(src_bbox, loc):
    if src_bbox.size()[0] == 0:
        return torch.zeros((0, 4), dtype=loc.dtype)

    # width, height and centre coordinates of the prior boxes
    src_width = torch.unsqueeze(src_bbox[:, 2] - src_bbox[:, 0], -1)
    src_height = torch.unsqueeze(src_bbox[:, 3] - src_bbox[:, 1], -1)
    src_ctr_x = torch.unsqueeze(src_bbox[:, 0], -1) + 0.5 * src_width
    src_ctr_y = torch.unsqueeze(src_bbox[:, 1], -1) + 0.5 * src_height

    # offsets used to adjust the size and position of the priors;
    # [:, 0::4] selects every fourth column starting from column 0
    dx = loc[:, 0::4]
    dy = loc[:, 1::4]
    dw = loc[:, 2::4]
    dh = loc[:, 3::4]

    # apply the offsets to the priors
    ctr_x = dx * src_width + src_ctr_x
    ctr_y = dy * src_height + src_ctr_y
    w = torch.exp(dw) * src_width
    h = torch.exp(dh) * src_height

    dst_bbox = torch.zeros_like(loc)
    dst_bbox[:, 0::4] = ctr_x - 0.5 * w
    dst_bbox[:, 1::4] = ctr_y - 0.5 * h
    dst_bbox[:, 2::4] = ctr_x + 0.5 * w
    dst_bbox[:, 3::4] = ctr_y + 0.5 * h

    return dst_bbox

class DecodeBox():
    def __init__(self, std, num_classes):
        self.std = std
        self.num_classes = num_classes + 1

    def frcnn_correct_boxes(self, box_xy, box_wh, input_shape, image_shape):
        #-----------------------------------------------------------------#
        #   y is put first so the boxes can be multiplied directly by the
        #   image height/width.
        #-----------------------------------------------------------------#
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        box_mins = box_yx - (box_hw / 2.)
        box_maxes = box_yx + (box_hw / 2.)
        boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def forward(self, roi_cls_locs, roi_scores, rois, image_shape, input_shape, nms_iou=0.3, confidence=0.5):
        results = []
        bs = len(roi_cls_locs)
        #--------------------------------#
        #   batch_size, num_rois, 4
        #--------------------------------#
        rois = rois.view((bs, -1, 4))
        #----------------------------------------------------------------------------#
        #   Process each image in the batch. predict.py feeds a single image,
        #   so this loop runs only once there.
        #----------------------------------------------------------------------------#
        for i in range(bs):
            #----------------------------------------------------------#
            #   De-normalise the regression parameters (multiply by std).
            #----------------------------------------------------------#
            roi_cls_loc = roi_cls_locs[i] * self.std
            #----------------------------------------------------------#
            #   dim 0: number of proposals, dim 1: classes,
            #   dim 2: per-class adjustment parameters
            #----------------------------------------------------------#
            roi_cls_loc = roi_cls_loc.view([-1, self.num_classes, 4])

            #-------------------------------------------------------------#
            #   Adjust the proposals with the classifier's regression output
            #   to obtain the final boxes.
            #   num_rois, 4 -> num_rois, 1, 4 -> num_rois, num_classes, 4
            #-------------------------------------------------------------#
            roi = rois[i].view((-1, 1, 4)).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(roi.contiguous().view((-1, 4)), roi_cls_loc.contiguous().view((-1, 4)))
            cls_bbox = cls_bbox.view([-1, (self.num_classes), 4])
            #-------------------------------------------------------------#
            #   Normalise the boxes to the 0-1 range.
            #-------------------------------------------------------------#
            cls_bbox[..., [0, 2]] = (cls_bbox[..., [0, 2]]) / input_shape[1]
            cls_bbox[..., [1, 3]] = (cls_bbox[..., [1, 3]]) / input_shape[0]

            roi_score = roi_scores[i]
            prob = F.softmax(roi_score, dim=-1)

            results.append([])
            for c in range(1, self.num_classes):
                #--------------------------------#
                #   Take the confidences of this class
                #   and check them against the threshold.
                #--------------------------------#
                c_confs = prob[:, c]
                c_confs_m = c_confs > confidence

                if len(c_confs[c_confs_m]) > 0:
                    #-----------------------------------------#
                    #   Keep the boxes scoring above confidence.
                    #-----------------------------------------#
                    boxes_to_process = cls_bbox[c_confs_m, c]
                    confs_to_process = c_confs[c_confs_m]

                    keep = nms(
                        boxes_to_process,
                        confs_to_process,
                        nms_iou
                    )
                    #-----------------------------------------#
                    #   Keep the boxes that survive NMS.
                    #-----------------------------------------#
                    good_boxes = boxes_to_process[keep]
                    confs = confs_to_process[keep][:, None]
                    labels = (c - 1) * torch.ones((len(keep), 1)).cuda() if confs.is_cuda else (c - 1) * torch.ones((len(keep), 1))
                    #-----------------------------------------#
                    #   Stack the labels, confidences and box coordinates.
                    #-----------------------------------------#
                    c_pred = torch.cat((good_boxes, confs, labels), dim=1).cpu().numpy()
                    # append to the results of this image
                    results[-1].extend(c_pred)

            if len(results[-1]) > 0:
                results[-1] = np.array(results[-1])
                box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4]) / 2, results[-1][:, 2:4] - results[-1][:, 0:2]
                results[-1][:, :4] = self.frcnn_correct_boxes(box_xy, box_wh, input_shape, image_shape)

        return results

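# Editor's sketch (not part of the original file): loc2bbox on a single box.
# A 10x10 prior at (0, 0, 10, 10) with loc = (0.1, 0.1, 0, 0) keeps its size
# (exp(0) = 1) and shifts its centre by 0.1 * 10 = 1 pixel in x and y.
if __name__ == "__main__":
    src = torch.tensor([[0., 0., 10., 10.]])
    loc = torch.tensor([[0.1, 0.1, 0.0, 0.0]])
    print(loc2bbox(src, loc))  # tensor([[ 1.,  1., 11., 11.]])
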
--------------------------------------------------------------------------------
/utils/utils_fit.py:
--------------------------------------------------------------------------------

import os

import torch
from tqdm import tqdm

from utils.utils import get_lr


def fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir):
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0

    val_loss = 0
    print('Start Train')
    with tqdm(total=epoch_step, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_step:
                break
            images, boxes, labels = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    images = images.cuda()  # shape = [2, 3, 600, 600]

            rpn_loc, rpn_cls, roi_loc, roi_cls, total = train_util.train_step(images, boxes, labels, 1, fp16, scaler)
            total_loss += total.item()
            rpn_loc_loss += rpn_loc.item()
            rpn_cls_loss += rpn_cls.item()
            roi_loc_loss += roi_loc.item()
            roi_cls_loss += roi_cls.item()

            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                'rpn_loc': rpn_loc_loss / (iteration + 1),
                                'rpn_cls': rpn_cls_loss / (iteration + 1),
                                'roi_loc': roi_loc_loss / (iteration + 1),
                                'roi_cls': roi_cls_loss / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    print('Finish Train')
    print('Start Validation')
    with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen_val):
            if iteration >= epoch_step_val:
                break
            images, boxes, labels = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    images = images.cuda()

                train_util.optimizer.zero_grad()
                _, _, _, _, val_total = train_util.forward(images, boxes, labels, 1)
                val_loss += val_total.item()

            pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
            pbar.update(1)

    print('Finish Validation')
    loss_history.append_loss(epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val)
    eval_callback.on_epoch_end(epoch + 1)
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.3f || Val Loss: %.3f ' % (total_loss / epoch_step, val_loss / epoch_step_val))

    #-----------------------------------------------#
    #   Save the weights.
    #-----------------------------------------------#
    if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
        torch.save(model.state_dict(), os.path.join(save_dir, 'ep%03d-loss%.3f-val_loss%.3f.pth' % (epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val)))

    if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss):
        print('Save best model to best_epoch_weights.pth')
        torch.save(model.state_dict(), os.path.join(save_dir, "best_epoch_weights.pth"))

    torch.save(model.state_dict(), os.path.join(save_dir, "last_epoch_weights.pth"))

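# Editor's sketch (not part of the original file): the best-checkpoint rule
# above in isolation. append_loss() has already recorded the current epoch,
# so the latest value is compared with "<=" against a history that includes
# itself; a tie with the previous best therefore still triggers a save.
def is_best_epoch(val_loss_history):
    return len(val_loss_history) <= 1 or val_loss_history[-1] <= min(val_loss_history)
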
--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------

import os
import random
import xml.etree.ElementTree as ET

import numpy as np

from utils.utils import get_classes

#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script computes when it runs:
#   0 - the whole labelling pipeline: the txt files in VOCdevkit/VOC2007/ImageSets plus the 2007_train.txt / 2007_val.txt used for training
#   1 - only the txt files in VOCdevkit/VOC2007/ImageSets
#   2 - only the 2007_train.txt / 2007_val.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0
#-------------------------------------------------------------------#
#   Must be modified: used to generate the object information in
#   2007_train.txt / 2007_val.txt, and has to match the classes_path
#   used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes are not set correctly.
#   Only effective when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = r'F:\Desktop\PCB_code\PCB_DataSet\cls_classes.txt'
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val) : test = 9 : 1.
#   train_percent sets the ratio of train to val inside (train + val); by default train : val = 9 : 1.
#   Only effective when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Points to the folder holding the VOC-style dataset.
#   Defaults to the dataset in the repository root.
#-------------------------------------------------------#
PCB_DataSet_path = r'PCB_DataSet'

PCB_Data_Sets = ['trainval', 'test']
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Count images per split and objects per class.
#-------------------------------------------------------#
photo_nums = np.zeros(len(PCB_Data_Sets))
nums = np.zeros(len(classes))
def convert_annotation(image_id, list_file):
    in_file = open(os.path.join(PCB_DataSet_path, 'Annotations/%s.xml' % (image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult') != None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1

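# Editor's sketch (not part of the original file): every annotation line
# written below has the form
#   <abs_path>/JPEGImages/<image_id>.jpg x1,y1,x2,y2,cls_id x1,y1,x2,y2,cls_id ...
# with one comma-separated group per object that survives the difficult/class
# filter above. The image id here is a made-up placeholder:
#
#   import io
#   buf = io.StringIO()
#   convert_annotation("000001", buf)   # requires Annotations/000001.xml on disk
#   print(buf.getvalue())               # e.g. " 48,240,195,371,0 8,12,352,498,1"
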
if __name__ == "__main__":
    random.seed(0)
    if " " in os.path.abspath(PCB_DataSet_path):
        raise ValueError("Neither the dataset folder path nor the image names may contain spaces; this would break training, please rename them.")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(PCB_DataSet_path, 'Annotations')
        saveBasePath = os.path.join(PCB_DataSet_path, 'ImageSets')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        indices = range(num)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in indices:
            name = total_xml[i][:-4] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate trainval.txt and test.txt for training.")
        type_index = 0
        for image_set in PCB_Data_Sets:
            image_ids = open(os.path.join(PCB_DataSet_path, 'ImageSets/%s.txt' % (image_set)), encoding='utf-8').read().strip().split()
            list_file = open(os.path.join(PCB_DataSet_path, '%s.txt' % (image_set)), 'w', encoding='utf-8')  # holds the train/test annotation lines
            for image_id in image_ids:
                list_file.write('%s/JPEGImages/%s.jpg' % (os.path.abspath(PCB_DataSet_path), image_id))  # write the image path

                convert_annotation(image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate trainval.txt and test.txt for training done.")

        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0] * len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is quite small; use a larger number of epochs to get enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset; make sure classes_path matches your dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and that the label names are correct, otherwise training will have no effect!")
            print("(Repeated three times because it matters.)")

--------------------------------------------------------------------------------