├── .gitignore
├── Data_preproces
│   ├── data_expansion.py
│   ├── order_name.py
│   └── voc_annotation.py
├── LICENSE
├── README.md
├── data_expansion.py
├── frcnn_predict.py
├── get_map.py
├── nets
│   ├── FasterRCNN_train.py
│   ├── Suggestion_box.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── FasterRCNN_train.cpython-37.pyc
│   │   ├── FasterRCNN_train.cpython-39.pyc
│   │   ├── Suggestion_box.cpython-37.pyc
│   │   ├── Suggestion_box.cpython-39.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── classifier.cpython-37.pyc
│   │   ├── classifier.cpython-39.pyc
│   │   ├── faster_rcnn_feature_extraction.cpython-39.pyc
│   │   ├── feature_extraction.cpython-39.pyc
│   │   ├── feature_pyramid_network.cpython-39.pyc
│   │   ├── frcnn.cpython-39.pyc
│   │   ├── frcnn_training.cpython-39.pyc
│   │   ├── resnet101.cpython-37.pyc
│   │   ├── resnet101.cpython-39.pyc
│   │   ├── resnet50.cpython-37.pyc
│   │   ├── resnet50.cpython-39.pyc
│   │   ├── resnet50_FPN.cpython-37.pyc
│   │   ├── resnet50_FPN.cpython-39.pyc
│   │   ├── rpn.cpython-37.pyc
│   │   ├── rpn.cpython-39.pyc
│   │   ├── vgg16.cpython-37.pyc
│   │   └── vgg16.cpython-39.pyc
│   ├── classifier.py
│   ├── resnet101.py
│   ├── resnet50.py
│   ├── resnet50_ECA_FPN.py
│   ├── resnet50_FPN.py
│   ├── rpn.py
│   └── vgg16.py
├── order_name.py
├── predict.py
├── qa.md
├── requirements.txt
├── summary.py
├── train.py
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── anchors.cpython-37.pyc
│   │   ├── anchors.cpython-39.pyc
│   │   ├── callbacks.cpython-37.pyc
│   │   ├── callbacks.cpython-39.pyc
│   │   ├── dataloader.cpython-37.pyc
│   │   ├── dataloader.cpython-39.pyc
│   │   ├── utils.cpython-37.pyc
│   │   ├── utils.cpython-39.pyc
│   │   ├── utils_bbox.cpython-37.pyc
│   │   ├── utils_bbox.cpython-39.pyc
│   │   ├── utils_fit.cpython-37.pyc
│   │   ├── utils_fit.cpython-39.pyc
│   │   ├── utils_map.cpython-37.pyc
│   │   └── utils_map.cpython-39.pyc
│   ├── anchors.py
│   ├── callbacks.py
│   ├── dataloader.py
│   ├── kmeans_anchors
│   │   ├── Bikmeans_anchors.py
│   │   ├── main.py
│   │   ├── plot_kmeans.py
│   │   ├── read_voc.py
│   │   └── yolo_kmeans.py
│   ├── soft_nms.py
│   ├── utils.py
│   ├── utils_bbox.py
│   ├── utils_fit.py
│   └── utils_map.py
└── voc_annotation.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# ignore map, miou, datasets
map_out/
miou_out/
VOCdevkit/
datasets/
Medical_Datasets/
lfw/
logs/
model_data/
.temp_map_out/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/Data_preproces/data_expansion.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
"""Data augmentation
1.  flip
2.  crop
3.  color jittering
4.  shift
5.  scale
6.  contrast
7.  noise perturbation
8.  rotation / reflection
9.  histogram equalization
10. Laplacian operator
11. log transform
12. gamma transform
13. contrast-limited adaptive histogram equalization (CLAHE)
14. Retinex SSR
15. Retinex MSR
"""
import logging
import os
import random
import threading
import time

import cv2
import numpy as np
from PIL import Image, ImageEnhance, ImageFile


# Image cropping
def read_path(file_pathname):
    for filename in os.listdir(file_pathname):
        img_filename = os.path.join(file_pathname, filename)  # join the directory path and the file name
        img = cv2.imread(img_filename)  # img_filename is the full path of the image
        crop_img = img[0:3585, 0:3629]  # crop region is img[y0:y1, x0:x1]; (x0, y0) is the top-left, (x1, y1) the bottom-right corner

        # save the cropped figure
        # cv2.imwrite(r'date_set\data_source1' + "/" + filename, crop_img)
        cv2.imwrite(r'jixing\polarity' + "/" + filename, crop_img)


logger = logging.getLogger(__name__)
ImageFile.LOAD_TRUNCATED_IMAGES = True


class DataAugmentation:
    """
    Collects the data-augmentation methods used in this project.
    """

    def __init__(self):
        pass

    @staticmethod
    def openImage(image):
        img = cv2.imread(image)
        return img

    @staticmethod
    def randomRotation(image, center=None, scale=1.0):  # mode=Image.BICUBIC
        """
        Rotate the image by a random angle drawn from [-180, 180) degrees.
        :return: the rotated image
        """
        random_angle = np.random.randint(-180, 180)
        (h, w) = image.shape[:2]
        # If no rotation center is specified, the center of the image is used
        if center is None:
            center = (w / 2, h / 2)
        m = cv2.getRotationMatrix2D(center, random_angle, scale)  # center: rotation center; angle: positive = counter-clockwise; scale: 1.0 keeps the original size
        rotated = cv2.warpAffine(image, m, (w, h))
        return rotated

    @staticmethod
    def transpose(image):
        """
        Random horizontal / vertical / combined flip.
        :return: the flipped image
        """
        random_angle = np.random.randint(-1, 2)  # random flip code in [-1, 1]: -1 both axes, 0 vertical, 1 horizontal
        img_filp = cv2.flip(image, random_angle)
        return img_filp

    '''Color jitter'''

    @staticmethod
    def randomColor(image):
        """
        Apply random color jitter to the image.
        :param image: OpenCV image (BGR)
        :return: the color-jittered image
        """
        saturation = random.randint(0, 1)
        brightness = random.randint(0, 1)
        contrast = random.randint(0, 1)
        sharpness = random.randint(0, 1)
        # Convert to a PIL.Image object so that ImageEnhance can be used
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if random.random() < saturation:
            random_factor = np.random.randint(0, 31) / 10.  # random factor
            image = ImageEnhance.Color(image).enhance(random_factor)  # adjust saturation
        if random.random() < brightness:
            random_factor = np.random.randint(10, 21) / 10.  # random factor
            image = ImageEnhance.Brightness(image).enhance(random_factor)  # adjust brightness
        if random.random() < contrast:
            random_factor = np.random.randint(10, 21) / 10.  # random factor
            image = ImageEnhance.Contrast(image).enhance(random_factor)  # adjust contrast
        if random.random() < sharpness:
            random_factor = np.random.randint(0, 31) / 10.  # random factor
            image = ImageEnhance.Sharpness(image).enhance(random_factor)  # adjust sharpness
        image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)  # convert back to OpenCV (BGR) format
        return image
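
    # Note on the random factors above: ImageEnhance.enhance(1.0) returns the
    # image unchanged; factors below 1.0 weaken and above 1.0 strengthen the
    # corresponding property, so draws from [0, 3) can both fade and boost it.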
    @staticmethod
    def randomGaussian(image, mean=0.2, sigma=0.04):
        """
        Add Gaussian noise to the image.
        mean: mean of the Gaussian distribution
        sigma: standard deviation of the Gaussian distribution; larger sigma means more noise

        Returns:
        gaussian_out: the noised image
        """
        # Normalize the image to [0, 1]
        img = image / 255
        # Generate Gaussian noise
        noise = np.random.normal(mean, sigma, img.shape)
        # Add the noise to the image
        gaussian_out = img + noise
        # Clip values above 1 to 1 and below 0 to 0
        gaussian_out = np.clip(gaussian_out, 0, 1)
        # Restore the intensity range to 0-255
        gaussian_out = np.uint8(gaussian_out * 255)
        # Restore the noise range to 0-255
        # noise = np.uint8(noise * 255)
        return gaussian_out

    @staticmethod
    def Pepper_noise(image):
        '''
        Salt-and-pepper noise
        '''
        # Ratio of salt to pepper noise
        s_vs_p = 0.04
        # Fraction of image pixels to corrupt
        amount = 0.03
        noisy_img = np.copy(image)
        # Add salt noise
        num_salt = np.ceil(amount * image.size * s_vs_p)
        # Pick random coordinates for the noise
        coords = [np.random.randint(0, i - 1, int(num_salt)) for i in image.shape]
        noisy_img[tuple(coords)] = 255
        # Add pepper noise
        num_pepper = np.ceil(amount * image.size * (1. - s_vs_p))
        # Pick random coordinates for the noise
        coords = [np.random.randint(0, i - 1, int(num_pepper)) for i in image.shape]
        noisy_img[tuple(coords)] = 0
        return noisy_img

    @staticmethod
    def Poisson_noise(image):
        '''Poisson noise'''

        # Number of distinct pixel values, rounded up to a power of two
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        # Add Poisson noise to the image
        noisy_img = np.random.poisson(image * vals) / float(vals)
        return noisy_img

    '''Image enhancement algorithms'''

    @staticmethod
    def hist(image):
        '''Histogram equalization'''
        r, g, b = cv2.split(image)
        r1 = cv2.equalizeHist(r)
        g1 = cv2.equalizeHist(g)
        b1 = cv2.equalizeHist(b)
        image_equal_clo = cv2.merge([r1, g1, b1])
        return image_equal_clo

    @staticmethod
    def laplacian(image):
        '''Laplacian sharpening'''
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        image_lap = cv2.filter2D(image, cv2.CV_8UC3, kernel)
        return image_lap

    @staticmethod
    def log(image):
        '''Log transform'''
        image_log = np.uint8(np.log(np.array(image) + 1))
        cv2.normalize(image_log, image_log, 0, 255, cv2.NORM_MINMAX)
        # Convert to an 8-bit image for display
        cv2.convertScaleAbs(image_log, image_log)
        return image_log

    @staticmethod
    def gamma(image):
        '''Gamma transform'''
        fgamma = 0.5  # the larger the value, the darker the output image
        image_gamma = np.uint8(np.power((np.array(image) / 255.0), fgamma) * 255.0)
        cv2.normalize(image_gamma, image_gamma, 0, 255, cv2.NORM_MINMAX)
        cv2.convertScaleAbs(image_gamma, image_gamma)
        return image_gamma

    @staticmethod
    def clahe(image):
        '''Contrast-limited adaptive histogram equalization (CLAHE)'''
        b, g, r = cv2.split(image)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        b = clahe.apply(b)
        g = clahe.apply(g)
        r = clahe.apply(r)
        image_clahe = cv2.merge([b, g, r])
        return image_clahe
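
    # clipLimit caps how much each local histogram may amplify contrast (which
    # limits noise blow-up); tileGridSize is the grid of tiles (8x8 here) that
    # are equalized independently before being blended together.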
    def __replaceZeroes(data):
        min_nonzero = min(data[np.nonzero(data)])
        data[data == 0] = min_nonzero
        return data

    def __SSR(src_img, size):
        L_blur = cv2.GaussianBlur(src_img, (size, size), 0)
        img = DataAugmentation.__replaceZeroes(src_img)
        L_blur = DataAugmentation.__replaceZeroes(L_blur)

        dst_Img = cv2.log(img / 255.0)
        dst_Lblur = cv2.log(L_blur / 255.0)
        dst_IxL = cv2.multiply(dst_Img, dst_Lblur)
        log_R = cv2.subtract(dst_Img, dst_IxL)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        log_uint8 = cv2.convertScaleAbs(dst_R)
        return log_uint8

    @staticmethod
    def SSR_image(image):
        '''Retinex SSR, applied per channel'''
        size = 3
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__SSR(b_gray, size)
        g_gray = DataAugmentation.__SSR(g_gray, size)
        r_gray = DataAugmentation.__SSR(r_gray, size)
        result = cv2.merge([b_gray, g_gray, r_gray])
        return result

    # Retinex MSR
    def __MSR(img, scales):
        weight = 2 / 3.0
        scales_size = len(scales)
        h, w = img.shape[:2]
        log_R = np.zeros((h, w), dtype=np.float32)

        for i in range(scales_size):
            img = DataAugmentation.__replaceZeroes(img)
            L_blur = cv2.GaussianBlur(img, (scales[i], scales[i]), 0)
            L_blur = DataAugmentation.__replaceZeroes(L_blur)
            dst_Img = cv2.log(img / 255.0)
            dst_Lblur = cv2.log(L_blur / 255.0)
            dst_Ixl = cv2.multiply(dst_Img, dst_Lblur)
            log_R += weight * cv2.subtract(dst_Img, dst_Ixl)

        dst_R = cv2.normalize(log_R, None, 0, 255, cv2.NORM_MINMAX)
        log_uint8 = cv2.convertScaleAbs(dst_R)
        return log_uint8

    @staticmethod
    def MSR_image(image):
        '''Retinex MSR, applied per channel'''
        scales = [15, 101, 301]  # e.g. [3, 5, 9]
        b_gray, g_gray, r_gray = cv2.split(image)
        b_gray = DataAugmentation.__MSR(b_gray, scales)
        g_gray = DataAugmentation.__MSR(g_gray, scales)
        r_gray = DataAugmentation.__MSR(r_gray, scales)
        result = cv2.merge([b_gray, g_gray, r_gray])
        return result
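
# Usage sketch for the class above (the paths are illustrative):
#   img = DataAugmentation.openImage(r"F:\some_folder\board.jpg")
#   out = DataAugmentation.clahe(img)           # or any other method above
#   cv2.imwrite(r"F:\some_folder\board_clahe.jpg", out)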


def imageOps(func_name, image1, img_des_path, img_file_name, times=1):  # times=1: run each method once per image
    funcMap = {  # "randomRotation": DataAugmentation.randomRotation,
        "randomcolor": DataAugmentation.randomColor, "transpose": DataAugmentation.transpose,
        "randomGaussian": DataAugmentation.randomGaussian, "pepper_noise": DataAugmentation.Pepper_noise,
        "Poisson_noise": DataAugmentation.Poisson_noise, "hist": DataAugmentation.hist,
        "laplacian": DataAugmentation.laplacian, "log": DataAugmentation.log,
        "gamma": DataAugmentation.gamma, "clahe": DataAugmentation.clahe,
        "SSR_image": DataAugmentation.SSR_image, "MSR_image": DataAugmentation.MSR_image
    }
    if funcMap.get(func_name) is None:
        logger.error("%s does not exist", func_name)
        return -1

    for _i in range(0, times, 1):
        new_image = funcMap[func_name](image1)  # the transformed image
        # path = os.path.join(img_des_path, func_name + str(_i) + img_file_name)  # new name for the saved image
        path = os.path.join(img_des_path, img_file_name)
        cv2.imwrite(path, new_image)


# opsList = {"transpose", 'randomcolor', "gamma", "MSR_image", "pepper_noise", "hist", "log", "clahe",
#            'randomGaussian', 'Poisson_noise', 'laplacian', 'SSR_image'}
opsList = {"clahe"}  # CLAHE gives the best enhancement results here

def threadOPS(img_path, new_img_path):
    """
    Process the images in worker threads.
    :param img_path: source directory (or a single file)
    :param new_img_path: destination directory
    :return:
    """
    # Collect the image names
    if os.path.isdir(img_path):
        img_names = os.listdir(img_path)
    else:
        img_names = [img_path]

    img_num = 0

    # Count the images
    for img_name in img_names:
        tmp_img_name = os.path.join(img_path, img_name)
        if os.path.isdir(tmp_img_name):
            print('contains a sub-folder')
            exit()
        else:
            img_num = img_num + 1
    num = img_num


    for i in range(num):
        img_name = img_names[i]
        tmp_img_name = os.path.join(img_path, img_name)
        # Read the file and process it
        image1 = DataAugmentation.openImage(tmp_img_name)

        # threadImage = [0] * 12  # a list of length 12 could be used instead
        threadImage = {}  # empty dict holding the worker threads
        _index = 0
        for ops_name in opsList:
            # Spawn a new thread per operation
            threadImage[_index] = threading.Thread(target=imageOps,
                                                   args=(ops_name, image1, new_img_path, img_name))
            print('threadImage[{}]:{}'.format(_index, threadImage))
            threadImage[_index].start()  # start the thread
            _index += 1
            time.sleep(0.2)  # brief pause between thread launches


if __name__ == '__main__':
    threadOPS(  # r"F:\Desktop\PCB_code\date_set\1shujuchuli",
        # r"F:\Desktop\PCB_code\date_set\2shujucunfang"
        r'F:\Desktop\PCB_code\data_set1\data_shiyan',
        r'F:\Desktop\PCB_code\data_set1\data_shiyan_kuochong')

    # read_path(r'F:\Desktop\PCB_code\data_set1\data_shiyan')  # image cropping


'''
Note on Windows paths:
In a path such as \table\name\rain, the sequences \t, \n and \r are easily
interpreted as escape characters. There are three common fixes:
#1
path = r"C:\data\table\name\rain"
# prefix with r to disable escaping

#2
path = "C:\\data\\table\\name\\rain"
# use \\ instead of \

#3
path = "C:/data/table/name/rain"
# use / instead of \
'''

--------------------------------------------------------------------------------
/Data_preproces/order_name.py:
--------------------------------------------------------------------------------
#...........................#
# Rename the files in a folder
#...........................#
import os
import xml
from xml.dom import minidom
import xml.etree.cElementTree as ET

def myrename(file_path):
    file_list = os.listdir(file_path)
    for i, fi in enumerate(file_list):
        old_dir = os.path.join(file_path, fi)
        print('file name:', old_dir)
        # Replace spaces in the name with underscores
        new_name = fi.replace(" ", "_")
        print("new name:", new_name)

        # # Sequential numbering instead:
        # # new_name = str(i + 1) + "." + str(fi.split(".")[-1])
        new_dir = os.path.join(file_path, new_name)
        try:
            os.rename(old_dir, new_dir)
        except Exception as e:
            print(e)
            print("Failed!")
        else:
            print("Success!")


#...........................#
# Rewrite the filename and path fields inside the XML files
#...........................#

def xml_name(xmlpath):
    files = os.listdir(xmlpath)  # all file names in the folder
    count = 0
    for xmlFile in files:  # iterate over the folder
        if not os.path.isdir(xmlFile):  # only open regular files, not folders
            name1 = xmlFile.split('.')[0]
            dom = xml.dom.minidom.parse(xmlpath + '/' + xmlFile)
            root = dom.documentElement
            # rewrite filename
            newfilename = root.getElementsByTagName('filename')
            t = newfilename[0].firstChild.data = name1 + '.jpg'
            print('t:', t)
            # rewrite path
            newpath = root.getElementsByTagName('path')
            t1 = newpath[0].firstChild.data = xmlpath + '\\' + name1 + '.jpg'
            print('t1:', t1)

            with open(os.path.join(xmlpath, xmlFile), 'w',) as fh:
                print('fh:', fh)
                dom.writexml(fh)
                print('filename/path written OK!')
            count = count + 1
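
# Example effect of xml_name (illustrative file name): for 0001.xml, <filename>
# is rewritten to 0001.jpg and <path> to "<xmlpath>\0001.jpg", keeping the label
# files and image files in sync after renaming.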

# Strip the XML declaration that minidom writes at the top of each file
def delete_xmlversion(xmlpath, savedir):

    files = os.listdir(xmlpath)
    for ml in files:
        if '.xml' in ml:
            fo = open(savedir + '/' + '{}'.format(ml), 'w', encoding='utf-8')
            print('{}'.format(ml))
            fi = open(xmlpath + '/' + '{}'.format(ml), 'r')
            content = fi.readlines()
            for line in content:
                # line = line.replace('a', 'b')  # e.g. replace a with b
                line = line.replace('<?xml version="1.0" ?>', '')  # drop the declaration written by minidom
                # line = line.replace('测试图片', '车辆图片')
                # line = line.replace('class1', 'class2')
                fo.write(line)
            fo.close()
            print('Replacement done')


# Remove unwanted object labels from the XML files
def Delete_part_information_xml(path_root, xy_classes):
    for anno_path in path_root:
        xml_list = os.listdir(anno_path)
        print("Opening {}".format(xml_list))
        for annoxml in xml_list:
            path_xml = os.path.join(anno_path, annoxml)
            print('File path is {}'.format(path_xml))
            tree = ET.parse(path_xml)
            root = tree.getroot()

            for child in root.findall('object'):
                name = child.find('name').text
                if not name in xy_classes:
                    root.remove(child)
            print(annoxml)
            tree.write(os.path.join(r'F:\Desktop\PCB_code\PCB_DataSet\Annotations—new', annoxml))  # where the processed files are saved




if __name__ == "__main__":
    file_path = r"F:\Desktop\PCB_code\date_set\new_data"  # full path + folder name
    # xmlpath = "F:\\桌面\\PCB_code\\date_set\\Image_label_source"
    # savedir = r'F:\桌面\PCB_code\date_set\3'  # where files go after the XML declaration is stripped
    # xmlpath = r'F:\桌面\PCB_code\date_set\label'
    myrename(file_path)  # rename the image files

    # Rewrite the names inside the XML files
    # myrename(xmlpath)                    # 1. rename the XML files themselves
    # xml_name(xmlpath)                    # 2. rewrite filename and path inside the XML files
    # delete_xmlversion(xmlpath, savedir)  # strip the XML declaration after the rewrite

    # Remove unwanted object labels from the XML files
    path_root = r'F:\Desktop\PCB_code\PCB_DataSet\Annotations'
    xy_classes = ['Speaker', "Bat", "2USB", "Rj45+2USB", "Cap_cross", "Cap_blue_black", "Jumper04p",
                  "Jumper10p", "HDD", "Power08p", "Power04p", "Power24p"]
    Delete_part_information_xml([path_root], xy_classes)  # wrapped in a list: the function iterates over directories
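
delete_xmlversion strips the declaration with a plain string replace. A sketch of the same cleanup done with ElementTree instead (paths and the function name are illustrative); ET simply never writes the declaration when xml_declaration=False:

```python
import os
import xml.etree.ElementTree as ET

def strip_declaration(xmlpath, savedir):
    for name in os.listdir(xmlpath):
        if name.endswith('.xml'):
            tree = ET.parse(os.path.join(xmlpath, name))
            # write the tree back without the <?xml ...?> declaration line
            tree.write(os.path.join(savedir, name), encoding='utf-8', xml_declaration=False)
```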

--------------------------------------------------------------------------------
/Data_preproces/voc_annotation.py:
--------------------------------------------------------------------------------
import os
import random
import xml.etree.ElementTree as ET

import numpy as np

from utils.utils import get_classes

#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script computes when run:
#   0 - the whole labeling pipeline: the txt files in VOCdevkit/VOC2007/ImageSets plus the 2007_train.txt / 2007_val.txt used for training
#   1 - only the txt files in VOCdevkit/VOC2007/ImageSets
#   2 - only the 2007_train.txt / 2007_val.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0
#-------------------------------------------------------------------#
#   Must be modified: used to generate the object information in 2007_train.txt / 2007_val.txt.
#   Keep it identical to the classes_path used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes are not set correctly.
#   Only effective when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = r'F:\Desktop\PCB_code\PCB_DataSet\cls_classes.txt'
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val):test = 9:1.
#   train_percent sets the ratio of train to val inside (train + val); by default train:val = 9:1.
#   Only effective when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Points to the folder containing the VOC-style dataset;
#   defaults to the dataset in the repository root.
#-------------------------------------------------------#
PCB_DataSet_path = r'PCB_DataSet'

PCB_Data_Sets = ['trainval', 'test']
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Count the objects per class
#-------------------------------------------------------#
photo_nums = np.zeros(len(PCB_Data_Sets))
nums = np.zeros(len(classes))
def convert_annotation(image_id, list_file):
    in_file = open(os.path.join(PCB_DataSet_path, 'Annotations/%s.xml'%(image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult')!=None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1
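
# Each image contributes one line to the generated trainval/test txt, e.g.
# (the coordinates and class ids below are illustrative):
#   F:/Desktop/PCB_code/PCB_DataSet/JPEGImages/0001.jpg 48,240,195,371,3 12,30,52,98,7
# i.e. the absolute image path followed by one "xmin,ymin,xmax,ymax,class_id"
# group per ground-truth box.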

if __name__ == "__main__":
    random.seed(0)
    if " " in os.path.abspath(PCB_DataSet_path):
        raise ValueError("The dataset folder path and the image names must not contain spaces, otherwise training will be affected; please rename them.")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(PCB_DataSet_path, 'Annotations')
        saveBasePath = os.path.join(PCB_DataSet_path, 'ImageSets')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        indices = range(num)
        tv = int(num*trainval_percent)
        tr = int(tv*train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in indices:
            name = total_xml[i][:-4]+'\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate PCB_train.txt and PCB_val.txt for train.")
        type_index = 0
        for image_set in PCB_Data_Sets:
            image_ids = open(os.path.join(PCB_DataSet_path, 'ImageSets/%s.txt'%(image_set)), encoding='utf-8').read().strip().split()
            list_file = open(os.path.join(PCB_DataSet_path, '%s.txt'%(image_set)), 'w', encoding='utf-8')  # holds the train / test lists
            for image_id in image_ids:
                list_file.write('%s/JPEGImages/%s.jpg'%(os.path.abspath(PCB_DataSet_path), image_id))  # write the image path into the list

                convert_annotation(image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate PCB_train.txt and PCB_val.txt for train done.")

        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0]*len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is quite small; use a larger number of epochs to get enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset; make sure classes_path matches your dataset and the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and the label names are correct, otherwise training will have no effect!")
            print("(Repeated three times because it matters.)")

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 JiaQi Xu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Faster R-CNN: a PCB-component-defect-detection object detection model implemented in PyTorch
---

## Contents
1. [Top News](#top-news)
2. [Performance](#performance)
3. [Environment](#environment)
4. [Download](#download)
5. [How2predict](#how2predict)
6. [How2train](#how2train)
7. [How2eval](#how2eval)
8. [Reference](#reference)

## Top News
**This project supports step and cos learning-rate schedules, a choice of Adam or SGD optimizers, learning-rate scaling with batch_size, and adds image cropping.**
**It also adds extensive comments, many tunable parameters, a reorganized module layout, and features such as FPS measurement, video prediction and batch prediction.**

## Performance
| training dataset | weights file | test dataset | input size | mAP 0.5:0.95 | mAP 0.5 |
| :-----: | :-----: | :------: | :------: | :------: | :-----: |
| VOC07+12 | [voc_weights_resnet.pth](https://github.com/bubbliiiing/faster-rcnn-pytorch/releases/download/v1.0/voc_weights_resnet.pth) | VOC-Test07 | - | - | 80.36
| VOC07+12 | [voc_weights_vgg.pth](https://github.com/bubbliiiing/faster-rcnn-pytorch/releases/download/v1.0/voc_weights_vgg.pth) | VOC-Test07 | - | - | 77.46

**This repository also ships trained weights, so downloading them is optional.**

## Environment
torch == 1.2.0

## Download
voc_weights_resnet.pth or voc_weights_vgg.pth needed for training, as well as the backbone weights, can be downloaded from Baidu Netdisk.
voc_weights_resnet.pth is used when ResNet is the backbone feature extractor;
voc_weights_vgg.pth is used when VGG is the backbone feature extractor.
Link: https://pan.baidu.com/s/1S6wG8sEXBeoSec95NZxmlQ
Code: 8mgp

The VOC dataset can be downloaded below; it already contains the training, test and validation sets (validation equals test), so no further split is needed:
Link: https://pan.baidu.com/s/1YuBbBKxm2FGgTU5OfaeC5A
Code: uack
**The author used PCB images captured on a production line, which cannot be uploaded; readers should prepare their own dataset or download the one linked above.**
**In what follows, the public VOC07+12 dataset is used for the walkthrough.**

## How2train
### a. Training on VOC07+12
1. Dataset preparation
**Training uses the VOC format; download VOC07+12 and unzip it into the repository root before training.**

2. Dataset preprocessing
Set annotation_mode=2 in voc_annotation.py and run it to generate 2007_train.txt and 2007_val.txt in the root directory.

3. Start training
The default parameters of train.py are set up for the VOC dataset; simply run train.py to start training.

4. Predicting with the trained model
Prediction uses two files, frcnn_predict.py and predict.py. First edit model_path and classes_path in frcnn_predict.py; both parameters must be changed.
**model_path points to the trained weights file in the logs folder.
classes_path points to the txt listing the detection classes.**
Then run predict.py and type an image path to run detection.

### b. Training on your own dataset
1. Dataset preparation
**Training uses the VOC format; build your own dataset before training.**
Put the label files into the Annotation folder under VOCdevkit/VOC2007.
Put the image files into the JPEGImages folder under VOCdevkit/VOC2007.

2. Dataset preprocessing
After arranging the dataset, run voc_annotation.py to produce the 2007_train.txt and 2007_val.txt used for training.
Edit the parameters in voc_annotation.py. For a first run only classes_path needs changing; it points to the txt listing the detection classes.
When training on your own dataset, create a cls_classes.txt listing the classes you want to distinguish.
The content of model_data/cls_classes.txt is, for example:
```python
cat
dog
...
```
Edit classes_path in voc_annotation.py so it points to cls_classes.txt, then run voc_annotation.py.

3. Start training
**There are many training parameters, all in train.py; read the comments carefully after downloading the repository. The most important one is still classes_path in train.py.**
**classes_path points to the txt of detection classes, the same txt as in voc_annotation.py! It must be changed when training your own dataset!**
After editing classes_path, run train.py to start training; after several epochs the weights appear in the logs folder.

4. Predicting with the trained model
Prediction uses two files, frcnn_predict.py and predict.py. Edit model_path and classes_path in frcnn_predict.py.
**model_path points to the trained weights file in the logs folder.
classes_path points to the txt listing the detection classes.**
Then run predict.py and type an image path to run detection.

## How2predict
### a. Using pretrained weights
1. After downloading and unzipping the repository, download frcnn_weights.pth from Baidu Netdisk, put it into model_data, run predict.py and type
```python
img/street.jpg
```
2. Settings in predict.py enable FPS testing and video detection.
### b. Using your own trained weights
1. Train as described above.
2. In frcnn_predict.py, edit model_path and classes_path to match your trained files; **model_path points to the weights file under the logs folder, and classes_path lists the classes model_path was trained on**.


3. Run predict.py and type
```python
img/street.jpg
```
4. Settings in predict.py enable FPS testing and video detection.
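
The full pipeline above can be summarized as the following command sequence (a sketch; the file names follow the defaults used in this repository):
```python
# 1) generate the ImageSets txt files and the training lists
#    python voc_annotation.py
# 2) train (weights are written to logs/)
#    python train.py
# 3) predict: edit model_path / classes_path in frcnn_predict.py first
#    python predict.py      # then type an image path such as img/street.jpg
```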

## How2eval
### a. Evaluating the VOC07+12 test set
1. Evaluation uses the VOC format. VOC07+12 already provides a test split, so there is no need to run voc_annotation.py to generate the txt files under ImageSets.
2. Edit model_path and classes_path in frcnn.py. **model_path points to the trained weights file in the logs folder. classes_path points to the txt listing the detection classes.**
3. Run get_map.py; the evaluation results are saved in the map_out folder.

### b. Evaluating your own dataset
1. Evaluation uses the VOC format.
2. If voc_annotation.py was already run before training, the code has split the dataset into training, validation and test sets. To change the test-set ratio, edit trainval_percent in voc_annotation.py: trainval_percent sets (train + val):test, 9:1 by default; train_percent sets train:val inside (train + val), 9:1 by default.
3. After splitting the test set with voc_annotation.py, edit classes_path in get_map.py so it points to the same class txt used for training. It must be changed when evaluating your own dataset.
4. Edit model_path and classes_path in frcnn_predict.py. **model_path points to the trained weights file in the logs folder. classes_path points to the txt listing the detection classes.**
5. Run get_map.py; the evaluation results are saved in the map_out folder.
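
Once the paths are set, evaluation likewise reduces to one command (a sketch):
```python
# python get_map.py   # writes ground-truth/, detection-results/ and the mAP report into map_out/
```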

--------------------------------------------------------------------------------
/get_map.py:
--------------------------------------------------------------------------------
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm

from utils.utils import get_classes
from utils.utils_map import get_coco_map, get_map
from frcnn_predict import FRCNN

if __name__ == "__main__":
    '''
    Unlike AP, Recall and Precision are not areas under a curve, so their values change with the
    confidence threshold. By default this script reports the Recall and Precision at a confidence
    threshold of 0.5.

    Because of how mAP is computed, the network must output nearly all of its candidate boxes so
    that Recall and Precision can be evaluated at every threshold. The txt files in
    map_out/detection-results/ therefore usually contain more boxes than a plain predict run:
    they list every possible prediction.
    '''
    #------------------------------------------------------------------------------------------------------------------#
    #   map_mode selects what this script computes when run:
    #   0 - the whole pipeline: prediction results, ground truth, and the VOC mAP
    #   1 - prediction results only
    #   2 - ground truth only
    #   3 - VOC mAP only
    #   4 - the 0.50:0.95 mAP of the current dataset via the COCO toolbox; requires the prediction
    #       results and the ground truth, plus an installed pycocotools
    #-------------------------------------------------------------------------------------------------------------------#
    map_mode = 0
    #--------------------------------------------------------------------------------------#
    #   classes_path points to the classes whose VOC mAP is measured;
    #   normally identical to the classes_path used for training and prediction.
    #--------------------------------------------------------------------------------------#
    classes_path = 'PCB_DataSet/cls_classes.txt'
    #--------------------------------------------------------------------------------------#
    #   MINOVERLAP selects the mAP0.x to compute, e.g. set MINOVERLAP = 0.75 for mAP0.75.
    #
    #   A prediction counts as a positive only if its overlap with a ground-truth box exceeds
    #   MINOVERLAP, otherwise it is a negative. The larger the value, the more precise the
    #   predicted boxes must be, and the lower the resulting mAP.
    #--------------------------------------------------------------------------------------#
    MINOVERLAP = 0.5
    #--------------------------------------------------------------------------------------#
    #   Because of how mAP is computed, the network must output nearly all candidate boxes,
    #   so confidence should be set very low to keep them all.
    #
    #   This value is normally left unchanged; computing mAP needs nearly every candidate box.
    #   To inspect Recall and Precision at other thresholds, change score_threhold below instead.
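    #
    #   Illustration: with confidence = 0.02 nearly every candidate box enters the PR curve,
    #   which is what mAP needs; the single Recall/Precision pair that gets reported is then
    #   read off that curve at score_threhold = 0.5.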
    #--------------------------------------------------------------------------------------#
    confidence = 0.02
    #--------------------------------------------------------------------------------------#
    #   NMS IoU used at prediction time; larger values make non-maximum suppression less strict.
    #
    #   This value is normally left unchanged.
    #--------------------------------------------------------------------------------------#
    nms_iou = 0.5
    #---------------------------------------------------------------------------------------------------------------#
    #   Unlike AP, Recall and Precision depend on the threshold, so their values change with it.
    #
    #   By default this script reports them at a threshold of 0.5 (defined here as score_threhold).
    #   Since confidence above must stay tiny for the mAP computation, score_threhold is the knob
    #   that picks the threshold at which the reported Recall and Precision are read off.
    #---------------------------------------------------------------------------------------------------------------#
    score_threhold = 0.5
    #-------------------------------------------------------#
    #   map_vis toggles visualization during the VOC mAP computation
    #-------------------------------------------------------#
    map_vis = False
    #-------------------------------------------------------#
    #   Points to the folder containing the VOC-style dataset;
    #   defaults to the dataset in the repository root.
    #-------------------------------------------------------#
    VOCdevkit_path = r'F:\Desktop\PCB_code\PCB_DataSet'
    #-------------------------------------------------------#
    #   Output folder for the results, map_out by default
    #-------------------------------------------------------#
    map_out_path = 'faster-rcnn-pytorch-master/map_out'

    image_ids = open(os.path.join(VOCdevkit_path, "ImageSets/test.txt")).read().strip().split()

    if not os.path.exists(map_out_path):
        os.makedirs(map_out_path)
    if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
        os.makedirs(os.path.join(map_out_path, 'ground-truth'))
    if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
        os.makedirs(os.path.join(map_out_path, 'detection-results'))
    if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
        os.makedirs(os.path.join(map_out_path, 'images-optional'))

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        frcnn = FRCNN(confidence = confidence, nms_iou = nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "JPEGImages/"+image_id+".jpg")
            image = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            frcnn.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")
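
    # File formats written under map_out (the example values are illustrative):
    #   detection-results/0001.txt : one "class_name confidence left top right bottom" per box
    #   ground-truth/0001.txt      : one "class_name left top right bottom [difficult]" per box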
    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "Annotations/"+image_id+".xml")).getroot()
                for obj in root.findall('object'):
                    difficult_flag = False
                    if obj.find('difficult')!=None:
                        difficult = obj.find('difficult').text
                        if int(difficult)==1:
                            difficult_flag = True
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, score_threhold = score_threhold, path = map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names = class_names, path = map_out_path)
        print("Get map done.")

--------------------------------------------------------------------------------
/nets/FasterRCNN_train.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch

from nets.classifier import Resnet50RoIHead, Resnet101RoIHead, VGG16RoIHead, Resnet50_FPNRoIHead
from nets.vgg16 import decom_vgg16
from nets.resnet50 import resnet50
from nets.resnet101 import resnet101
from nets.resnet50_FPN import resnet50_FPN
from nets.rpn import RegionProposalNetwork, resnet50_fpn_RPNhead



class FasterRCNN(nn.Module):
    def __init__(self, num_classes,
                mode = "training",
                feat_stride = 16,
                anchor_scales = [4, 16, 32],
                ratios = [0.5, 1, 2],
                backbone = 'vgg',
                pretrained = False):
        super(FasterRCNN, self).__init__()  # initialize the attributes inherited from the parent class
        self.feat_stride = feat_stride
        #---------------------------------#
        #   Backbones: vgg, resnet50, resnet101, resnet50_FPN
        #---------------------------------#
        if backbone == 'vgg':
            self.extractor, classifier = decom_vgg16(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                512, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = VGG16RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 7,
                spatial_scale   = 1,
                classifier      = classifier
            )

        elif backbone == 'resnet50':
            # Feature-extraction and classification layers of the backbone
            self.extractor, classifier = resnet50(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = Resnet50RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier
            )

        elif backbone == 'resnet101':
            self.extractor, classifier = resnet101(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            self.rpn = RegionProposalNetwork(
                1024, 512,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = Resnet101RoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier)

        elif
 backbone == 'resnet50_FPN':
            self.extractor, classifier = resnet50_FPN(pretrained)
            #---------------------------------#
            #   Build the region proposal network
            #---------------------------------#
            ratios = ratios * len(anchor_scales)
            self.rpn = resnet50_fpn_RPNhead(
                256, 256,
                ratios          = ratios,
                anchor_scales   = anchor_scales,
                feat_stride     = self.feat_stride,
                mode            = mode
            )
            #---------------------------------#
            #   Build the classifier head
            #---------------------------------#
            self.head = Resnet50_FPNRoIHead(
                n_class         = num_classes + 1,
                roi_size        = 14,
                spatial_scale   = 1,
                classifier      = classifier)

    # x = [base_feature, img_size], produced in Suggestion_box.FasterRCNNTrainer.forward()
    def forward(self, x, scale=1., mode="forward"):
        if mode == "forward":
            #---------------------------------#
            #   Size of the input image
            #---------------------------------#
            img_size = x.shape[2:]
            #---------------------------------#
            #   Extract features with the backbone
            #---------------------------------#
            base_feature = self.extractor.forward(x)

            #---------------------------------#
            #   Get the region proposals
            #---------------------------------#
            _, _, rois, roi_indices, _ = self.rpn.forward(base_feature, img_size, scale)
            #---------------------------------------#
            #   Classification and regression results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores, rois, roi_indices
        elif mode == "extractor":
            #---------------------------------#
            #   Extract features with the backbone (e.g. resnet50)
            #---------------------------------#
            base_feature = self.extractor.forward(x)
            return base_feature
        elif mode == "rpn":
            base_feature, img_size = x
            #---------------------------------#
            #   Get the region proposals
            #---------------------------------#
            rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn.forward(base_feature, img_size, scale)
            return rpn_locs, rpn_scores, rois, roi_indices, anchor
        elif mode == "head":
            base_feature, rois, roi_indices, img_size = x
            #---------------------------------------#
            #   Classification and regression results of the classifier head
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature, rois, roi_indices, img_size)
            return roi_cls_locs, roi_scores
        elif mode == "fpn_head":
            base_feature, rois, roi_indices, img_size = x
            #---------------------------------------#
            #   Classification and regression results of the classifier head,
            #   predicted on pyramid levels p2~p5
            #---------------------------------------#
            roi_cls_locs, roi_scores = self.head.forward(base_feature[:4], rois[:4], roi_indices[:4], img_size)
            return roi_cls_locs, roi_scores

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
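
The mode argument turns forward() into a small dispatcher over the detection stages. A minimal sketch of driving the stages separately (the class count and input size below are arbitrary placeholders):

```python
import torch
from nets.FasterRCNN_train import FasterRCNN

model = FasterRCNN(num_classes=12, backbone="resnet50")   # 12 is a placeholder
model.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 600, 600)                       # dummy input batch
    feat = model(x, mode="extractor")                     # backbone feature map
    rpn_locs, rpn_scores, rois, roi_indices, anchor = model(
        [feat, x.shape[2:]], mode="rpn")                  # region proposals
    roi_cls_locs, roi_scores = model(
        [feat, rois, roi_indices, x.shape[2:]], mode="head")  # per-RoI results
```

This is the same sequence that mode="forward" runs end to end; the split modes exist so the trainer can interleave loss computation between the stages.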
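[Editor's note — illustrative addition, not a repository file] Every plain RoI head in nets/classifier.py below feeds torchvision's RoIPool with boxes packed as [batch_index, x1, y1, x2, y2]. A minimal self-contained sketch of that call convention (all shapes are made up for illustration):

import torch
from torchvision.ops import RoIPool

pool = RoIPool(output_size=(7, 7), spatial_scale=1.0)
feat = torch.randn(1, 512, 38, 38)               # one shared feature map
rois = torch.tensor([[0., 4., 4., 20., 24.]])    # [batch_index, x1, y1, x2, y2]
out = pool(feat, rois)                           # -> [1, 512, 7, 7]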
--------------------------------------------------------------------------------
/nets/classifier.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | import torch
4 | from torch import nn
5 | from torchvision.ops import RoIPool, MultiScaleRoIAlign, RoIAlign
6 | from collections import OrderedDict
7 | 
8 | warnings.filterwarnings("ignore")
9 | 
10 | class VGG16RoIHead(nn.Module):
11 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
12 |         super(VGG16RoIHead, self).__init__()
13 |         self.classifier = classifier
14 |         #--------------------------------------#
15 |         #   bounding-box regression on the RoI-pooled features
16 |         #--------------------------------------#
17 |         self.cls_loc = nn.Linear(4096, n_class * 4)
18 |         #-----------------------------------#
19 |         #   classification of the RoI-pooled features
20 |         #-----------------------------------#
21 |         self.score = nn.Linear(4096, n_class)
22 |         #-----------------------------------#
23 |         #   weight initialization
24 |         #-----------------------------------#
25 |         normal_init(self.cls_loc, 0, 0.001)
26 |         normal_init(self.score, 0, 0.01)
27 | 
28 |         self.roi = RoIPool((roi_size, roi_size), spatial_scale)
29 | 
30 |     def forward(self, x, rois, roi_indices, img_size):
31 |         n, _, _, _ = x.shape
32 |         if x.is_cuda:
33 |             roi_indices = roi_indices.cuda()
34 |             rois = rois.cuda()
35 |         rois = torch.flatten(rois, 0, 1)
36 |         roi_indices = torch.flatten(roi_indices, 0, 1)
37 | 
38 |         rois_feature_map = torch.zeros_like(rois)
39 |         rois_feature_map[:, [0,2]] = rois[:, [0,2]] / img_size[1] * x.size()[3]
40 |         rois_feature_map[:, [1,3]] = rois[:, [1,3]] / img_size[0] * x.size()[2]
41 | 
42 |         indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
43 |         #-----------------------------------#
44 |         #   crop the shared feature map with the proposal boxes
45 |         #-----------------------------------#
46 |         pool = self.roi(x, indices_and_rois)
47 |         #-----------------------------------#
48 |         #   feature extraction with the classifier network
49 |         #-----------------------------------#
50 |         pool = pool.view(pool.size(0), -1)
51 |         #--------------------------------------------------------------#
52 |         #   for a single input image, fc7 has shape [300, 4096]
53 |         #--------------------------------------------------------------#
54 |         fc7 = self.classifier(pool)
55 | 
56 |         roi_cls_locs = self.cls_loc(fc7)
57 |         roi_scores = self.score(fc7)
58 | 
59 |         roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
60 |         roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
61 |         return roi_cls_locs, roi_scores
62 | 
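# [Editor's note, not part of the repository file] The rescaling above maps image-space
# boxes to feature-map coordinates: with a 600x600 input and a 38x38 shared feature map,
# an x-coordinate of 300 becomes 300 / 600 * 38 = 19. This is equivalent to constructing
# RoIPool with spatial_scale = 38/600 (about 1/16) instead of the hard-coded
# spatial_scale = 1 used here.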
63 | # classification head of the Faster R-CNN network
64 | class Resnet50RoIHead(nn.Module):
65 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
66 |         super(Resnet50RoIHead, self).__init__()
67 |         self.classifier = classifier
68 |         #--------------------------------------#
69 |         #   bounding-box regression on the RoI-pooled features
70 |         #   in_features: 2048 -> out_features: n_class * 4
71 |         #--------------------------------------#
72 |         self.cls_loc = nn.Linear(2048, n_class * 4)
73 |         #-----------------------------------#
74 |         #   classification of the RoI-pooled features
75 |         #-----------------------------------#
76 |         self.score = nn.Linear(2048, n_class)
77 |         #-----------------------------------#
78 |         #   weight initialization
79 |         #-----------------------------------#
80 |         normal_init(self.cls_loc, 0, 0.001)
81 |         normal_init(self.score, 0, 0.01)
82 | 
83 |         self.roi = RoIPool((roi_size, roi_size), spatial_scale)
84 | 
85 |     def forward(self, x, rois, roi_indices, img_size):
86 |         n, _, _, _ = x.shape
87 |         if x.is_cuda:
88 |             roi_indices = roi_indices.cuda()
89 |             rois = rois.cuda()
90 |         rois = torch.flatten(rois, 0, 1)
91 |         roi_indices = torch.flatten(roi_indices, 0, 1)
92 | 
93 |         # rescale the proposals to feature-map coordinates
94 |         rois_feature_map = torch.zeros_like(rois)
95 |         rois_feature_map[:, [0,2]] = rois[:, [0,2]] / img_size[1] * x.size()[3]
96 |         rois_feature_map[:, [1,3]] = rois[:, [1,3]] / img_size[0] * x.size()[2]
97 | 
98 |         indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
99 |         #-----------------------------------#
100 |         #   crop the shared feature map with the proposal boxes,
101 |         #   mapping each proposal onto the prediction feature layer
102 |         #-----------------------------------#
103 |         pool = self.roi(x, indices_and_rois)
104 |         #-----------------------------------#
105 |         #   feature extraction with the classifier network
106 |         #-----------------------------------#
107 |         fc = self.classifier(pool)
108 |         #--------------------------------------------------------------#
109 |         #   for a single input image, fc7 has shape [300, 2048]
110 |         #--------------------------------------------------------------#
111 |         fc7 = fc.view(fc.size(0), -1)
112 | 
113 |         roi_cls_locs = self.cls_loc(fc7)
114 |         roi_scores = self.score(fc7)
115 |         roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
116 |         roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
117 |         return roi_cls_locs, roi_scores
118 | 
119 | 
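# [Editor's note — illustrative sketch, not part of the repository file]
# Resnet50_FPNRoIHead below pools from several FPN levels at once through torchvision's
# MultiScaleRoIAlign, which assigns each box to a pyramid level based on its area.
# A self-contained sketch of that API (level names, sizes and the box are made up):
#
#     import torch
#     from torchvision.ops import MultiScaleRoIAlign
#
#     m = MultiScaleRoIAlign(featmap_names=['p2', 'p3'], output_size=7, sampling_ratio=2)
#     feats = {'p2': torch.randn(1, 256, 150, 150), 'p3': torch.randn(1, 256, 75, 75)}
#     boxes = [torch.tensor([[20., 20., 220., 220.]])]   # xyxy boxes in image coordinates
#     out = m(feats, boxes, [(600, 600)])                # -> [1, 256, 7, 7]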
120 | class Resnet50_FPNRoIHead(nn.Module):
121 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
122 |         super(Resnet50_FPNRoIHead, self).__init__()
123 |         self.classifier = classifier
124 |         #--------------------------------------#
125 |         #   fully connected layers applied after RoI pooling
126 |         #   max-pooling layer
127 |         #--------------------------------------#
128 |         self.maxpool = nn.AdaptiveMaxPool2d(7)  # output size = (7, 7)
129 |         self.fc = nn.Linear(12544, 1024)
130 |         #--------------------------------------#
131 |         #   bounding-box regression on the RoI-pooled features
132 |         #   in_features: 1024 -> out_features: n_class * 4
133 |         #--------------------------------------#
134 |         self.cls_loc = nn.Linear(1024, n_class * 4)
135 |         #-----------------------------------#
136 |         #   classification of the RoI-pooled features
137 |         #-----------------------------------#
138 |         self.score = nn.Linear(1024, n_class)
139 |         #-----------------------------------#
140 |         #   weight initialization
141 |         #-----------------------------------#
142 |         normal_init(self.fc, 0, 0.001)
143 |         normal_init(self.cls_loc, 0, 0.001)
144 |         normal_init(self.score, 0, 0.01)
145 | 
146 |         self.roi = MultiScaleRoIAlign(featmap_names=['p2', 'p3', 'p4', 'p5'], output_size=7, sampling_ratio=2, canonical_scale=600, canonical_level=4)
147 |         # self.roi = RoIAlign(output_size=7, sampling_ratio=2, spatial_scale=1)
148 | 
149 |     def forward(self, x, rois, roi_indices, img_size):
150 |         # import time
151 |         # start = time.time()
152 |         # convert the list of pyramid levels into an OrderedDict, as expected by MultiScaleRoIAlign()
153 |         Ordered_x = OrderedDict(p2=x[0], p3=x[1], p4=x[2], p5=x[3])
154 |         # img_size_p1 = [(img_size[0], img_size[1])]
155 |         rois_p = []
156 |         # merge the (foreground and background) proposals of every pyramid level
157 |         for p in range(len(x)):
158 |             n, _, _, _ = x[p].shape
159 |             if x[p].is_cuda:
160 |                 roi = rois[p].cuda()
161 |                 rois_p.append(torch.flatten(roi, 0, 1))
162 |             else:
163 |                 rois_p.append(torch.flatten(rois[p], 0, 1))
164 | 
165 | 
166 |         # concatenate the p2~p5 proposal boxes into a single [tensor]
167 |         rois_x = [torch.cat(rois_p, dim=0)]
168 |         # run MultiScaleRoIAlign() over the multi-scale feature levels
169 |         pool = self.roi(Ordered_x, rois_x, [img_size])
170 |         #--------------------------------------------------------------#
171 |         #   flatten the pooled RoIs along the first dimension;
172 |         #   each RoI yields a 256*7*7 = 12544-dim vector
173 |         #--------------------------------------------------------------#
174 |         fc = self.maxpool(pool)
175 |         fc = fc.view(fc.size(0), -1)
176 |         # fully connected layer; outputs a 1024-dim feature vector
177 |         fc7 = self.fc(fc)
178 | 
179 |         roi_cls_locs = self.cls_loc(fc7)
180 |         roi_scores = self.score(fc7)
181 |         roi_cls_locs = roi_cls_locs.view(n, -1, roi_cls_locs.size(1))
182 |         roi_scores = roi_scores.view(n, -1, roi_scores.size(1))
183 | 
184 |         # end = time.time() - start
185 |         # print("elapsed time:", end)
186 |         return roi_cls_locs, roi_scores
187 | 
188 | 
189 | class Resnet101RoIHead(nn.Module):
190 |     def __init__(self, n_class, roi_size, spatial_scale, classifier):
191 |         super(Resnet101RoIHead, self).__init__()
192 |         self.classifier = classifier
193 |         #--------------------------------------#
194 |         #   bounding-box regression on the RoI-pooled features
195 |         #--------------------------------------#
196 |         self.cls_loc = nn.Linear(2048, n_class * 4)
197 |         #-----------------------------------#
198 |         #   classification of the RoI-pooled features
199 |         #-----------------------------------#
200 |         self.score = nn.Linear(2048, n_class)
201 |         #-----------------------------------#
202 |         #   weight initialization
203 |         #-----------------------------------#
204 |         normal_init(self.cls_loc, 0, 0.001)
205 |         normal_init(self.score, 0, 0.01)
206 | 
207 |         self.roi = RoIPool((roi_size, roi_size), spatial_scale)
208 | 
209 |     def forward(self, x, rois, roi_indices, img_size):
210 |         n, _, _, _ = x.shape
211 |         if x.is_cuda:
212 |             roi_indices = roi_indices.cuda()
213 |             rois = rois.cuda()
214 |         rois = torch.flatten(rois, 0, 1)
215 |         roi_indices = torch.flatten(roi_indices, 0, 1)
216 | 
217 |         rois_feature_map = torch.zeros_like(rois)
218 |         rois_feature_map[:, [0,2]] = rois[:, [0,2]] / img_size[1] * x.size()[3]
219 |         rois_feature_map[:, [1,3]] = rois[:, [1,3]] / img_size[0] * x.size()[2]
220 | 
221 |         indices_and_rois = torch.cat([roi_indices[:, None], rois_feature_map], dim=1)
222 |         #-----------------------------------#
223 |         #   crop the shared feature map with the proposal boxes
224 |         #-----------------------------------#
225 |         pool = self.roi(x, indices_and_rois)
226 |         #-----------------------------------#
227 |         #   feature extraction with the classifier network
228 |         #-----------------------------------#
229 |         fc7 = self.classifier(pool)
230 |         #--------------------------------------------------------------#
231 |         #   for a single input image, fc7 has shape [300, 2048]
232 |         #--------------------------------------------------------------#
233 |         fc7 = fc7.view(fc7.size(0), -1)
234 | 
235 |         roi_cls_locs = self.cls_loc(fc7)
236 |         roi_scores = self.score(fc7)
237 |         roi_cls_locs = roi_cls_locs.view(n, -1,
roi_cls_locs.size(1)) 238 | roi_scores = roi_scores.view(n, -1, roi_scores.size(1)) 239 | return roi_cls_locs, roi_scores 240 | 241 | 242 | def normal_init(m, mean, stddev, truncated=False): 243 | if truncated: 244 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 245 | else: 246 | m.weight.data.normal_(mean, stddev) 247 | m.bias.data.zero_() 248 | -------------------------------------------------------------------------------- /nets/resnet101.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | from torch.hub import load_state_dict_from_url 4 | 5 | class Bottleneck(nn.Module): 6 | """ 7 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 8 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 9 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 10 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 11 | """ 12 | expansion = 4 13 | def __init__(self, inplanes, planes, stride=1, downsample=None): 14 | super(Bottleneck, self).__init__() 15 | #1*1的卷积压缩通道数 16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 17 | self.bn1 = nn.BatchNorm2d(planes) 18 | #3*3卷积特征提取 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | #1*1复原通道数 22 | 23 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 24 | self.bn3 = nn.BatchNorm2d(planes * 4) 25 | 26 | self.relu = nn.ReLU(inplace=True) 27 | self.downsample = downsample 28 | self.stride = stride 29 | 30 | def forward(self, x): 31 | residual = x 32 | 33 | out = self.conv1(x) 34 | out = self.bn1(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv2(out) 38 | out = self.bn2(out) 39 | out = self.relu(out) 40 | 41 | out = self.conv3(out) 42 | out = self.bn3(out) 43 | if self.downsample is not None: 44 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 45 | #无残差边:输入维度=输出维度,对应identity block 46 | 47 | out += residual 48 | out = self.relu(out) 49 | 50 | return out 51 | 52 | class ResNet101(nn.Module): 53 | def __init__(self, block, layers, num_classes=1000): 54 | #-----------------------------------# 55 | # 假设输入进来的图片是600,600,3 56 | #-----------------------------------# 57 | self.inplanes = 64 58 | super(ResNet101, self).__init__() 59 | 60 | # input(600,600,3) -> conv2d stride(300,300,64) 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 62 | #步长stride=2,输出通道数=64,bias偏移量 63 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 64 | self.relu = nn.ReLU(inplace=True) #激活函数 65 | 66 | # 300,300,64 -> 150,150,64 最大池化 67 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 68 | 69 | # 150,150,64 -> 150,150,256 70 | self.layer1 = self._make_layer(block, 64, layers[0]) 71 | # 150,150,256 -> 75,75,512 72 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 73 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 74 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 75 | # self.layer4被用在classifier模型中 76 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 77 | 78 | self.avgpool = nn.AvgPool2d(7) 79 | self.fc = nn.Linear(512 * block.expansion, num_classes) 80 | 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 84 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 85 | elif isinstance(m, nn.BatchNorm2d): 86 | m.weight.data.fill_(1) 87 | m.bias.data.zero_() 88 | 89 | # 构建resnet残差结构layer1.。。。layer5 90 | def _make_layer(self, block, planes, blocks, stride=1): 91 | downsample = None 92 | #-------------------------------------------------------------------# 93 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 94 | #-------------------------------------------------------------------# 95 | if stride != 1 or self.inplanes != planes * block.expansion: 96 | downsample = nn.Sequential( 97 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 98 | nn.BatchNorm2d(planes * block.expansion), 99 | ) 100 | layers = [] 101 | layers.append(block(self.inplanes, planes, stride, downsample)) 102 | self.inplanes = planes * block.expansion 103 | for i in range(1, blocks): 104 | layers.append(block(self.inplanes, planes)) 105 | return nn.Sequential(*layers) 106 | 107 | def forward(self, x): 108 | x = self.conv1(x) 109 | x = self.bn1(x) 110 | x = self.relu(x) 111 | x = self.maxpool(x) 112 | 113 | x = self.layer1(x) 114 | x = self.layer2(x) 115 | x = self.layer3(x) 116 | x = self.layer4(x) 117 | 118 | x = self.avgpool(x) 119 | x = x.view(x.size(0), -1) #维度变化 120 | x = self.fc(x) #全连接层 121 | return x 122 | 123 | def resnet101(pretrained = False): 124 | model = ResNet101(Bottleneck, [3, 4, 23, 3]) #对应resnet101的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 125 | if pretrained: 126 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", model_dir="./model_data") 127 | model.load_state_dict(state_dict) 128 | #----------------------------------------------------------------------------# 129 | # 获取特征提取部分,从conv1到model.layer3,最终获得一个38,38,1024的特征层 130 | #----------------------------------------------------------------------------# 131 | features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]) 132 | #----------------------------------------------------------------------------# 133 | # 获取分类部分,从model.layer4到model.avgpool 134 | #----------------------------------------------------------------------------# 135 | classifier = list([model.layer4, model.avgpool]) 136 | 137 | features = nn.Sequential(*features) 138 | classifier = nn.Sequential(*classifier) 139 | return features, classifier 140 | -------------------------------------------------------------------------------- /nets/resnet50.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | from torch.hub import load_state_dict_from_url 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | """ 9 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 10 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 11 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 12 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 13 | """ 14 | expansion = 4 15 | def __init__(self, inplanes, planes, stride=1, downsample=None): 16 | super(Bottleneck, self).__init__() 17 | #1*1的卷积压缩通道数 18 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | #3*3卷积特征提取 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | #1*1复原通道数 24 | 25 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 26 | self.bn3 = nn.BatchNorm2d(planes * 4) 27 | 28 | self.relu = nn.ReLU(inplace=True) 29 | 
self.downsample = downsample 30 | self.stride = stride 31 | 32 | def forward(self, x): 33 | residual = x 34 | 35 | out = self.conv1(x) 36 | out = self.bn1(out) 37 | out = self.relu(out) 38 | 39 | out = self.conv2(out) 40 | out = self.bn2(out) 41 | out = self.relu(out) 42 | 43 | out = self.conv3(out) 44 | out = self.bn3(out) 45 | if self.downsample is not None: 46 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 47 | #无残差边:输入维度=输出维度,对应identity block 48 | 49 | out += residual 50 | out = self.relu(out) 51 | 52 | return out 53 | 54 | class ResNet(nn.Module): 55 | def __init__(self, block, layers, include_top=True,num_classes=1000): 56 | #-----------------------------------# 57 | # 假设输入进来的图片是600,600,3 58 | #-----------------------------------# 59 | self.include_top = include_top 60 | self.inplanes = 64 61 | super(ResNet, self).__init__() 62 | 63 | # input(600,600,3) -> conv2d stride(300,300,64) 64 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 65 | #步长stride=2,输出通道数=64,bias偏移量 66 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 67 | self.relu = nn.ReLU(inplace=True) #激活函数 68 | 69 | # 300,300,64 -> 150,150,64 最大池化 70 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 71 | 72 | # 150,150,64 -> 150,150,256 73 | self.layer1 = self._make_layer(block, 64, layers[0]) 74 | # 150,150,256 -> 75,75,512 75 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 76 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 77 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 78 | # self.layer4被用在classifier模型中 79 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 80 | 81 | if self.include_top: 82 | self.avgpool = nn.AvgPool2d(7) # output size = (1, 1) 83 | self.fc = nn.Linear(512 * block.expansion, num_classes) 84 | 85 | #resnet模型每层进行参数学习,如:layer1中每层进行模型训练 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d): 88 | new_var = 1 89 | n = m.kernel_size[0] * m.kernel_size[new_var] * m.out_channels #通道数的改变(如:256->64) 90 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 91 | elif isinstance(m, nn.BatchNorm2d): 92 | m.weight.data.fill_(1) #更改resnet50网络中每层中权重数据 93 | m.bias.data.zero_() 94 | 95 | def _make_layer(self, block, planes, blocks, stride=1): 96 | downsample = None 97 | #-------------------------------------------------------------------# 98 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 99 | #-------------------------------------------------------------------# 100 | if stride != 1 or self.inplanes != planes * block.expansion: 101 | downsample = nn.Sequential( 102 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 103 | nn.BatchNorm2d(planes * block.expansion), 104 | ) 105 | layers = [] 106 | layers.append(block(self.inplanes, planes, stride, downsample)) 107 | self.inplanes = planes * block.expansion 108 | # resnet50网络层数堆积,layer=[3, 4, 6, 3] 109 | for i in range(1, blocks): 110 | layers.append(block(self.inplanes, planes)) 111 | return nn.Sequential(*layers) 112 | 113 | def forward(self, x): 114 | x = self.conv1(x) 115 | x = self.bn1(x) 116 | x = self.relu(x) 117 | x = self.maxpool(x) 118 | 119 | x = self.layer1(x) 120 | x = self.layer2(x) 121 | x = self.layer3(x) 122 | x = self.layer4(x) 123 | 124 | if self.include_top: 125 | x = self.avgpool(x) 126 | # x = torch.flatten(x, 1) 127 | x = x.view(x.size(0), -1) # 传入神经网络之前将tensor变形, 128 | x = self.fc(x) # 输入全连接层,神经网络输入准备 129 | 130 | return x 131 | 132 | def resnet50(pretrained = False): 133 | model = ResNet(Bottleneck, [3, 4, 6, 3]) #对应resnet50的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 134 | if pretrained: 135 | state_dict = load_state_dict_from_url("https://download.pytorch.org/models/resnet50-19c8e357.pth", model_dir="./model_data") 136 | model.load_state_dict(state_dict) 137 | #----------------------------------------------------------------------------# 138 | # 获取特征提取部分,从conv1到model.layer3,最终获得一个38,38,1024的特征层 139 | #----------------------------------------------------------------------------# 140 | features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]) 141 | print('features:', features) 142 | #----------------------------------------------------------------------------# 143 | # 获取分类部分,从model.layer4到model.avgpool 144 | #----------------------------------------------------------------------------# 145 | classifier = list([model.layer4, model.avgpool]) 146 | print('classifier:', classifier) 147 | 148 | features = nn.Sequential(*features) 149 | print('features:', features) 150 | classifier = nn.Sequential(*classifier) 151 | print('classifier:', classifier) 152 | return features, classifier 153 | 154 | 155 | # net = ResNet(Bottleneck, [3, 4, 6, 3]) 156 | # print(net) 157 | 158 | -------------------------------------------------------------------------------- /nets/resnet50_ECA_FPN.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | class eca_layer(nn.Module): 6 | """Constructs a ECA module. 
7 | 8 | Args: 9 | channel: Number of channels of the input feature map 10 | k_size: Adaptive selection of kernel size 11 | """ 12 | def __init__(self, channel, k_size=3): 13 | super(eca_layer, self).__init__() 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | # feature descriptor on the global spatial information 20 | y = self.avg_pool(x) 21 | 22 | # Two different branches of ECA module 23 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 24 | 25 | # Multi-scale information fusion 26 | y = self.sigmoid(y) 27 | 28 | return x * y.expand_as(x) 29 | 30 | 31 | class Bottleneck(nn.Module): 32 | """ 33 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 34 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 35 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 36 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 37 | """ 38 | expansion = 4 #通道倍增数 39 | 40 | def __init__(self, inplanes, planes, stride=1, downsample=None,k_size=3): 41 | super(Bottleneck, self).__init__() 42 | 43 | #1*1的卷积压缩通道数 44 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | #3*3卷积特征提取 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | #1*1复原通道数 50 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(planes * 4) 52 | 53 | # 激活+下采样 54 | self.relu = nn.ReLU(inplace=True) 55 | # 加入ECA模型 56 | self.eca = eca_layer(planes * 4, k_size) 57 | 58 | self.downsample = downsample 59 | self.stride = stride 60 | 61 | def forward(self, x): 62 | residual = x 63 | 64 | out = self.conv1(x) 65 | out = self.bn1(out) 66 | out = self.relu(out) 67 | 68 | out = self.conv2(out) 69 | out = self.bn2(out) 70 | out = self.relu(out) 71 | 72 | out = self.conv3(out) 73 | out = self.bn3(out) 74 | out = self.eca(out) 75 | 76 | if self.downsample is not None: 77 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 78 | #无残差边:输入维度=输出维度,对应identity block 79 | out += residual 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | class ResNet50_ECA_FPN(nn.Module): 85 | def __init__(self, block, layers, num_classes=100,k_size=[3, 3, 3, 3]): 86 | #-----------------------------------# 87 | # 假设输入进来的图片是600,600,3 88 | #-----------------------------------# 89 | super(ResNet50_ECA_FPN, self).__init__() 90 | self.inplanes = 64 91 | 92 | #处理输入的C1模块(C1代表了RestNet的前几个卷积与池化层) 93 | # input(600,600,3) -> conv2d stride(300,300,64) 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 95 | #步长stride=2,输出通道数=64,bias偏移量 96 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 97 | self.relu = nn.ReLU(inplace=True) #激活函数 98 | 99 | # 300,300,64 -> 150,150,64 最大池化 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 101 | 102 | ''' Bottom-up layers ,搭建自下而上的C2,C3,C4,C5''' 103 | # 150,150,64 -> 150,150,256 104 | self.layer1 = self._make_layer(block, 64, layers[0],int(k_size[0])) 105 | # 150,150,256 -> 75,75,512 106 | self.layer2 = self._make_layer(block, 128, layers[1],int(k_size[1]), stride=2) 107 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 108 | self.layer3 = self._make_layer(block, 256, layers[2],int(k_size[2]), stride=2) 109 | # 38,38,1024 -> 19,19,2048 110 | 
self.layer4 = self._make_layer(block, 512, layers[3],int(k_size[3]), stride=2) 111 | 112 | # 对C5减少通道数,得到P5 113 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 114 | 115 | # Smooth layers,3x3卷积融合特征 116 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 117 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 118 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 119 | self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 120 | 121 | # Lateral layers,横向连接,保证通道数相同 122 | self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 123 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 124 | self.latlayer1 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) 125 | 126 | # 19,19,p5 ->10,10, p6 最大池化 127 | self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=True) 128 | 129 | # 平均池化层和全连接层 130 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size = (1, 1) 131 | self.fc = nn.Linear(256, 256) 132 | 133 | #resnet模型每层进行参数学习,如:layer1中每层进行模型训练 134 | for m in self.modules(): 135 | if isinstance(m, nn.Conv2d): 136 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 137 | m.weight.data.normal_(0, math.sqrt(2. / n)) 138 | elif isinstance(m, nn.BatchNorm2d): 139 | m.weight.data.fill_(1) 140 | m.bias.data.zero_() 141 | 142 | def _make_layer(self, block, planes, blocks, k_size, stride=1): 143 | downsample = None 144 | #-------------------------------------------------------------------# 145 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 146 | # 将输入的downsample(x)自动按照Sequential()里面的布局,顺序执行, 147 | # 目的:优化类似于这种结构:x = self.bn1(x),x = self.relu(x),降低运行内存。 148 | #-------------------------------------------------------------------# 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 152 | nn.BatchNorm2d(planes * block.expansion), 153 | ) 154 | layers = [] 155 | layers.append(block(self.inplanes, planes, stride, downsample,k_size)) 156 | self.inplanes = planes * block.expansion 157 | # resnet50网络层数堆积,layer=[3, 4, 6, 3] 158 | for i in range(1, blocks): 159 | layers.append(block(self.inplanes, planes,k_size=k_size)) 160 | return nn.Sequential(*layers) 161 | 162 | # 通过上采样后,进行特征融合 163 | def _upsample_add(self, x, y): 164 | _,_,H,W = y.size() 165 | return nn.functional.upsample(x, size=(H,W), mode='bilinear') + y 166 | 167 | def forward(self, x): 168 | # Bottom-up 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | c1 = self.maxpool(x) 173 | 174 | # 自己构建的fpn网络,c1~c4层搭建 175 | c2 = self.layer1(c1) 176 | c3 = self.layer2(c2) 177 | c4 = self.layer3(c3) 178 | c5 = self.layer4(c4) 179 | 180 | # Top-down 降通道数 181 | p5 = self.toplayer(c5) 182 | # upsample 183 | p4 = self._upsample_add(p5, self.latlayer3(c4)) 184 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 185 | p2 = self._upsample_add(p3, self.latlayer1(c2)) 186 | 187 | # Smooth,特征提取,卷积的融合,平滑处理 188 | p5 = self.smooth4(p5) 189 | # 19,19,256->10,10,256 经过maxpool得到p6,用于rpn网络中 190 | p6 = self.maxpool_p6(p5) 191 | p4 = self.smooth3(p4) 192 | p3 = self.smooth2(p3) 193 | p2 = self.smooth1(p2) 194 | 195 | x = [p2, p3, p4,p5, p6] 196 | # 对fpn的特征层进行全连接层 197 | # for key,value in x.items() : 198 | # value = self.avgpool(value) 199 | # # view()函数的功能根reshape类似,用来转换size大小。x = x.view(batchsize, 
-1)中batchsize指转换后有几行,而-1指在不告诉函数有多少列的情况下,根据原tensor数据和batchsize自动分配列数。 200 | # value = value.view(value.size(0), -1) 201 | # # value = torch.flatten(value, 1) #flatten(x,1)是按照x的第1个维度拼接(按照列来拼接,横向拼接);flatten(x,0)是按照x的第0个维度拼接(按照行来拼接,纵向拼接) 202 | # value = self.fc(value) 203 | # # value = value.view(-1) 204 | # x.update(key,value) 205 | 206 | return x 207 | 208 | # test 209 | # FPN = ResNet50_ECA_FPN(Bottleneck, [3, 4, 6, 3]) 210 | # print('FPN:',FPN) 211 | 212 | 213 | def resnet50_ECA_FPN(pretrained=False): 214 | # 对应resnet50的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 215 | model = ResNet50_ECA_FPN(Bottleneck, [3, 4, 6, 3]) 216 | # print('ResNet50_FPN:',model) 217 | #----------------------------------------------------------------------------# 218 | # 获取特征提取部分,从conv1到model.smooth1(p4层),获得多个p2, p3, p4, p5,p6不同尺度的特征层 219 | #----------------------------------------------------------------------------# 220 | # features = list([model.conv1, model.bn1, model.relu,model.maxpool, model.layer1, model.layer2, model.layer3,model.layer4, 221 | # model.toplayer,model.smooth4, model.smooth3, model.smooth2, model.smooth1]) 222 | 223 | # features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, 224 | # model.layer2, model.layer3, model.layer4, ]) 225 | #----------------------------------------------------------------------------# 226 | # 获取分类部分,从model.smooth3(p2)到model.toplayer(p5)特征层 227 | #----------------------------------------------------------------------------# 228 | classifier = list([model.smooth1, model.smooth2, model.smooth3, model.smooth4, 229 | model.avgpool]) 230 | 231 | # 特征提取(feature map) 232 | features = model 233 | # features = nn.Sequential(*features) # 函数参数(位置参数,*可变参数(以tuple/list形式传递),**关键字参数(以字典形式传递), 234 | # 默认参数(需要放在参数中最右端,避免传参是歧义)) 235 | print('features:', features) 236 | classifier = nn.Sequential(*classifier) #在进行完roipool层后,进行回归和分类预测 237 | print('classifier:', classifier) 238 | return features, classifier 239 | 240 | -------------------------------------------------------------------------------- /nets/resnet50_FPN.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from torch.hub import load_state_dict_from_url 4 | 5 | class eca_layer(nn.Module): 6 | """Constructs a ECA module. 
7 | 8 | Args: 9 | channel: Number of channels of the input feature map 10 | k_size: Adaptive selection of kernel size 11 | """ 12 | def __init__(self, channel, k_size=3): 13 | super(eca_layer, self).__init__() 14 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 15 | self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False) 16 | self.sigmoid = nn.Sigmoid() 17 | 18 | def forward(self, x): 19 | # feature descriptor on the global spatial information 20 | y = self.avg_pool(x) 21 | 22 | # Two different branches of ECA module 23 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 24 | 25 | # Multi-scale information fusion 26 | y = self.sigmoid(y) 27 | 28 | return x * y.expand_as(x) 29 | 30 | 31 | class Bottleneck(nn.Module): 32 | """ 33 | 注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。 34 | 但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2, 35 | 这么做的好处是能够在top1上提升大概0.5%的准确率。 36 | 可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch 37 | """ 38 | expansion = 4 #通道倍增数 39 | 40 | def __init__(self, inplanes, planes, stride=1, downsample=None,k_size=3): 41 | super(Bottleneck, self).__init__() 42 | 43 | #1*1的卷积压缩通道数 44 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | #3*3卷积特征提取 47 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | #1*1复原通道数 50 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 51 | self.bn3 = nn.BatchNorm2d(planes * 4) 52 | 53 | # 激活+下采样 54 | self.relu = nn.ReLU(inplace=True) 55 | # 加入ECA模型 56 | self.eca = eca_layer(planes * 4, k_size) 57 | 58 | self.downsample = downsample 59 | self.stride = stride 60 | 61 | def forward(self, x): 62 | residual = x 63 | 64 | out = self.conv1(x) 65 | out = self.bn1(out) 66 | out = self.relu(out) 67 | 68 | out = self.conv2(out) 69 | out = self.bn2(out) 70 | out = self.relu(out) 71 | 72 | out = self.conv3(out) 73 | out = self.bn3(out) 74 | out = self.eca(out) 75 | 76 | if self.downsample is not None: 77 | residual = self.downsample(x) #判断是否有残差边,有残差边即为:输入维度和输出维度发生改变,对应conv block 78 | #无残差边:输入维度=输出维度,对应identity block 79 | out += residual 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | class ResNet50_FPN(nn.Module): 85 | def __init__(self, block, layers, num_classes=100,k_size=[1, 1, 1, 1]): 86 | #-----------------------------------# 87 | # 假设输入进来的图片是600,600,3 88 | #-----------------------------------# 89 | super(ResNet50_FPN, self).__init__() 90 | self.inplanes = 64 91 | 92 | #处理输入的C1模块(C1代表了RestNet的前几个卷积与池化层) 93 | # input(600,600,3) -> conv2d stride(300,300,64) 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)#输入3通道,卷积核大小kernel_size=7*7, 95 | #步长stride=2,输出通道数=64,bias偏移量 96 | self.bn1 = nn.BatchNorm2d(64) #标准化(归一化) 97 | self.relu = nn.ReLU(inplace=True) #激活函数 98 | 99 | # 300,300,64 -> 150,150,64 最大池化 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) 101 | 102 | ''' Bottom-up layers ,搭建自下而上的C2,C3,C4,C5''' 103 | # 150,150,64 -> 150,150,256 104 | self.layer1 = self._make_layer(block, 64, layers[0],int(k_size[0])) 105 | # 150,150,256 -> 75,75,512 106 | self.layer2 = self._make_layer(block, 128, layers[1],int(k_size[1]), stride=2) 107 | # 75,75,512 -> 38,38,1024 到这里可以获得一个38,38,1024的共享特征层 108 | self.layer3 = self._make_layer(block, 256, layers[2],int(k_size[2]), stride=2) 109 | # 38,38,1024 -> 19,19,2048 110 | 
self.layer4 = self._make_layer(block, 512, layers[3],int(k_size[3]), stride=2) 111 | 112 | # 对C5减少通道数,得到P5 113 | self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) # Reduce channels 114 | 115 | # Smooth layers,3x3卷积融合特征 116 | self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 117 | self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 118 | self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 119 | self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 120 | 121 | # Lateral layers,横向连接,保证通道数相同 122 | self.latlayer3 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 123 | self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) 124 | self.latlayer1 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0) 125 | 126 | # 19,19,p5 ->10,10, p6 最大池化 127 | self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=True) 128 | 129 | # 最池化层和全连接层 130 | self.maxpool1 = nn.AdaptiveMaxPool2d(7) # output size = (1, 1) 131 | # self.fc = nn.Linear(256, 256) 132 | 133 | #resnet模型每层进行参数学习,如:layer1中每层进行模型训练 134 | for m in self.modules(): 135 | if isinstance(m, nn.Conv2d): 136 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 137 | m.weight.data.normal_(0, math.sqrt(2. / n)) 138 | elif isinstance(m, nn.BatchNorm2d): 139 | m.weight.data.fill_(1) 140 | m.bias.data.zero_() 141 | 142 | def _make_layer(self, block, planes, blocks,k_size, stride=1): 143 | downsample = None 144 | #-------------------------------------------------------------------# 145 | # 当模型需要进行高和宽的压缩的时候,就需要用到残差边的downsample(下采样) 146 | # 将输入的downsample(x)自动按照Sequential()里面的布局,顺序执行, 147 | # 目的:优化类似于这种结构:x = self.bn1(x),x = self.relu(x),降低运行内存。 148 | #-------------------------------------------------------------------# 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 152 | nn.BatchNorm2d(planes * block.expansion), 153 | ) 154 | layers = [] 155 | layers.append(block(self.inplanes, planes, stride,downsample,k_size)) 156 | self.inplanes = planes * block.expansion 157 | # resnet50网络层数堆积,layer=[3, 4, 6, 3] 158 | for i in range(1, blocks): 159 | layers.append(block(self.inplanes, planes,k_size)) 160 | return nn.Sequential(*layers) 161 | 162 | # 通过上采样后,进行特征融合 163 | def _upsample_add(self, x, y): 164 | _,_,H,W = y.size() 165 | return nn.functional.upsample(x, size=(H,W), mode='bilinear') + y 166 | 167 | def forward(self, x): 168 | # Bottom-up 169 | x = self.conv1(x) 170 | x = self.bn1(x) 171 | x = self.relu(x) 172 | c1 = self.maxpool(x) 173 | 174 | # 自己构建的fpn网络,c1~c4层搭建 175 | c2 = self.layer1(c1) 176 | c3 = self.layer2(c2) 177 | c4 = self.layer3(c3) 178 | c5 = self.layer4(c4) 179 | 180 | # Top-down 降通道数 181 | p5 = self.toplayer(c5) 182 | # upsample 183 | p4 = self._upsample_add(p5, self.latlayer3(c4)) 184 | p3 = self._upsample_add(p4, self.latlayer2(c3)) 185 | p2 = self._upsample_add(p3, self.latlayer1(c2)) 186 | 187 | # Smooth,特征提取,卷积的融合,平滑处理 188 | p5 = self.smooth4(p5) 189 | # 19,19,256->10,10,256 经过maxpool得到p6,用于rpn网络中 190 | p6 = self.maxpool_p6(p5) 191 | p4 = self.smooth3(p4) 192 | p3 = self.smooth2(p3) 193 | p2 = self.smooth1(p2) 194 | 195 | x = [p2, p3, p4,p5, p6] 196 | # 对fpn的特征层进行全连接层 197 | # for key,value in x.items() : 198 | # value = self.avgpool(value) 199 | # # view()函数的功能根reshape类似,用来转换size大小。x = x.view(batchsize, 
-1)中batchsize指转换后有几行,而-1指在不告诉函数有多少列的情况下,根据原tensor数据和batchsize自动分配列数。 200 | # value = value.view(value.size(0), -1) 201 | # # value = torch.flatten(value, 1) #flatten(x,1)是按照x的第1个维度拼接(按照列来拼接,横向拼接);flatten(x,0)是按照x的第0个维度拼接(按照行来拼接,纵向拼接) 202 | # value = self.fc(value) 203 | # # value = value.view(-1) 204 | # x.update(key,value) 205 | 206 | return x 207 | 208 | # test 209 | # FPN = ResNet50_FPN(Bottleneck, [3, 4, 6, 3]) 210 | # print('FPN:',FPN) 211 | 212 | 213 | def resnet50_FPN(pretrained=False): 214 | # 对应resnet50的网络结构shape,第五次压缩是在roi中使用,有3个bottleneck。 215 | model = ResNet50_FPN(Bottleneck, [3, 4, 6, 3]) 216 | # print('ResNet50_FPN:',model) 217 | #----------------------------------------------------------------------------# 218 | # 获取特征提取部分,从conv1到model.smooth1(p4层),获得多个p2, p3, p4, p5,p6不同尺度的特征层 219 | #----------------------------------------------------------------------------# 220 | # features = list([model.conv1, model.bn1, model.relu,model.maxpool, model.layer1, model.layer2, model.layer3,model.layer4, 221 | # model.toplayer,model.smooth4, model.smooth3, model.smooth2, model.smooth1]) 222 | 223 | # features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, 224 | # model.layer2, model.layer3, model.layer4, ]) 225 | #----------------------------------------------------------------------------# 226 | # 获取分类部分,从model.smooth3(p2)到model.toplayer(p5)特征层 227 | #----------------------------------------------------------------------------# 228 | classifier = list([model.smooth1, model.smooth2, model.smooth3, model.smooth4, 229 | model.maxpool1]) 230 | 231 | # 特征提取(feature map) 232 | features = model 233 | # features = nn.Sequential(*features) # 函数参数(位置参数,*可变参数(以tuple/list形式传递),**关键字参数(以字典形式传递), 234 | # 默认参数(需要放在参数中最右端,避免传参是歧义)) 235 | print('features:', features) 236 | classifier = nn.Sequential(*classifier) #在进行完roipool层后,进行回归和分类预测 237 | print('classifier:', classifier) 238 | return features, classifier 239 | 240 | -------------------------------------------------------------------------------- /nets/rpn.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torchvision.ops import nms 7 | from utils.anchors import _enumerate_shifted_anchor, generate_anchor_base 8 | from utils.utils_bbox import loc2bbox 9 | from collections import OrderedDict 10 | 11 | class ProposalCreator(): 12 | def __init__( 13 | self, 14 | mode, 15 | nms_iou = 0.7, 16 | n_train_pre_nms = 12000, 17 | n_train_post_nms = 1000, 18 | n_test_pre_nms = 3000, 19 | n_test_post_nms = 1000, 20 | min_size = 16 21 | 22 | ): 23 | #-----------------------------------# 24 | # 设置预测还是训练 25 | #-----------------------------------# 26 | self.mode = mode 27 | #-----------------------------------# 28 | # 建议框非极大抑制的iou大小 29 | #-----------------------------------# 30 | self.nms_iou = nms_iou 31 | #-----------------------------------# 32 | # 训练用到的建议框数量 33 | #-----------------------------------# 34 | self.n_train_pre_nms = n_train_pre_nms 35 | self.n_train_post_nms = n_train_post_nms 36 | #-----------------------------------# 37 | # 预测用到的建议框数量 38 | #-----------------------------------# 39 | self.n_test_pre_nms = n_test_pre_nms 40 | self.n_test_post_nms = n_test_post_nms 41 | self.min_size = min_size 42 | 43 | def __call__(self, loc, score, anchor, img_size, scale=1.): 44 | if self.mode == "training": 45 | n_pre_nms = self.n_train_pre_nms 46 | n_post_nms = self.n_train_post_nms 47 | 
else: 48 | n_pre_nms = self.n_test_pre_nms 49 | n_post_nms = self.n_test_post_nms 50 | 51 | #-----------------------------------# 52 | # 将先验框转换成tensor 53 | #-----------------------------------# 54 | anchor = torch.from_numpy(anchor).type_as(loc) 55 | #-----------------------------------# 56 | # 将RPN网络预测结果转化成建议框 57 | #-----------------------------------# 58 | roi = loc2bbox(anchor, loc) 59 | #-----------------------------------# 60 | # 防止建议框超出图像边缘 61 | #-----------------------------------# 62 | roi[:, [0, 2]] = torch.clamp(roi[:, [0, 2]], min = 0, max = img_size[1]) 63 | roi[:, [1, 3]] = torch.clamp(roi[:, [1, 3]], min = 0, max = img_size[0]) 64 | 65 | #-----------------------------------# 66 | # 建议框的宽高的最小值不可以小于16 67 | #-----------------------------------# 68 | min_size = self.min_size * scale 69 | keep = torch.where(((roi[:, 2] - roi[:, 0]) >= min_size) & ((roi[:, 3] - roi[:, 1]) >= min_size))[0] 70 | #-----------------------------------# 71 | # 将对应的建议框保留下来 72 | #-----------------------------------# 73 | roi = roi[keep, :] 74 | score = score[keep] 75 | 76 | #-----------------------------------# 77 | # 根据得分进行排序,取出建议框 78 | #-----------------------------------# 79 | order = torch.argsort(score, descending=True) 80 | if n_pre_nms > 0: 81 | order = order[:n_pre_nms] 82 | roi = roi[order, :] 83 | score = score[order] 84 | 85 | #-----------------------------------# 86 | # 对建议框进行非极大抑制 87 | # 使用官方的非极大抑制会快非常多 88 | #-----------------------------------# 89 | keep = nms(roi, score, self.nms_iou) 90 | if len(keep) < n_post_nms: 91 | index_extra = np.random.choice(range(len(keep)), size=(n_post_nms - len(keep)), replace=True) 92 | keep = torch.cat([keep, keep[index_extra]]) 93 | keep = keep[:n_post_nms] 94 | roi = roi[keep] 95 | return roi 96 | 97 | 98 | class resnet50_fpn_RPNhead(nn.Module): 99 | def __init__( 100 | self, 101 | in_channels=512, 102 | mid_channels=512, 103 | ratios=[0.5, 1, 2], 104 | anchor_scales=[4, 16, 32], 105 | feat_stride=16, 106 | mode="training", 107 | ): 108 | super(resnet50_fpn_RPNhead, self).__init__() 109 | #-----------------------------------------# 110 | # 生成基础先验框,shape为[9, 4] 111 | #-----------------------------------------# 112 | self.anchor_base = generate_anchor_base( 113 | anchor_scales=anchor_scales, ratios=ratios) 114 | #每个网格上默认的先验框数量 115 | n_anchor = self.anchor_base.shape[0] 116 | 117 | #-----------------------------------------# 118 | # 先进行一个3x3的卷积,可理解为特征整合 119 | #-----------------------------------------# 120 | self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1) 121 | #-----------------------------------------# 122 | # 分类预测先验框内部是否包含物体,score为带有18通道的conv1*1卷积, 123 | #-----------------------------------------# 124 | self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0) 125 | #-----------------------------------------# 126 | # 回归预测对先验框进行调整,loc带有36通道的conv1*1卷积 127 | #-----------------------------------------# 128 | self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0) 129 | 130 | #-----------------------------------------# 131 | # 特征点间距步长 132 | #-----------------------------------------# 133 | self.feat_stride = feat_stride 134 | #-----------------------------------------# 135 | # 用于对建议框解码并进行非极大抑制 136 | #-----------------------------------------# 137 | self.proposal_layer = ProposalCreator(mode) 138 | #--------------------------------------# 139 | # 对FPN的网络部分进行权值初始化 140 | #--------------------------------------# 141 | normal_init(self.conv1, 0, 0.01) 142 | normal_init(self.score, 0, 0.01) 143 | normal_init(self.loc, 0, 0.01) 144 | 145 | #输入的x为feature 
map共享特征p2~p6层, 146 | def forward(self, x, img_size, scale=1.): 147 | rois = [] 148 | roi_indices =[] 149 | rpn_locs =[] 150 | rpn_scores=[] 151 | anchor = [] 152 | #对p2~p5层分别进行建议框生成 153 | for p in x: 154 | n, _, h, w = p.shape 155 | #-----------------------------------------# 156 | # 先进行一个3x3的卷积,可理解为特征整合 157 | #-----------------------------------------# 158 | p = F.relu(self.conv1(p)) # 激活函数 159 | #-----------------------------------------# 160 | # 回归预测对先验框进行调整 161 | # view(n, -1, 4):n个(m/(4*n))行4列的新tensor形状。 162 | # 交换后n(第0维度)=batch_size(这里为2,表示背景和物体形状), 163 | # -1(1维度):表示每个先验框(自行计算), 164 | # 4(第2维度)=调整先验框位置的四个参数。 165 | #-----------------------------------------# 166 | rpn_locs_k = self.loc(p) 167 | rpn_locs_k = rpn_locs_k.permute(0, 2, 3, 1).contiguous().view(n, -1, 4) 168 | #-----------------------------------------# 169 | # torch.transpose():交换指定的两个维度的内容 170 | # torch.permute():一次性交换多个维度。 171 | # contiguous():相当于是在permute(0, 2, 3, 1)tensor中复制一份,在用于view()中的tensor进行结构改变,而不影响前面的数据内容和结构。 172 | # torch.view():首先,view()函数会将Tensor所有维度拉平成一维(m),然后再根据传入的的维度信息重构出一个Tensor。 173 | # 174 | # Tensor与ndarray数组一样, 175 | # 176 | # 分类预测先验框内部是否包含物体 177 | # 178 | #-----------------------------------------# 179 | rpn_scores_k = self.score(p) 180 | rpn_scores_k = rpn_scores_k.permute(0, 2, 3, 1).contiguous().view(n, -1, 2) 181 | 182 | #--------------------------------------------------------------------------------------# 183 | # 进行softmax概率计算,每个先验框只有两个判别结果 184 | # 内部包含物体或者内部不包含物体,rpn_softmax_scores[:, :, 1]的内容为包含物体的概率 185 | #--------------------------------------------------------------------------------------# 186 | rpn_softmax_scores = F.softmax(rpn_scores_k, dim=-1) 187 | rpn_fg_scores = rpn_softmax_scores[:, :, 1].contiguous() 188 | rpn_fg_scores = rpn_fg_scores.view(n, -1) 189 | 190 | #------------------------------------------------------------------------------------------------# 191 | # 生成先验框,此时获得的anchor是布满网格点的,当输入图片为600,600,3的时候,shape为(12996, 4) 192 | #------------------------------------------------------------------------------------------------# 193 | anchor_k = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w) 194 | rois_k = [] 195 | roi_indices_k = [] 196 | #分离开背景和前景 197 | for i in range(n): 198 | roi = self.proposal_layer(rpn_locs_k[i], rpn_fg_scores[i], anchor_k, img_size, scale=scale) 199 | batch_index = i * torch.ones((len(roi),)) 200 | rois_k.append(roi.unsqueeze(0)) 201 | roi_indices_k.append(batch_index.unsqueeze(0)) 202 | 203 | #------------------------------------------------------------------# 204 | # 获得RPN网络的预测结果,进行格式调整,把五个特征层的结果进行堆叠 205 | #------------------------------------------------------------------# 206 | rois.append(torch.cat(rois_k, dim=0).type_as(p)) 207 | roi_indices.append(torch.cat(roi_indices_k, dim=0).type_as(p)) 208 | anchor.append(torch.from_numpy(anchor_k).unsqueeze(0).float().to(p.device)) 209 | rpn_locs.append(rpn_locs_k) 210 | rpn_scores.append(rpn_scores_k) 211 | 212 | return rpn_locs, rpn_scores, rois, roi_indices, anchor 213 | 214 | 215 | class RegionProposalNetwork(nn.Module): 216 | def __init__( 217 | self, 218 | in_channels = 512, 219 | mid_channels = 512, 220 | ratios = [0.5, 1, 2], 221 | anchor_scales = [4, 16, 32], 222 | feat_stride = 16, 223 | mode = "training", 224 | ): 225 | super(RegionProposalNetwork, self).__init__() 226 | #-----------------------------------------# 227 | # 生成基础先验框,shape为[9, 4] 228 | #-----------------------------------------# 229 | self.anchor_base = generate_anchor_base(anchor_scales = anchor_scales, 
215 | class RegionProposalNetwork(nn.Module):
216 |     def __init__(
217 |         self, 
218 |         in_channels     = 512, 
219 |         mid_channels    = 512, 
220 |         ratios          = [0.5, 1, 2],
221 |         anchor_scales   = [4, 16, 32], 
222 |         feat_stride     = 16,
223 |         mode            = "training",
224 |     ):
225 |         super(RegionProposalNetwork, self).__init__()
226 |         #-----------------------------------------#
227 |         #   Generate the base anchors; shape is [9, 4]
228 |         #-----------------------------------------#
229 |         self.anchor_base    = generate_anchor_base(anchor_scales = anchor_scales, ratios = ratios)
230 |         # default number of anchors on each grid point
231 |         n_anchor            = self.anchor_base.shape[0]
232 | 
233 |         #-----------------------------------------#
234 |         #   A 3x3 convolution first, which can be understood as feature integration
235 |         #-----------------------------------------#
236 |         self.conv1  = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
237 |         #-----------------------------------------#
238 |         #   Classification branch: a 1x1 conv with n_anchor * 2 channels (18 here) that predicts whether each anchor contains an object
239 |         #-----------------------------------------#
240 |         self.score  = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
241 |         #-----------------------------------------#
242 |         #   Regression branch: a 1x1 conv with n_anchor * 4 channels (36 here) that predicts the anchor adjustments
243 |         #-----------------------------------------#
244 |         self.loc    = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
245 | 
246 |         #-----------------------------------------#
247 |         #   Stride between feature points on the input image
248 |         #-----------------------------------------#
249 |         self.feat_stride    = feat_stride
250 |         #-----------------------------------------#
251 |         #   Decodes the proposals and applies non-maximum suppression
252 |         #-----------------------------------------#
253 |         self.proposal_layer = ProposalCreator(mode)
254 |         #--------------------------------------#
255 |         #   Weight initialisation of the RPN layers
256 |         #--------------------------------------#
257 |         normal_init(self.conv1, 0, 0.01)
258 |         normal_init(self.score, 0, 0.01)
259 |         normal_init(self.loc, 0, 0.01)
260 | 
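# Quick dimension check for the head above (illustrative, assuming the default
# 3 ratios x 3 scales = 9 anchors; run it e.g. at the bottom of this file):
#
#     rpn = RegionProposalNetwork()
#     print(rpn.anchor_base.shape)      # (9, 4)
#     print(rpn.score.out_channels)     # 18 = 9 anchors * 2 classes
#     print(rpn.loc.out_channels)       # 36 = 9 anchors * 4 offsets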
261 |     # the input x is the shared feature map
262 |     def forward(self, x, img_size, scale=1.):
263 |         n, _, h, w = x.shape
264 |         #-----------------------------------------#
265 |         #   A 3x3 convolution first, which can be understood as feature integration
266 |         #-----------------------------------------#
267 |         x = F.relu(self.conv1(x))   # activation
268 |         #-----------------------------------------#
269 |         #   Regression branch: predicts the offsets that adjust the anchors.
270 |         #   view(n, -1, 4) reshapes the map into n batches of anchors with 4 values each:
271 |         #   dim 0 = n = batch_size,
272 |         #   dim 1 = -1 = number of anchors (inferred automatically),
273 |         #   dim 2 = 4  = the four box-regression parameters per anchor.
274 |         #-----------------------------------------#
275 |         rpn_locs = self.loc(x)
276 |         rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
277 |         #-----------------------------------------#
278 |         #   torch.transpose(): swaps the contents of two given dimensions.
279 |         #   torch.permute():   reorders several dimensions at once.
280 |         #   contiguous():      copies the permuted tensor into contiguous memory, so that view() can reshape it without disturbing the original data.
281 |         #   torch.view():      flattens the tensor to 1-D, then rebuilds it with the requested shape.
282 |         #
283 |         #   (A Tensor behaves like an ndarray here.)
284 |         #
285 |         #   Classification branch: predicts whether each anchor contains an object.
286 |         #
287 |         #-----------------------------------------#
288 |         rpn_scores = self.score(x)
289 |         rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous().view(n, -1, 2)
290 | 
291 |         #--------------------------------------------------------------------------------------#
292 |         #   Softmax over the two classes of each anchor: contains an object / background;
293 |         #   rpn_softmax_scores[:, :, 1] holds the probability that the anchor contains an object.
294 |         #--------------------------------------------------------------------------------------#
295 |         rpn_softmax_scores = F.softmax(rpn_scores, dim=-1)
296 |         rpn_fg_scores = rpn_softmax_scores[:, :, 1].contiguous()
297 |         rpn_fg_scores = rpn_fg_scores.view(n, -1)
298 | 
299 |         #------------------------------------------------------------------------------------------------#
300 |         #   Generate the anchors tiled over every grid point; for a 600,600,3 input the shape is (12996, 4)
301 |         #------------------------------------------------------------------------------------------------#
302 |         anchor = _enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, h, w)
303 |         rois = list()
304 |         roi_indices = list()
305 |         for i in range(n):
306 |             roi = self.proposal_layer(rpn_locs[i], rpn_fg_scores[i], anchor, img_size, scale = scale)
307 |             batch_index = i * torch.ones((len(roi),))
308 |             rois.append(roi.unsqueeze(0))
309 |             roi_indices.append(batch_index.unsqueeze(0))
310 | 
311 |         rois        = torch.cat(rois, dim=0).type_as(x)
312 |         roi_indices = torch.cat(roi_indices, dim=0).type_as(x)
313 |         anchor      = torch.from_numpy(anchor).unsqueeze(0).float().to(x.device)
314 | 
315 |         return rpn_locs, rpn_scores, rois, roi_indices, anchor
316 | 
317 | 
318 | def normal_init(m, mean, stddev, truncated=False):
319 |     if truncated:
320 |         m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)   # not a perfect approximation of a truncated normal
321 |     else:
322 |         m.weight.data.normal_(mean, stddev)
323 |         m.bias.data.zero_()
324 | 
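For reference, the `ProposalCreator` used above decodes the RPN offsets onto the anchors and then applies non-maximum suppression. A minimal sketch of the standard Faster R-CNN decode step it relies on; `loc2bbox_sketch` is a hypothetical name for illustration, not this project's actual helper:

import numpy as np

def loc2bbox_sketch(anchors, locs):
    # anchors: (N, 4) boxes; locs: (N, 4) predicted (dy, dx, dh, dw) offsets
    h  = anchors[:, 2] - anchors[:, 0]
    w  = anchors[:, 3] - anchors[:, 1]
    cy = anchors[:, 0] + 0.5 * h
    cx = anchors[:, 1] + 0.5 * w
    # standard parameterisation: shift the centre linearly, scale the size exponentially
    cy = locs[:, 0] * h + cy
    cx = locs[:, 1] * w + cx
    h  = np.exp(locs[:, 2]) * h
    w  = np.exp(locs[:, 3]) * w
    return np.stack([cy - 0.5 * h, cx - 0.5 * w, cy + 0.5 * h, cx + 0.5 * w], axis=1)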
--------------------------------------------------------------------------------
/nets/vgg16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.hub import load_state_dict_from_url
4 | 
5 | 
6 | #--------------------------------------#
7 | #   The VGG16 architecture
8 | #--------------------------------------#
9 | class VGG(nn.Module):
10 |     def __init__(self, features, num_classes=1000, init_weights=True):
11 |         super(VGG, self).__init__()
12 |         self.features = features
13 |         #--------------------------------------#
14 |         #   Average-pool down to 7x7
15 |         #--------------------------------------#
16 |         self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
17 |         #--------------------------------------#
18 |         #   Classification head
19 |         #--------------------------------------#
20 |         self.classifier = nn.Sequential(
21 |             nn.Linear(512 * 7 * 7, 4096),
22 |             nn.ReLU(True),
23 |             nn.Dropout(),
24 |             nn.Linear(4096, 4096),
25 |             nn.ReLU(True),
26 |             nn.Dropout(),
27 |             nn.Linear(4096, num_classes),
28 |         )
29 |         if init_weights:
30 |             self._initialize_weights()
31 | 
32 |     def forward(self, x):
33 |         #--------------------------------------#
34 |         #   Feature extraction
35 |         #--------------------------------------#
36 |         x = self.features(x)
37 |         #--------------------------------------#
38 |         #   Average pooling
39 |         #--------------------------------------#
40 |         x = self.avgpool(x)
41 |         #--------------------------------------#
42 |         #   Flatten
43 |         #--------------------------------------#
44 |         x = torch.flatten(x, 1)
45 |         #--------------------------------------#
46 |         #   Classification head
47 |         #--------------------------------------#
48 |         x = self.classifier(x)
49 |         return x
50 | 
51 |     def _initialize_weights(self):
52 |         for m in self.modules():
53 |             if isinstance(m, nn.Conv2d):
54 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
55 |                 if m.bias is not None:
56 |                     nn.init.constant_(m.bias, 0)
57 |             elif isinstance(m, nn.BatchNorm2d):
58 |                 nn.init.constant_(m.weight, 1)
59 |                 nn.init.constant_(m.bias, 0)
60 |             elif isinstance(m, nn.Linear):
61 |                 nn.init.normal_(m.weight, 0, 0.01)
62 |                 nn.init.constant_(m.bias, 0)
63 | 
64 | '''
65 | For a (600, 600, 3) input, the feature map changes through cfg as follows:
66 | 600,600,3 -> 600,600,64 -> 600,600,64 -> 300,300,64 -> 300,300,128 -> 300,300,128 -> 150,150,128 -> 150,150,256 -> 150,150,256 -> 150,150,256
67 | -> 75,75,256 -> 75,75,512 -> 75,75,512 -> 75,75,512 -> 37,37,512 -> 37,37,512 -> 37,37,512 -> 37,37,512
68 | By the end of cfg we have obtained a 37,37,512 feature map
69 | '''
70 | 
71 | cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
72 | 
73 | #--------------------------------------#
74 | #   Feature-extraction part
75 | #--------------------------------------#
76 | def make_layers(cfg, batch_norm=False):
77 |     layers = []
78 |     in_channels = 3
79 |     for v in cfg:
80 |         if v == 'M':
81 |             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
82 |         else:
83 |             conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
84 |             if batch_norm:
85 |                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
86 |             else:
87 |                 layers += [conv2d, nn.ReLU(inplace=True)]
88 |             in_channels = v
89 |     return nn.Sequential(*layers)
90 | 
91 | def decom_vgg16(pretrained = False):
92 |     model = VGG(make_layers(cfg))
93 |     if pretrained:
94 |         state_dict = load_state_dict_from_url("https://download.pytorch.org/models/vgg16-397923af.pth", model_dir="./model_data")
95 |         model.load_state_dict(state_dict)
96 |     #----------------------------------------------------------------------------#
97 |     #   Take the feature-extraction part, which finally yields a 37,37,512 feature map
98 |     #----------------------------------------------------------------------------#
99 |     features    = list(model.features)[:30]
100 |     #----------------------------------------------------------------------------#
101 |     #   Take the classification part, with the Dropout layers removed
102 |     #----------------------------------------------------------------------------#
103 |     classifier  = list(model.classifier)
104 |     del classifier[6]
105 |     del classifier[5]
106 |     del classifier[2]
107 | 
108 |     features    = nn.Sequential(*features)
109 |     classifier  = nn.Sequential(*classifier)
110 |     return features, classifier
111 | 
--------------------------------------------------------------------------------
/order_name.py:
--------------------------------------------------------------------------------
1 | #...........................#
2 | # rename the files inside a folder
3 | #...........................#
4 | import os
5 | import xml
6 | from xml.dom import minidom
7 | import xml.etree.cElementTree as ET
8 | 
9 | def myrename(file_path):
10 |     file_list = os.listdir(file_path)
11 |     for i, fi in enumerate(file_list):
12 |         old_dir = os.path.join(file_path, fi)
13 |         print('old name:', old_dir)
14 |         # remove spaces from the name
15 |         new_name = fi.replace(" ", "_")
16 |         print("new name:", new_name)
17 | 
18 |         # # sequential renaming
19 |         # # new_name=str(i+1)+"."+str(fi.split(".")[-1])
20 |         new_dir = os.path.join(file_path, new_name)
21 |         try:
22 |             os.rename(old_dir, new_dir)
23 |         except Exception as e:
24 |             print(e)
25 |             print("Failed!")
26 |         else:
27 |             print("Success!")
28 | 
29 | 
30 | #...........................#
31 | # rewrite the filename and path fields inside each xml file
32 | #...........................#
33 | 
34 | def xml_name(xmlpath):
35 |     files = os.listdir(xmlpath)          # all file names in the folder
36 |     count = 0
37 |     for xmlFile in files:                # walk the folder
38 |         if not os.path.isdir(xmlFile):   # only open entries that are not folders
39 |             name1 = xmlFile.split('.')[0]
40 |             dom = xml.dom.minidom.parse(xmlpath + '/' + xmlFile)
41 |             root = dom.documentElement
42 |             # rewrite filename
43 |             newfilename = root.getElementsByTagName('filename')
44 |             t = newfilename[0].firstChild.data = name1 + '.jpg'
45 |             print('t:', t)
46 |             # rewrite path
47 |             newpath = root.getElementsByTagName('path')
48 |             t1 = newpath[0].firstChild.data = xmlpath + '\\' + name1 + '.jpg'
49 |             print('t1:', t1)
50 | 
51 |             with open(os.path.join(xmlpath, xmlFile), 'w',) as fh:
52 |                 print('fh:', fh)
53 |                 dom.writexml(fh)
54 |                 print('filename/path written OK!')
55 |             count = count + 1
56 | 
57 | 
58 | # remove the XML declaration that shows up in each rewritten xml file
59 | def delete_xmlversion(xmlpath, savedir):
60 | 
61 |     files = os.listdir(xmlpath)
62 |     for ml in files:
63 |         if '.xml' in ml:
64 |             fo = open(savedir + '/' + '{}'.format(ml), 'w', encoding='utf-8')
65 |             print('{}'.format(ml))
66 |             fi = open(xmlpath + '/' + '{}'.format(ml), 'r')
67 |             content = fi.readlines()
68 |             for line in content:
69 |                 # line = line.replace('a', 'b')   # e.g. replace a with b
70 |                 line = line.replace('<?xml version="1.0" ?>', '')   # the declaration minidom writes by default
71 |                 # line = line.replace('测试图片', '车辆图片')
72 |                 # line = line.replace('class1', 'class2')
73 |                 fo.write(line)
74 |             fo.close()
75 |             print('Replacement done')
76 | 
77 | 
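# Example call (hypothetical paths): strip the declaration from every annotation
# under ./label and write the cleaned copies to ./label_clean:
#
#     delete_xmlversion(r'./label', r'./label_clean')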
78 | # filter unwanted object labels out of the xml files
79 | def Delete_part_information_xml(path_root, xy_classes):
80 |     for anno_path in path_root:
81 |         xml_list = os.listdir(anno_path)
82 |         print("opening {}".format(xml_list))
83 |         for annoxml in xml_list:
84 |             path_xml = os.path.join(anno_path, annoxml)
85 |             print('file path: {}'.format(path_xml))
86 |             tree = ET.parse(path_xml)
87 |             root = tree.getroot()
88 | 
89 |             for child in root.findall('object'):
90 |                 name = child.find('name').text
91 |                 if name not in xy_classes:
92 |                     root.remove(child)
93 |                     print(annoxml)
94 |             tree.write(os.path.join(r'F:\Desktop\PCB_code\PCB_DataSet\Annotations—new', annoxml))   # where the processed files are saved
95 | 
96 | 
97 | 
98 | 
99 | if __name__ == "__main__":
100 |     file_path = r"F:\Desktop\PCB_code\date_set\new_data"   # full path to the folder
101 |     # xmlpath="F:\\桌面\\PCB_code\\date_set\\Image_label_source"
102 |     # savedir = r'F:\桌面\PCB_code\date_set\3'   # where files go after the xml declaration is removed
103 |     # xmlpath=r'F:\桌面\PCB_code\date_set\label'
104 |     myrename(file_path)   # rename the image files
105 | 
106 |     # rewrite the names inside the xml files
107 |     # myrename(xmlpath)                     # 1. rename the xml files themselves
108 |     # xml_name(xmlpath)                     # 2. rewrite filename and path inside each xml
109 |     # delete_xmlversion(xmlpath, savedir)   # strip the xml declaration after renaming
110 | 
111 |     # filter unwanted object labels out of the xml files
112 |     path_root = [r'F:\Desktop\PCB_code\PCB_DataSet\Annotations']   # the function iterates over a list of annotation folders
113 |     xy_classes = ['Speaker', "Bat", "2USB", "Rj45+2USB", "Cap_cross", "Cap_blue_black", "Jumper04p",
114 |                   "Jumper10p", "HDD", "Power08p", "Power04p", "Power24p"]
115 |     Delete_part_information_xml(path_root, xy_classes)
116 | 
117 | 
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | #----------------------------------------------------#
2 | #   Single-image prediction, camera/video detection and FPS
3 | #   testing are combined in this one script; switch between them with `mode`.
4 | #----------------------------------------------------#
5 | import time
6 | 
7 | import cv2
8 | import numpy as np
9 | from PIL import Image
10 | 
11 | from frcnn_predict import FRCNN
12 | 
13 | if __name__ == "__main__":
14 |     frcnn = FRCNN()
15 |     #----------------------------------------------------------------------------------------------------------#
16 |     #   mode selects the kind of test:
17 |     #   'predict'       single-image prediction. To customise it (saving images, cropping objects, ...), read the detailed notes below first.
18 |     #   'video'         video detection from a camera or a video file; see the notes below.
19 |     #   'fps'           FPS test, using img/street.jpg; see the notes below.
20 |     #   'dir_predict'   detect every image in a folder and save the results; by default walks img and saves to img_out, see the notes below.
21 |     #----------------------------------------------------------------------------------------------------------#
22 |     mode = "dir_predict"
23 |     #-------------------------------------------------------------------------#
24 |     #   crop    whether to crop out the detected objects after a single-image prediction
25 |     #   count   whether to count the detected objects
26 |     #   crop and count only take effect when mode='predict'
27 |     #-------------------------------------------------------------------------#
28 |     crop    = False
29 |     count   = False
30 |     #----------------------------------------------------------------------------------------------------------#
31 |     #   video_path       path of the video; video_path=0 means detect from the camera.
32 |     #                    To detect a file, set e.g. video_path = "xxx.mp4" to read xxx.mp4 in the root directory.
33 |     #   video_save_path  where the output video is saved; video_save_path="" means don't save.
34 |     #                    To save, set e.g. video_save_path = "yyy.mp4" to write yyy.mp4 in the root directory.
35 |     #   video_fps        fps of the saved video.
36 |     #
37 |     #   video_path, video_save_path and video_fps only take effect when mode='video'.
38 |     #   When saving, exit with ctrl+c or let the video run to the last frame to complete the save properly.
39 |     #----------------------------------------------------------------------------------------------------------#
40 |     video_path      = 0
41 |     video_save_path = ""
42 |     video_fps       = 25.0
43 |     #----------------------------------------------------------------------------------------------------------#
44 |     #   test_interval    how many times each image is detected when measuring fps; in theory, the larger it is, the more accurate the fps.
45 |     #   fps_image_path   the image used for the fps test.
46 |     #
47 |     #   test_interval and fps_image_path only take effect when mode='fps'
48 |     #----------------------------------------------------------------------------------------------------------#
49 |     test_interval   = 100
50 |     fps_image_path  = "img/street.jpg"
51 |     #-------------------------------------------------------------------------#
52 |     #   dir_origin_path   folder holding the images to detect
53 |     #   dir_save_path     folder where the detected images are saved
54 |     #
55 |     #   dir_origin_path and dir_save_path only take effect when mode='dir_predict'
56 |     #-------------------------------------------------------------------------#
57 |     dir_origin_path = "faster-rcnn-pytorch-master/prediction_img"
58 |     dir_save_path   = "faster-rcnn-pytorch-master/prediction_img_out"
59 | 
60 |     if mode == "predict":
61 |         '''
62 |         1. This script cannot run batch prediction directly; to batch-predict, walk a folder with os.listdir()
63 |            and open each image with Image.open(). See get_dr_txt.py, which both walks a folder and saves the detections.
64 |         2. To save a detected image, call r_image.save("img.jpg"); edit it right here in predict.py.
65 |         3. To get the box coordinates, go into frcnn.detect_image and read top, left, bottom, right in the drawing part.
66 |         4. To crop out the detected objects, go into frcnn.detect_image and slice the original image
67 |            with the obtained top, left, bottom, right values.
68 |         5. To draw extra text on the result, e.g. the number of a particular class, test predicted_class in the
69 |            drawing part (e.g. if predicted_class == 'car':), keep a counter, and write it with draw.text.
70 |         '''
71 |         while True:
72 |             img = input('Input image filename:')
73 |             try:
74 |                 image = Image.open(img)
75 |             except:
76 |                 print('Open Error! Try again!')
77 |                 continue
78 |             else:
79 |                 r_image = frcnn.detect_image(image, crop = crop, count = count)
80 |                 r_image.show()
81 | 
82 |     elif mode == "video":
83 |         capture = cv2.VideoCapture(video_path)
84 |         if video_save_path != "":
85 |             fourcc  = cv2.VideoWriter_fourcc(*'XVID')
86 |             size    = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
87 |             out     = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
88 | 
89 |         fps = 0.0
90 |         while(True):
91 |             t1 = time.time()
92 |             # read one frame
93 |             ref, frame = capture.read()
94 |             if not ref: break   # stop once the stream ends, instead of crashing on an empty frame
95 |             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)   # BGR -> RGB
96 |             # convert to a PIL Image
97 |             frame = Image.fromarray(np.uint8(frame))
98 |             # run detection
99 |             frame = np.array(frcnn.detect_image(frame))
100 |             # RGB -> BGR, the format opencv displays
101 |             frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
102 | 
103 |             fps = ( fps + (1./(time.time()-t1)) ) / 2
104 |             print("fps= %.2f"%(fps))
105 |             frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
106 | 
107 |             cv2.imshow("video", frame)
108 |             c = cv2.waitKey(1) & 0xff
109 |             if video_save_path != "":
110 |                 out.write(frame)
111 | 
112 |             if c == 27:   # ESC
113 |                 capture.release()
114 |                 break
115 |         capture.release()
116 |         if video_save_path != "": out.release()   # `out` only exists when saving
117 |         cv2.destroyAllWindows()
118 | 
119 |     elif mode == "fps":
120 |         img = Image.open(fps_image_path)
121 |         tact_time = frcnn.get_FPS(img, test_interval)
122 |         print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
123 | 
124 |     elif mode == "dir_predict":
125 |         import os
126 |         from tqdm import tqdm
127 | 
128 |         img_names = os.listdir(dir_origin_path)
129 |         for img_name in tqdm(img_names):
130 |             if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
131 |                 image_path  = os.path.join(dir_origin_path, img_name)
132 |                 image       = Image.open(image_path)
133 |                 r_image     = frcnn.detect_image(image)
134 |                 if not os.path.exists(dir_save_path):
135 |                     os.makedirs(dir_save_path)
136 |                 r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
137 | 
138 |     else:
139 |         raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
140 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.2.1
2 | numpy==1.17.0
3 | matplotlib==3.1.2
4 | opencv_python==4.1.2.30
5 | torch==1.2.0
6 | torchvision==0.4.0
7 | tqdm==4.60.0
8 | Pillow==8.2.0
9 | h5py==2.10.0
--------------------------------------------------------------------------------
/summary.py:
--------------------------------------------------------------------------------
1 | #--------------------------------------------#
2 | #   This script prints the network structure
3 | #--------------------------------------------#
4 | import torch
5 | from thop import clever_format, profile
6 | from torchsummary import summary
7 | 
8 | from nets.faster_rcnn_feature_extraction import FasterRCNN
9 | 
10 | if __name__ == "__main__":
11 |     input_shape = [600, 600]
12 |     num_classes = 21
13 | 
14 |     device  = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15 |     model   = FasterRCNN(num_classes, backbone = 'vgg').to(device)
16 |     summary(model, (3, input_shape[0], input_shape[1]))
17 | 
18 |     dummy_input     = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device)
19 |     flops, params   = profile(model.to(device), (dummy_input, ), verbose=False)
20 |     #--------------------------------------------------------#
21 |     #   flops * 2 because profile does not count a convolution
22 |     #   as two operations. Some papers count a conv as both a
23 |     #   multiply and an add, and multiply by 2; others count
24 |     #   only the multiplies and skip the factor. This code
25 |     #   multiplies by 2, following YOLOX.
26 |     #--------------------------------------------------------#
27 |     flops           = flops * 2
28 |     flops, params   = clever_format([flops, params], "%.3f")
29 |     print('Total GFLOPS: %s' % (flops))
30 |     print('Total params: %s' % (params))
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #
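The factor-of-two convention used in summary.py above is easy to verify by hand. A quick sketch for a single 3x3 convolution with hypothetical sizes (none of these numbers come from this repository):

# MACs of one Conv2d: k*k*C_in*C_out*H_out*W_out multiply-accumulates
k, c_in, c_out, h, w = 3, 64, 128, 150, 150
macs  = k * k * c_in * c_out * h * w
flops = 2 * macs   # count the multiply and the add separately, as summary.py does
print("%.2f GMACs -> %.2f GFLOPs" % (macs / 1e9, flops / 1e9))   # 1.66 GMACs -> 3.32 GFLOPs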
--------------------------------------------------------------------------------
/utils/anchors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | #--------------------------------------------#
5 | #   Generate the base anchors
6 | #--------------------------------------------#
7 | def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]):
8 |     anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4), dtype=np.float32)
9 |     for i in range(len(ratios)):
10 |         for j in range(len(anchor_scales)):
11 |             h = base_size * anchor_scales[j] * np.sqrt(ratios[i])
12 |             w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i])
13 | 
14 |             index = i * len(anchor_scales) + j
15 |             anchor_base[index, 0] = - h / 2.
16 |             anchor_base[index, 1] = - w / 2.
17 |             anchor_base[index, 2] = h / 2.
18 |             anchor_base[index, 3] = w / 2.
19 |     return anchor_base
20 | 
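# The nine base anchors produced above, for the defaults (base_size=16,
# ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]), are centred on (0, 0) with
# side length 16 * scale * sqrt(ratio) (illustrative values, rounded):
#
#   ratio 0.5 -> (h, w) ~ ( 90.5, 181.0), (181.0, 362.0), (362.0, 724.1)
#   ratio 1.0 -> (h, w) =  (128, 128),    (256, 256),     (512, 512)
#   ratio 2.0 -> the transposes of the ratio-0.5 boxes
#
#   e.g. print(generate_anchor_base()[3])   # -> [-64. -64.  64.  64.], the 128x128 box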
21 | #--------------------------------------------#
22 | #   Tile the base anchors over every feature point
23 | #--------------------------------------------#
24 | def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
25 |     #---------------------------------#
26 |     #   compute the grid centres
27 |     #---------------------------------#
28 |     shift_x             = np.arange(0, width * feat_stride, feat_stride)
29 |     shift_y             = np.arange(0, height * feat_stride, feat_stride)
30 |     shift_x, shift_y    = np.meshgrid(shift_x, shift_y)
31 |     shift               = np.stack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel(),), axis=1)
32 | 
33 |     #---------------------------------#
34 |     #   the 9 anchors on every grid point
35 |     #---------------------------------#
36 |     A       = anchor_base.shape[0]
37 |     K       = shift.shape[0]
38 |     anchor  = anchor_base.reshape((1, A, 4)) + shift.reshape((K, 1, 4))
39 |     #---------------------------------#
40 |     #   all the anchors
41 |     #---------------------------------#
42 |     anchor  = anchor.reshape((K * A, 4)).astype(np.float32)
43 |     return anchor
44 | 
45 | if __name__ == "__main__":
46 |     import matplotlib.pyplot as plt
47 |     nine_anchors = generate_anchor_base()
48 |     print(nine_anchors)
49 | 
50 |     height, width, feat_stride = 38, 38, 16
51 |     anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width)
52 |     print(np.shape(anchors_all))
53 | 
54 |     fig = plt.figure()
55 |     ax  = fig.add_subplot(111)
56 |     plt.ylim(-300, 900)
57 |     plt.xlim(-300, 900)
58 |     shift_x = np.arange(0, width * feat_stride, feat_stride)
59 |     shift_y = np.arange(0, height * feat_stride, feat_stride)
60 |     shift_x, shift_y = np.meshgrid(shift_x, shift_y)
61 |     plt.scatter(shift_x, shift_y)
62 |     box_widths  = anchors_all[:, 2] - anchors_all[:, 0]
63 |     box_heights = anchors_all[:, 3] - anchors_all[:, 1]
64 | 
65 |     for i in [108, 109, 110, 111, 112, 113, 114, 115, 116]:
66 |         rect = plt.Rectangle([anchors_all[i, 0], anchors_all[i, 1]], box_widths[i], box_heights[i], color="r", fill=False)
67 |         ax.add_patch(rect)
68 |     plt.show()
69 | 
70 | 
--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import matplotlib
4 | import torch
5 | 
6 | matplotlib.use('Agg')
7 | import shutil
8 | 
9 | import numpy as np
10 | from matplotlib import pyplot as plt
11 | from PIL import Image
12 | from scipy import signal
13 | from torch.utils.tensorboard import SummaryWriter
14 | from tqdm import tqdm   # the eval loop below calls tqdm(...) directly, so import the function, not the bare module
15 | # import tqdm
16 | 
17 | from .utils import cvtColor, get_new_img_size, preprocess_input, resize_image
18 | from .utils_bbox import DecodeBox
19 | from .utils_map import get_coco_map, get_map
20 | 
21 | 
22 | class LossHistory():
23 |     def __init__(self, log_dir, model, input_shape):
24 |         self.log_dir    = log_dir
25 |         self.losses     = []
26 |         self.val_loss   = []
27 | 
28 |         os.makedirs(self.log_dir)
29 |         self.writer     = SummaryWriter(self.log_dir)
30 |         # try:
31 |         #     dummy_input     = torch.randn(2, 3, input_shape[0], input_shape[1])
32 |         #     self.writer.add_graph(model, dummy_input)
33 |         # except:
34 |         #     pass
35 | 
36 |     def append_loss(self, epoch, loss, val_loss):
37 |         if not os.path.exists(self.log_dir):
38 |             os.makedirs(self.log_dir)
39 | 
40 |         self.losses.append(loss)
41 |         self.val_loss.append(val_loss)
42 | 
43 |         with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
44 |             f.write(str(loss))
45 |             f.write("\n")
46 |         with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
47 |             f.write(str(val_loss))
48 |             f.write("\n")
49 | 
50 | 
self.writer.add_scalar('loss', loss, epoch) 51 | self.writer.add_scalar('val_loss', val_loss, epoch) 52 | self.loss_plot() 53 | 54 | def loss_plot(self): 55 | iters = range(len(self.losses)) 56 | 57 | plt.figure() 58 | plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') 59 | plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') 60 | try: 61 | if len(self.losses) < 25: 62 | num = 5 63 | else: 64 | num = 15 65 | 66 | # plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 67 | plt.plot(iters, signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') 68 | # plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 69 | plt.plot(iters, signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') 70 | except: 71 | pass 72 | 73 | plt.grid(True) 74 | plt.xlabel('Epoch') 75 | plt.ylabel('Loss') 76 | plt.legend(loc="upper right") 77 | 78 | plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) 79 | 80 | plt.cla() 81 | plt.close("all") 82 | 83 | class EvalCallback(): 84 | def __init__(self, net, input_shape, class_names, num_classes, val_lines, log_dir, cuda, \ 85 | map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, MINOVERLAP=0.5, eval_flag=True, period=1): 86 | super(EvalCallback, self).__init__() 87 | 88 | self.net = net 89 | self.input_shape = input_shape 90 | self.class_names = class_names 91 | self.num_classes = num_classes 92 | self.val_lines = val_lines 93 | self.log_dir = log_dir 94 | self.cuda = cuda 95 | self.map_out_path = map_out_path 96 | self.max_boxes = max_boxes 97 | self.confidence = confidence 98 | self.nms_iou = nms_iou 99 | self.letterbox_image = letterbox_image 100 | self.MINOVERLAP = MINOVERLAP 101 | self.eval_flag = eval_flag 102 | self.period = period 103 | 104 | self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).repeat(self.num_classes + 1)[None] 105 | if self.cuda: 106 | self.std = self.std.cuda() 107 | self.bbox_util = DecodeBox(self.std, self.num_classes) 108 | 109 | self.maps = [0] 110 | self.epoches = [0] 111 | if self.eval_flag: 112 | with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 113 | f.write(str(0)) 114 | f.write("\n") 115 | 116 | #---------------------------------------------------# 117 | # 检测图片 118 | #---------------------------------------------------# 119 | def get_map_txt(self, image_id, image, class_names, map_out_path): 120 | f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") 121 | #---------------------------------------------------# 122 | # 计算输入图片的高和宽 123 | #---------------------------------------------------# 124 | image_shape = np.array(np.shape(image)[0:2]) 125 | input_shape = get_new_img_size(image_shape[0], image_shape[1]) 126 | #---------------------------------------------------------# 127 | # 在这里将图像转换成RGB图像,防止灰度图在预测时报错。 128 | # 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB 129 | #---------------------------------------------------------# 130 | image = cvtColor(image) 131 | 132 | #---------------------------------------------------------# 133 | # 给原图像进行resize,resize到短边为600的大小上 134 | #---------------------------------------------------------# 135 | image_data = resize_image(image, [input_shape[1], input_shape[0]]) 136 | 
#---------------------------------------------------------# 137 | # 添加上batch_size维度 138 | #---------------------------------------------------------# 139 | image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) 140 | 141 | with torch.no_grad(): 142 | images = torch.from_numpy(image_data) 143 | if self.cuda: 144 | images = images.cuda() 145 | 146 | roi_cls_locs, roi_scores, rois, _ = self.net(images) 147 | #-------------------------------------------------------------# 148 | # 利用classifier的预测结果对建议框进行解码,获得预测框 149 | #-------------------------------------------------------------# 150 | results = self.bbox_util.forward(roi_cls_locs, roi_scores, rois, image_shape, input_shape, 151 | nms_iou = self.nms_iou, confidence = self.confidence) 152 | #--------------------------------------# 153 | # 如果没有检测到物体,则返回原图 154 | #--------------------------------------# 155 | if len(results[0]) <= 0: 156 | return 157 | 158 | top_label = np.array(results[0][:, 5], dtype = 'int32') 159 | top_conf = results[0][:, 4] 160 | top_boxes = results[0][:, :4] 161 | 162 | top_100 = np.argsort(top_conf)[::-1][:self.max_boxes] 163 | top_boxes = top_boxes[top_100] 164 | top_conf = top_conf[top_100] 165 | top_label = top_label[top_100] 166 | 167 | for i, c in list(enumerate(top_label)): 168 | predicted_class = self.class_names[int(c)] 169 | box = top_boxes[i] 170 | score = str(top_conf[i]) 171 | 172 | top, left, bottom, right = box 173 | if predicted_class not in class_names: 174 | continue 175 | 176 | f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) 177 | 178 | f.close() 179 | return 180 | 181 | def on_epoch_end(self, epoch): 182 | if epoch % self.period == 0 and self.eval_flag: 183 | if not os.path.exists(self.map_out_path): 184 | os.makedirs(self.map_out_path) 185 | if not os.path.exists(os.path.join(self.map_out_path, "ground-truth")): 186 | os.makedirs(os.path.join(self.map_out_path, "ground-truth")) 187 | if not os.path.exists(os.path.join(self.map_out_path, "detection-results")): 188 | os.makedirs(os.path.join(self.map_out_path, "detection-results")) 189 | print("Get map.") 190 | for annotation_line in tqdm(self.val_lines): 191 | line = annotation_line.split() 192 | image_id = os.path.basename(line[0]).split('.')[0] 193 | #------------------------------# 194 | # 读取图像并转换成RGB图像 195 | #------------------------------# 196 | image = Image.open(line[0]) 197 | #------------------------------# 198 | # 获得预测框 199 | #------------------------------# 200 | gt_boxes = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 201 | #------------------------------# 202 | # 获得预测txt 203 | #------------------------------# 204 | self.get_map_txt(image_id, image, self.class_names, self.map_out_path) 205 | 206 | #------------------------------# 207 | # 获得真实框txt 208 | #------------------------------# 209 | with open(os.path.join(self.map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: 210 | for box in gt_boxes: 211 | left, top, right, bottom, obj = box 212 | obj_name = self.class_names[obj] 213 | new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) 214 | 215 | print("Calculate Map.") 216 | try: 217 | temp_map = get_coco_map(class_names = self.class_names, path = self.map_out_path)[1] 218 | except: 219 | temp_map = get_map(self.MINOVERLAP, False, path = self.map_out_path) 220 | self.maps.append(temp_map) 221 | self.epoches.append(epoch) 222 | 223 | with 
open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: 224 | f.write(str(temp_map)) 225 | f.write("\n") 226 | 227 | plt.figure() 228 | plt.plot(self.epoches, self.maps, 'red', linewidth = 2, label='train map') 229 | 230 | plt.grid(True) 231 | plt.xlabel('Epoch') 232 | plt.ylabel('Map %s'%str(self.MINOVERLAP)) 233 | plt.title('A Map Curve') 234 | plt.legend(loc="upper right") 235 | 236 | plt.savefig(os.path.join(self.log_dir, "epoch_map.png")) 237 | plt.cla() 238 | plt.close("all") 239 | 240 | print("Get map done.") 241 | shutil.rmtree(self.map_out_path) 242 | -------------------------------------------------------------------------------- /utils/dataloader.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | from torch.utils.data.dataset import Dataset 6 | 7 | from utils.utils import cvtColor, preprocess_input 8 | 9 | 10 | class FRCNNDataset(Dataset): 11 | def __init__(self, annotation_lines, input_shape = [600, 600], train = True): 12 | self.annotation_lines = annotation_lines 13 | self.length = len(annotation_lines) 14 | self.input_shape = input_shape 15 | self.train = train 16 | 17 | def __len__(self): 18 | return self.length 19 | 20 | def __getitem__(self, index): 21 | index = index % self.length 22 | #---------------------------------------------------# 23 | # 训练时进行数据的随机增强 24 | # 验证时不进行数据的随机增强 25 | #---------------------------------------------------# 26 | image, y = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], random = self.train) 27 | image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) 28 | box_data = np.zeros((len(y), 5)) 29 | if len(y) > 0: 30 | box_data[:len(y)] = y 31 | 32 | box = box_data[:, :4] 33 | label = box_data[:, -1] 34 | return image, box, label 35 | 36 | def rand(self, a=0, b=1): 37 | return np.random.rand()*(b-a) + a 38 | 39 | def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): 40 | line = annotation_line.split() 41 | #------------------------------# 42 | # 读取图像并转换成RGB图像 43 | #------------------------------# 44 | # image = Image.open('./PCB_DataSet/JPEGImages/'+line[0]+'.jpg') 45 | image = Image.open(line[0]) 46 | image = cvtColor(image) 47 | #------------------------------# 48 | # 获得图像的高宽与目标高宽 49 | #------------------------------# 50 | iw, ih = image.size 51 | h, w = input_shape 52 | #------------------------------# 53 | # 获得预测框 54 | #------------------------------# 55 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) 56 | 57 | if not random: 58 | scale = min(w/iw, h/ih) 59 | nw = int(iw*scale) 60 | nh = int(ih*scale) 61 | dx = (w-nw)//2 62 | dy = (h-nh)//2 63 | 64 | #---------------------------------# 65 | # 将图像多余的部分加上灰条 66 | #---------------------------------# 67 | image = image.resize((nw,nh), Image.BICUBIC) 68 | new_image = Image.new('RGB', (w,h), (128,128,128)) 69 | new_image.paste(image, (dx, dy)) 70 | image_data = np.array(new_image, np.float32) 71 | 72 | #---------------------------------# 73 | # 对真实框进行调整 74 | #---------------------------------# 75 | if len(box)>0: 76 | np.random.shuffle(box) 77 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 78 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 79 | box[:, 0:2][box[:, 0:2]<0] = 0 80 | box[:, 2][box[:, 2]>w] = w 81 | box[:, 3][box[:, 3]>h] = h 82 | box_w = box[:, 2] - box[:, 0] 83 | box_h = box[:, 3] - box[:, 1] 84 | box = box[np.logical_and(box_w>1, 
box_h>1)] # discard invalid box 85 | 86 | return image_data, box 87 | 88 | #------------------------------------------# 89 | # 对图像进行缩放并且进行长和宽的扭曲 90 | #------------------------------------------# 91 | new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) 92 | scale = self.rand(.25, 2) 93 | if new_ar < 1: 94 | nh = int(scale*h) 95 | nw = int(nh*new_ar) 96 | else: 97 | nw = int(scale*w) 98 | nh = int(nw/new_ar) 99 | image = image.resize((nw,nh), Image.BICUBIC) 100 | 101 | #------------------------------------------# 102 | # 将图像多余的部分加上灰条 103 | #------------------------------------------# 104 | dx = int(self.rand(0, w-nw)) 105 | dy = int(self.rand(0, h-nh)) 106 | new_image = Image.new('RGB', (w,h), (128,128,128)) 107 | new_image.paste(image, (dx, dy)) 108 | image = new_image 109 | 110 | #------------------------------------------# 111 | # 翻转图像 112 | #------------------------------------------# 113 | flip = self.rand()<.5 114 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 115 | 116 | image_data = np.array(image, np.uint8) 117 | #---------------------------------# 118 | # 对图像进行色域变换 119 | # 计算色域变换的参数 120 | #---------------------------------# 121 | r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 122 | #---------------------------------# 123 | # 将图像转到HSV上 124 | #---------------------------------# 125 | hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) 126 | dtype = image_data.dtype 127 | #---------------------------------# 128 | # 应用变换 129 | #---------------------------------# 130 | x = np.arange(0, 256, dtype=r.dtype) 131 | lut_hue = ((x * r[0]) % 180).astype(dtype) 132 | lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) 133 | lut_val = np.clip(x * r[2], 0, 255).astype(dtype) 134 | 135 | image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) 136 | image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) 137 | 138 | #---------------------------------# 139 | # 对真实框进行调整 140 | #---------------------------------# 141 | if len(box)>0: 142 | np.random.shuffle(box) 143 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx 144 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy 145 | if flip: box[:, [0,2]] = w - box[:, [2,0]] 146 | box[:, 0:2][box[:, 0:2]<0] = 0 147 | box[:, 2][box[:, 2]>w] = w 148 | box[:, 3][box[:, 3]>h] = h 149 | box_w = box[:, 2] - box[:, 0] 150 | box_h = box[:, 3] - box[:, 1] 151 | box = box[np.logical_and(box_w>1, box_h>1)] 152 | 153 | return image_data, box 154 | 155 | # DataLoader中collate_fn使用 156 | def frcnn_dataset_collate(batch): 157 | images = [] 158 | bboxes = [] 159 | labels = [] 160 | for img, box, label in batch: 161 | images.append(img) 162 | bboxes.append(box) 163 | labels.append(label) 164 | images = torch.from_numpy(np.array(images)) 165 | return images, bboxes, labels 166 | 167 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/Bikmeans_anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from read_voc import VOCDataSet 3 | 4 | # bik-means算法 5 | """ 6 | Args: 7 | boxes: 需要聚类的bboxes 8 | k: 簇数(聚成几类) 9 | dist: 更新簇坐标的方法(默认使用中位数,比均值效果略好) 10 | """ 11 | 12 | def load_data_set(fileName): 13 | """加载数据集""" 14 | dataSet = [] # 初始化一个空列表 15 | fr = open(fileName) 16 | for line in fr.readlines(): 17 | # 按tab分割字段,将每行元素分割为list的元素 18 | curLine = line.strip().split('\t') 19 | # 用list函数把map函数返回的迭代器遍历展开成一个列表 20 | # 其中map(float, curLine)表示把列表的每个值用float函数转成float型,并返回迭代器 21 | fltLine = 
list(map(float, curLine)) 22 | dataSet.append(fltLine) 23 | return dataSet 24 | 25 | 26 | def distance_euclidean(vector1, vector2): 27 | """计算欧氏距离""" 28 | return np.sqrt(sum(np.power(vector1-vector2, 2))) # 返回两个向量的距离 29 | 30 | 31 | def rand_center(dataSet, k): 32 | """构建一个包含K个随机质心的集合""" 33 | n = np.shape(dataSet)[1] # 获取样本特征值 34 | 35 | # 初始化质心,创建(k,n)个以0填充的矩阵 36 | centroids = np.mat(np.zeros((k, n))) # 每个质心有n个坐标值,总共要k个质心 37 | # 遍历特征值 38 | for j in range(n): 39 | # 计算每一列的最小值 40 | minJ = min(dataSet[:, j]) 41 | # 计算每一列的范围值 42 | rangeJ = float(max(dataSet[:, j]) - minJ) 43 | # 计算每一列的质心,并将其赋给centroids 44 | centroids[:, j] = minJ + rangeJ * np.random.rand(k, 1) 45 | 46 | # 返回质心 47 | return centroids 48 | 49 | 50 | def k_means(dataSet, k, distMeas=distance_euclidean, creatCent=rand_center): 51 | """K-means聚类算法""" 52 | m = np.shape(dataSet)[0] # 行数 53 | # 建立簇分配结果矩阵,第一列存放该数据所属中心点,第二列是该数据到中心点的距离 54 | clusterAssment = np.mat(np.zeros((m, 2))) 55 | centroids = creatCent(dataSet, k) # 质心,即聚类点 56 | # 用来判定聚类是否收敛 57 | clusterChanged = True 58 | while clusterChanged: 59 | clusterChanged = False 60 | for i in range(m): # 把每一个数据划分到离他最近的中心点 61 | minDist = np.inf # 无穷大 62 | minIndex = -1 # 初始化 63 | for j in range(k): 64 | # 计算各点与新的聚类中心的距离 65 | distJI = distMeas(centroids[j, :], dataSet[i, :]) 66 | if distJI < minDist: 67 | # 如果第i个数据点到第j中心点更近,则将i归属为j 68 | minDist = distJI 69 | minIndex = j 70 | # 如果分配发生变化,则需要继续迭代 71 | if clusterAssment[i, 0] != minIndex: 72 | clusterChanged = True 73 | # 并将第i个数据点的分配情况存入字典 74 | clusterAssment[i, :] = minIndex, minDist**2 75 | # print(centroids) 76 | for cent in range(k): # 重新计算中心点 77 | # 去第一列等于cent的所有列 78 | ptsInClust = dataSet[np.nonzero(clusterAssment[:, 0].A == cent)[0]] 79 | # 算出这些数据的中心点 80 | centroids[cent, :] = np.mean(ptsInClust, axis=0) 81 | return centroids, clusterAssment 82 | 83 | 84 | def biKmeans(dataMat, k, distMeas=distance_euclidean): 85 | """二分k-means算法""" 86 | m = np.shape(dataMat)[0] 87 | # 创建一个矩阵来存储数据集中每个点的簇分配结果及平方误差 88 | clusterAssment = np.mat(np.zeros((m, 2))) 89 | # 根据数据集均值获取第一个质心 90 | centroid0 = np.mean(dataMat, axis=0).tolist()[0] 91 | # 用一个列表来保留所有的质心 92 | centList = [centroid0] 93 | # 遍历数据集中所有点来计算每个点到质心的距离 94 | for j in range(m): 95 | clusterAssment[j, 1] = distMeas(np.mat(centroid0), dataMat[j, :]) ** 2 96 | # 对簇不停的进行划分,直到得到想要的簇数目为止 97 | while (len(centList) < k): 98 | # 初始化最小SSE为无穷大,用于比较划分前后的SSE 99 | lowestSSE = np.inf # 无穷大 100 | # 通过考察簇列表中的值来获得当前簇的数目,遍历所有的簇来决定最佳的簇进行划分 101 | for i in range(len(centList)): 102 | # 对每一个簇,将该簇中的所有点看成一个小的数据集 103 | ptsInCurrCluster = dataMat[np.nonzero( 104 | clusterAssment[:, 0].A == i)[0], :] 105 | # 将ptsInCurrCluster输入到函数kMeans中进行处理,k=2, 106 | # kMeans会生成两个质心(簇),同时给出每个簇的误差值 107 | centroidMat, splitClustAss = k_means(ptsInCurrCluster, 2, distMeas) 108 | # 划分数据的SSE与未划分的之和作为本次划分的总误差 109 | sseSplit = sum(splitClustAss[:, 1]) # 划分数据集的SSE 110 | sseNotSplit = sum(clusterAssment[np.nonzero(clusterAssment[:, 0].A != i)[0], 1]) # 未划分数据集的SSE 111 | print('划分数据集的SSE, and 未划分的SSE: ', sseSplit, sseNotSplit) 112 | # 将划分与未划分的SSE求和与最小SSE相比较 确定是否划分 113 | if (sseSplit + sseNotSplit) < lowestSSE: 114 | bestCentToSplit = i # 当前最适合做划分的中心点 115 | bestNewCents = centroidMat # 划分后的两个新中心点 116 | bestClustAss = splitClustAss.copy() # 划分点的聚类信息 117 | lowestSSE = sseSplit + sseNotSplit 118 | # 找出最好的簇分配结果 119 | # 调用kmeans函数并且指定簇数为2时,会得到两个编号分别为0和1的结果簇 120 | bestClustAss[np.nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList) 121 | # 更新为最佳质心 122 | bestClustAss[np.nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit 123 | print('本次最适合划分的质心: 
', bestCentToSplit) 124 | print('被划分数据集样本数量: ', len(bestClustAss)) 125 | # 更新质心列表 126 | # 更新原质心list中的第i个质心为使用二分kMeans后bestNewCents的第一个质心 127 | centList[bestCentToSplit] = bestNewCents[0, :].tolist()[0] 128 | # 添加bestNewCents的第二个质心 129 | centList.append(bestNewCents[1, :].tolist()[0]) 130 | # 重新分配最好簇下的数据(质心)以及SSE 131 | clusterAssment[np.nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss 132 | 133 | return np.mat(centList), clusterAssment 134 | 135 | def main(img_size=600, k=9, thr=0.25, gen=1000): 136 | # 从数据集中读取所有图片的wh以及对应bboxes的wh 137 | dataset = VOCDataSet(voc_root="/data", year="2012", txt_name="train.txt") 138 | im_wh, boxes_wh = dataset.get_info() 139 | 140 | 141 | if __name__ == "__main__": 142 | import matplotlib.pyplot as plt 143 | # nine_anchors = generate_anchor_base() 144 | # print(nine_anchors) 145 | 146 | # height, width, feat_stride = 38,38,16 147 | # anchors_all = _enumerate_shifted_anchor(nine_anchors, feat_stride, height, width) 148 | # print(np.shape(anchors_all)) 149 | 150 | # fig = plt.figure() 151 | # ax = fig.add_subplot(111) 152 | # plt.ylim(-300,900) 153 | # plt.xlim(-300,900) 154 | # shift_x = np.arange(0, width * feat_stride, feat_stride) 155 | # shift_y = np.arange(0, height * feat_stride, feat_stride) 156 | # shift_x, shift_y = np.meshgrid(shift_x, shift_y) 157 | # plt.scatter(shift_x,shift_y) 158 | # box_widths = anchors_all[:,2]-anchors_all[:,0] 159 | # box_heights = anchors_all[:,3]-anchors_all[:,1] 160 | 161 | # for i in [108, 109, 110, 111, 112, 113, 114, 115, 116]: 162 | # rect = plt.Rectangle([anchors_all[i, 0],anchors_all[i, 1]],box_widths[i],box_heights[i],color="r",fill=False) 163 | # ax.add_patch(rect) 164 | # plt.show() 165 | 166 | # 测试biKmeans算法 167 | datMat = np.mat(load_data_set(r'F:\Desktop\PCB_code\PCB_DataSet\trainval.txt')) 168 | # 5个anchor框, 169 | centList, clusterAssment = biKmeans(datMat, 5) 170 | print("质心结果:", centList) 171 | print("聚类结果:", clusterAssment) 172 | # 可视化 173 | plt.scatter(np.array(datMat)[:, 0], np.array(datMat)[:, 1], c=np.array(clusterAssment)[:, 0].T) 174 | plt.scatter(centList[:, 0].tolist(), centList[:, 1].tolist(), c="r") 175 | plt.show() 176 | 177 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/main.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from tqdm import tqdm 4 | from scipy.cluster.vq import kmeans 5 | 6 | from read_voc import VOCDataSet 7 | from yolo_kmeans import k_means, wh_iou 8 | 9 | 10 | def anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float): # mutation fitness 11 | r = wh[:, None] / k[None] 12 | x = np.minimum(r, 1. / r).min(2) # ratio metric 13 | # x = wh_iou(wh, k) # iou metric 14 | best = x.max(1) 15 | f = (best * (best > thr).astype(np.float32)).mean() # fitness 16 | bpr = (best > thr).astype(np.float32).mean() # best possible recall 17 | return f, bpr 18 | 19 | 20 | def main(img_size=512, n=9, thr=0.25, gen=1000): 21 | # 从数据集中读取所有图片的wh以及对应bboxes的wh 22 | dataset = VOCDataSet(voc_root="/data", year="2012", txt_name="train.txt") 23 | im_wh, boxes_wh = dataset.get_info() 24 | 25 | # 最大边缩放到img_size 26 | im_wh = np.array(im_wh, dtype=np.float32) 27 | shapes = img_size * im_wh / im_wh.max(1, keepdims=True) 28 | wh0 = np.concatenate([l * s for s, l in zip(shapes, boxes_wh)]) # wh 29 | 30 | # Filter 过滤掉小目标 31 | i = (wh0 < 3.0).any(1).sum() 32 | if i: 33 | print(f'WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 34 | wh = wh0[(wh0 >= 2.0).any(1)] # 只保留wh都大于等于2个像素的box 35 | 36 | # Kmeans calculation 37 | # print(f'Running kmeans for {n} anchors on {len(wh)} points...') 38 | # s = wh.std(0) # sigmas for whitening 39 | # k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 40 | # assert len(k) == n, print(f'ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 41 | # k *= s 42 | k = k_means(wh, n) 43 | 44 | # 按面积排序 45 | k = k[np.argsort(k.prod(1))] # sort small to large 46 | f, bpr = anchor_fitness(k, wh, thr) 47 | print("kmeans: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k])) 48 | print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}") 49 | 50 | # Evolve 51 | # 遗传算法(在kmeans的结果基础上变异mutation) 52 | npr = np.random 53 | f, sh, mp, s = anchor_fitness(k, wh, thr)[0], k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 54 | pbar = tqdm(range(gen), desc=f'Evolving anchors with Genetic Algorithm:') # progress bar 55 | for _ in pbar: 56 | v = np.ones(sh) 57 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 58 | v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 59 | kg = (k.copy() * v).clip(min=2.0) 60 | fg, bpr = anchor_fitness(kg, wh, thr) 61 | if fg > f: 62 | f, k = fg, kg.copy() 63 | pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 64 | 65 | # 按面积排序 66 | k = k[np.argsort(k.prod(1))] # sort small to large 67 | print("genetic: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k])) 68 | print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}") 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/plot_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | np.random.seed(0) 4 | 5 | colors = np.array(['blue', 'black']) 6 | 7 | 8 | def plot_clusters(data, cls, clusters, title=""): 9 | if cls is None: 10 | c = [colors[0]] * data.shape[0] 11 | else: 12 | c = colors[cls].tolist() 13 | 14 | plt.scatter(data[:, 0], data[:, 1], c=c) 15 | for i, clus in enumerate(clusters): 16 | plt.scatter(clus[0], clus[1], c='gold', marker='*', s=150) 17 | plt.title(title) 18 | plt.show() 19 | plt.close() 20 | 21 | 22 | def distances(data, clusters): 23 | xy1 = data[:, None] # [N,1,2] 24 | xy2 = clusters[None] # [1,M,2] 25 | d = np.sum(np.power(xy2 - xy1, 2), axis=-1) 26 | return d 27 | 28 | 29 | def k_means(data, k, dist=np.mean): 30 | """ 31 | k-means methods 32 | Args: 33 | data: 需要聚类的data 34 | k: 簇数(聚成几类) 35 | dist: 更新簇坐标的方法 36 | """ 37 | data_number = data.shape[0] 38 | last_nearest = np.zeros((data_number,)) 39 | 40 | # init k clusters 41 | clusters = data[np.random.choice(data_number, k, replace=False)] 42 | print(f"random cluster: \n {clusters}") 43 | # plot 44 | plot_clusters(data, None, clusters, "random clusters") 45 | 46 | step = 0 47 | while True: 48 | d = distances(data, clusters) 49 | current_nearest = np.argmin(d, axis=1) 50 | 51 | # plot 52 | plot_clusters(data, current_nearest, clusters, f"step {step}") 53 | 54 | if (last_nearest == current_nearest).all(): 55 | break # clusters won't change 56 | for cluster in range(k): 57 | # update clusters 58 | clusters[cluster] = dist(data[current_nearest == cluster], axis=0) 59 | last_nearest = current_nearest 60 | step += 1 61 | 62 | return clusters 63 | 64 | 65 | def 
main(): 66 | x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)] 67 | x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)] 68 | 69 | x = np.concatenate([x1, x2]) 70 | y = np.concatenate([y1, y2]) 71 | 72 | plt.scatter(x, y, c='blue') 73 | plt.title("initial data") 74 | plt.show() 75 | plt.close() 76 | 77 | clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2) 78 | print(f"k-means fluster: \n {clusters}") 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/read_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import tqdm 3 | from lxml import etree 4 | 5 | 6 | class VOCDataSet(object): 7 | def __init__(self, voc_root, txt_name: str = "train.txt"): 8 | self.root = voc_root 9 | self.annotations_root = os.path.join(self.root, "Annotations") 10 | 11 | # read train.txt or val.txt file 12 | txt_path = os.path.join(self.root, "ImageSets", txt_name) 13 | assert os.path.exists(txt_path), "not found {} file.".format(txt_name) 14 | 15 | with open(txt_path) as read: 16 | self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml") 17 | for line in read.readlines() if len(line.strip()) > 0] 18 | 19 | # check file 20 | assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path) 21 | for xml_path in self.xml_list: 22 | assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path) 23 | 24 | def __len__(self): 25 | return len(self.xml_list) 26 | 27 | def parse_xml_to_dict(self, xml): 28 | """ 29 | 将xml文件解析成字典形式,参考tensorflow的recursive_parse_xml_to_dict 30 | Args: 31 | xml: xml tree obtained by parsing XML file contents using lxml.etree 32 | 33 | Returns: 34 | Python dictionary holding XML contents. 35 | """ 36 | 37 | if len(xml) == 0: # 遍历到底层,直接返回tag对应的信息 38 | return {xml.tag: xml.text} 39 | 40 | result = {} 41 | for child in xml: 42 | child_result = self.parse_xml_to_dict(child) # 递归遍历标签信息 43 | if child.tag != 'object': 44 | result[child.tag] = child_result[child.tag] 45 | else: 46 | if child.tag not in result: # 因为object可能有多个,所以需要放入列表里 47 | result[child.tag] = [] 48 | result[child.tag].append(child_result[child.tag]) 49 | return {xml.tag: result} 50 | 51 | def get_info(self): 52 | im_wh_list = [] 53 | boxes_wh_list = [] 54 | for xml_path in tqdm(self.xml_list, desc="read data info."): 55 | # read xml 56 | with open(xml_path) as fid: 57 | xml_str = fid.read() 58 | xml = etree.fromstring(xml_str) 59 | data = self.parse_xml_to_dict(xml)["annotation"] 60 | im_height = int(data["size"]["height"]) 61 | im_width = int(data["size"]["width"]) 62 | 63 | wh = [] 64 | for obj in data["object"]: 65 | xmin = float(obj["bndbox"]["xmin"]) 66 | xmax = float(obj["bndbox"]["xmax"]) 67 | ymin = float(obj["bndbox"]["ymin"]) 68 | ymax = float(obj["bndbox"]["ymax"]) 69 | wh.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height]) 70 | 71 | if len(wh) == 0: 72 | continue 73 | 74 | im_wh_list.append([im_width, im_height]) 75 | boxes_wh_list.append(wh) 76 | 77 | return im_wh_list, boxes_wh_list 78 | -------------------------------------------------------------------------------- /utils/kmeans_anchors/yolo_kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def wh_iou(wh1, wh2): 5 | # Returns the nxm IoU matrix. 
--------------------------------------------------------------------------------
/utils/kmeans_anchors/yolo_kmeans.py:
--------------------------------------------------------------------------------

import numpy as np


def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]     # [1,M,2]
    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)


def k_means(boxes, k, dist=np.median):
    """
    YOLO k-means clustering.
    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py
    Args:
        boxes: bboxes to cluster
        k: number of clusters
        dist: method used to update the cluster centres (the median by default, slightly better than the mean)
    """
    box_number = boxes.shape[0]
    last_nearest = np.zeros((box_number,))
    # np.random.seed(0)  # fix the random seed for reproducibility

    # init k clusters
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        distances = 1 - wh_iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            # update clusters
            clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0)

        last_nearest = current_nearest

    return clusters

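# Editor's sketch (not part of the original file): the clustering metric above
# is 1 - IoU between (w, h) pairs rather than Euclidean distance, so box shape
# matters instead of absolute coordinates. A tiny deterministic check:
if __name__ == "__main__":
    boxes = np.array([[0.1, 0.2], [0.4, 0.4], [0.8, 0.5]])
    anchors = np.array([[0.1, 0.2], [0.5, 0.5]])
    print(wh_iou(boxes, anchors))  # [3, 2] matrix; the first row is [1.0, 0.08]
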
--------------------------------------------------------------------------------
/utils/soft_nms.py:
--------------------------------------------------------------------------------

import torch

# Note: the two functions below are written as methods (they take `self` and
# call self.bbox_iou / self.yolo_correct_boxes), so they are meant to be
# attached to a detector class that provides yolo_correct_boxes.

def bbox_iou(self, box1, box2, x1y1x2y2=True):
    """
    Compute the IoU between two sets of boxes.
    """
    if not x1y1x2y2:
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, min=0) * \
                 torch.clamp(inter_rect_y2 - inter_rect_y1, min=0)

    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    iou = inter_area / torch.clamp(b1_area + b2_area - inter_area, min=1e-6)

    return iou

def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4, sigma=0.5):
    #----------------------------------------------------------#
    #   Convert predictions from centre/width-height format to
    #   top-left / bottom-right corners.
    #   prediction  [batch_size, num_anchors, 5 + num_classes] (85 for COCO)
    #----------------------------------------------------------#
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):
        #----------------------------------------------------------#
        #   Take the max over the class predictions.
        #   class_conf  [num_anchors, 1]  class confidence
        #   class_pred  [num_anchors, 1]  class index
        #----------------------------------------------------------#
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        #----------------------------------------------------------#
        #   First round of filtering by confidence.
        #----------------------------------------------------------#
        conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()

        #----------------------------------------------------------#
        #   Apply the confidence mask to the predictions.
        #----------------------------------------------------------#
        image_pred = image_pred[conf_mask]
        class_conf = class_conf[conf_mask]
        class_pred = class_pred[conf_mask]
        if not image_pred.size(0):
            continue
        #-------------------------------------------------------------------------#
        #   detections  [num_anchors, 7]
        #   the 7 values are: x1, y1, x2, y2, obj_conf, class_conf, class_pred
        #-------------------------------------------------------------------------#
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

        #------------------------------------------#
        #   Collect all classes present in the predictions.
        #------------------------------------------#
        unique_labels = detections[:, -1].cpu().unique()

        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
            detections = detections.cuda()

        for c in unique_labels:
            #------------------------------------------#
            #   Get all score-filtered detections of this class.
            #------------------------------------------#
            detections_class = detections[detections[:, -1] == c]

            # #------------------------------------------#
            # #   torchvision's built-in NMS would be faster!
            # #------------------------------------------#
            # keep = nms(
            #     detections_class[:, :4],
            #     detections_class[:, 4] * detections_class[:, 5],
            #     nms_thres
            # )
            # max_detections = detections_class[keep]

            # Sort by objectness * class confidence.
            _, conf_sort_index = torch.sort(detections_class[:, 4] * detections_class[:, 5], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Soft non-maximum suppression.
            max_detections = []
            while detections_class.size(0):
                # Keep the highest-scoring box of this class, then decay the
                # scores of the remaining boxes according to their overlap with it.
                max_detections.append(detections_class[0].unsqueeze(0))
                if len(detections_class) == 1:
                    break
                ious = self.bbox_iou(max_detections[-1], detections_class[1:])
                # Soft-NMS re-weighting: multiply the original score by a Gaussian of the IoU.
                detections_class[1:, 4] = torch.exp(-(ious * ious) / sigma) * detections_class[1:, 4]
                detections_class = detections_class[1:]
                # Drop boxes that fell below the threshold and re-sort by the updated scores.
                detections_class = detections_class[detections_class[:, 4] >= conf_thres]
                arg_sort = torch.argsort(detections_class[:, 4], descending=True)
                detections_class = detections_class[arg_sort]
            # Stack the kept detections.
            max_detections = torch.cat(max_detections).data

            # Add max detections to outputs.
            output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections))

        if output[i] is not None:
            output[i] = output[i].cpu().numpy()
            box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4]) / 2, output[i][:, 2:4] - output[i][:, 0:2]
            output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)

    return output

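# Editor's sketch (not part of the original file): the Gaussian re-weighting
# above in isolation. With sigma = 0.5, an overlap of IoU = 0.8 decays a score
# of 0.9 to 0.9 * exp(-0.64 / 0.5) ≈ 0.25, while IoU = 0.3 only decays it to
# ≈ 0.75: heavily overlapping boxes are suppressed softly instead of being
# removed outright.
def soft_nms_weight(iou, score, sigma=0.5):
    return torch.exp(-(iou * iou) / sigma) * score
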
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------

import numpy as np
from PIL import Image

#---------------------------------------------------------#
#   Convert the image to RGB so grayscale images do not
#   crash prediction. The code only supports prediction on
#   RGB images; every other mode is converted to RGB.
#---------------------------------------------------------#
def cvtColor(image):
    if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
        return image
    else:
        image = image.convert('RGB')
        return image

#---------------------------------------------------#
#   Resize the input image.
#---------------------------------------------------#
def resize_image(image, size):
    w, h = size
    new_image = image.resize((w, h), Image.BICUBIC)
    return new_image

#---------------------------------------------------#
#   Read the class names.
#---------------------------------------------------#
def get_classes(classes_path):
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)

#---------------------------------------------------#
#   Get the current learning rate.
#---------------------------------------------------#
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def preprocess_input(image):
    image /= 255.0
    return image

def show_config(**kwargs):
    print('Configurations:')
    print('-' * 70)
    print('|%25s | %40s|' % ('keys', 'values'))
    print('-' * 70)
    for key, value in kwargs.items():
        print('|%25s | %40s|' % (str(key), str(value)))
    print('-' * 70)

def get_new_img_size(height, width, img_min_side=600):
    if width <= height:
        f = float(img_min_side) / width
        resized_height = int(f * height)
        resized_width = int(img_min_side)
    else:
        f = float(img_min_side) / height
        resized_width = int(f * width)
        resized_height = int(img_min_side)

    return resized_height, resized_width

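# Editor's sketch (not part of the original file): get_new_img_size fixes the
# short side of the image to img_min_side and scales the long side by the
# same factor, preserving the aspect ratio.
if __name__ == "__main__":
    # A 1000x500 (h x w) image: the short side 500 is scaled to 600 and the
    # long side follows with the same factor of 1.2.
    assert get_new_img_size(1000, 500) == (1200, 600)
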
--------------------------------------------------------------------------------
/utils/utils_bbox.py:
--------------------------------------------------------------------------------

import numpy as np
import torch
from torch.nn import functional as F
from torchvision.ops import nms


# src_bbox: prior boxes (anchors/proposals); loc: predicted regression offsets
def loc2bbox(src_bbox, loc):
    if src_bbox.size()[0] == 0:
        return torch.zeros((0, 4), dtype=loc.dtype)

    # width, height and centre coordinates of the prior boxes
    src_width = torch.unsqueeze(src_bbox[:, 2] - src_bbox[:, 0], -1)
    src_height = torch.unsqueeze(src_bbox[:, 3] - src_bbox[:, 1], -1)
    src_ctr_x = torch.unsqueeze(src_bbox[:, 0], -1) + 0.5 * src_width
    src_ctr_y = torch.unsqueeze(src_bbox[:, 1], -1) + 0.5 * src_height

    # offsets used to adjust the size and position of the priors;
    # [:, 0::4] selects every fourth column starting from column 0
    dx = loc[:, 0::4]
    dy = loc[:, 1::4]
    dw = loc[:, 2::4]
    dh = loc[:, 3::4]

    # apply the offsets to the priors
    ctr_x = dx * src_width + src_ctr_x
    ctr_y = dy * src_height + src_ctr_y
    w = torch.exp(dw) * src_width
    h = torch.exp(dh) * src_height

    dst_bbox = torch.zeros_like(loc)
    dst_bbox[:, 0::4] = ctr_x - 0.5 * w
    dst_bbox[:, 1::4] = ctr_y - 0.5 * h
    dst_bbox[:, 2::4] = ctr_x + 0.5 * w
    dst_bbox[:, 3::4] = ctr_y + 0.5 * h

    return dst_bbox

class DecodeBox():
    def __init__(self, std, num_classes):
        self.std = std
        self.num_classes = num_classes + 1

    def frcnn_correct_boxes(self, box_xy, box_wh, input_shape, image_shape):
        #-----------------------------------------------------------------#
        #   y is put first so the boxes can be multiplied directly by the
        #   image height/width.
        #-----------------------------------------------------------------#
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        box_mins = box_yx - (box_hw / 2.)
        box_maxes = box_yx + (box_hw / 2.)
        boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def forward(self, roi_cls_locs, roi_scores, rois, image_shape, input_shape, nms_iou=0.3, confidence=0.5):
        results = []
        bs = len(roi_cls_locs)
        #--------------------------------#
        #   batch_size, num_rois, 4
        #--------------------------------#
        rois = rois.view((bs, -1, 4))
        #----------------------------------------------------------------------------#
        #   Process each image in the batch. predict.py feeds a single image,
        #   so this loop runs only once there.
        #----------------------------------------------------------------------------#
        for i in range(bs):
            #----------------------------------------------------------#
            #   De-normalise the regression parameters (multiply by std).
            #----------------------------------------------------------#
            roi_cls_loc = roi_cls_locs[i] * self.std
            #----------------------------------------------------------#
            #   dim 0: number of proposals, dim 1: classes,
            #   dim 2: per-class adjustment parameters
            #----------------------------------------------------------#
            roi_cls_loc = roi_cls_loc.view([-1, self.num_classes, 4])

            #-------------------------------------------------------------#
            #   Adjust the proposals with the classifier's regression output
            #   to obtain the final boxes.
            #   num_rois, 4 -> num_rois, 1, 4 -> num_rois, num_classes, 4
            #-------------------------------------------------------------#
            roi = rois[i].view((-1, 1, 4)).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(roi.contiguous().view((-1, 4)), roi_cls_loc.contiguous().view((-1, 4)))
            cls_bbox = cls_bbox.view([-1, (self.num_classes), 4])
            #-------------------------------------------------------------#
            #   Normalise the boxes to the 0-1 range.
            #-------------------------------------------------------------#
            cls_bbox[..., [0, 2]] = (cls_bbox[..., [0, 2]]) / input_shape[1]
            cls_bbox[..., [1, 3]] = (cls_bbox[..., [1, 3]]) / input_shape[0]

            roi_score = roi_scores[i]
            prob = F.softmax(roi_score, dim=-1)

            results.append([])
            for c in range(1, self.num_classes):
                #--------------------------------#
                #   Take the confidences of this class
                #   and check them against the threshold.
                #--------------------------------#
                c_confs = prob[:, c]
                c_confs_m = c_confs > confidence

                if len(c_confs[c_confs_m]) > 0:
                    #-----------------------------------------#
                    #   Keep the boxes scoring above confidence.
                    #-----------------------------------------#
                    boxes_to_process = cls_bbox[c_confs_m, c]
                    confs_to_process = c_confs[c_confs_m]

                    keep = nms(
                        boxes_to_process,
                        confs_to_process,
                        nms_iou
                    )
                    #-----------------------------------------#
                    #   Keep the boxes that survive NMS.
                    #-----------------------------------------#
                    good_boxes = boxes_to_process[keep]
                    confs = confs_to_process[keep][:, None]
                    labels = (c - 1) * torch.ones((len(keep), 1)).cuda() if confs.is_cuda else (c - 1) * torch.ones((len(keep), 1))
                    #-----------------------------------------#
                    #   Stack the labels, confidences and box coordinates.
                    #-----------------------------------------#
                    c_pred = torch.cat((good_boxes, confs, labels), dim=1).cpu().numpy()
                    # append to the results of this image
                    results[-1].extend(c_pred)

            if len(results[-1]) > 0:
                results[-1] = np.array(results[-1])
                box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4]) / 2, results[-1][:, 2:4] - results[-1][:, 0:2]
                results[-1][:, :4] = self.frcnn_correct_boxes(box_xy, box_wh, input_shape, image_shape)

        return results

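# Editor's sketch (not part of the original file): loc2bbox on a single box.
# A 10x10 prior at (0, 0, 10, 10) with loc = (0.1, 0.1, 0, 0) keeps its size
# (exp(0) = 1) and shifts its centre by 0.1 * 10 = 1 pixel in x and y.
if __name__ == "__main__":
    src = torch.tensor([[0., 0., 10., 10.]])
    loc = torch.tensor([[0.1, 0.1, 0.0, 0.0]])
    print(loc2bbox(src, loc))  # tensor([[ 1.,  1., 11., 11.]])
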
--------------------------------------------------------------------------------
/utils/utils_fit.py:
--------------------------------------------------------------------------------

import os

import torch
from tqdm import tqdm

from utils.utils import get_lr


def fit_one_epoch(model, train_util, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir):
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0

    val_loss = 0
    print('Start Train')
    with tqdm(total=epoch_step, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_step:
                break
            images, boxes, labels = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    images = images.cuda()  # shape = [2, 3, 600, 600]

            rpn_loc, rpn_cls, roi_loc, roi_cls, total = train_util.train_step(images, boxes, labels, 1, fp16, scaler)
            total_loss += total.item()
            rpn_loc_loss += rpn_loc.item()
            rpn_cls_loss += rpn_cls.item()
            roi_loc_loss += roi_loc.item()
            roi_cls_loss += roi_cls.item()

            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                'rpn_loc': rpn_loc_loss / (iteration + 1),
                                'rpn_cls': rpn_cls_loss / (iteration + 1),
                                'roi_loc': roi_loc_loss / (iteration + 1),
                                'roi_cls': roi_cls_loss / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    print('Finish Train')
    print('Start Validation')
    with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen_val):
            if iteration >= epoch_step_val:
                break
            images, boxes, labels = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    images = images.cuda()

                train_util.optimizer.zero_grad()
                _, _, _, _, val_total = train_util.forward(images, boxes, labels, 1)
                val_loss += val_total.item()

            pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
            pbar.update(1)

    print('Finish Validation')
    loss_history.append_loss(epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val)
    eval_callback.on_epoch_end(epoch + 1)
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.3f || Val Loss: %.3f ' % (total_loss / epoch_step, val_loss / epoch_step_val))

    #-----------------------------------------------#
    #   Save the weights.
    #-----------------------------------------------#
    if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
        torch.save(model.state_dict(), os.path.join(save_dir, 'ep%03d-loss%.3f-val_loss%.3f.pth' % (epoch + 1, total_loss / epoch_step, val_loss / epoch_step_val)))

    if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss):
        print('Save best model to best_epoch_weights.pth')
        torch.save(model.state_dict(), os.path.join(save_dir, "best_epoch_weights.pth"))

    torch.save(model.state_dict(), os.path.join(save_dir, "last_epoch_weights.pth"))

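# Editor's sketch (not part of the original file): the best-checkpoint rule
# above in isolation. append_loss() has already recorded the current epoch,
# so the latest value is compared with "<=" against a history that includes
# itself; a tie with the previous best therefore still triggers a save.
def is_best_epoch(val_loss_history):
    return len(val_loss_history) <= 1 or val_loss_history[-1] <= min(val_loss_history)
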
--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------

import os
import random
import xml.etree.ElementTree as ET

import numpy as np

from utils.utils import get_classes

#--------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode selects what this script computes when it runs:
#   0 - the whole labelling pipeline: the txt files in VOCdevkit/VOC2007/ImageSets plus the 2007_train.txt / 2007_val.txt used for training
#   1 - only the txt files in VOCdevkit/VOC2007/ImageSets
#   2 - only the 2007_train.txt / 2007_val.txt used for training
#--------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0
#-------------------------------------------------------------------#
#   Must be modified: used to generate the object information in
#   2007_train.txt / 2007_val.txt, and has to match the classes_path
#   used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes are not set correctly.
#   Only effective when annotation_mode is 0 or 2.
#-------------------------------------------------------------------#
classes_path = r'F:\Desktop\PCB_code\PCB_DataSet\cls_classes.txt'
#--------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val) : test = 9 : 1.
#   train_percent sets the ratio of train to val inside (train + val); by default train : val = 9 : 1.
#   Only effective when annotation_mode is 0 or 1.
#--------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
#-------------------------------------------------------#
#   Points to the folder holding the VOC-style dataset.
#   Defaults to the dataset in the repository root.
#-------------------------------------------------------#
PCB_DataSet_path = r'PCB_DataSet'

PCB_Data_Sets = ['trainval', 'test']
classes, _ = get_classes(classes_path)

#-------------------------------------------------------#
#   Count images per split and objects per class.
#-------------------------------------------------------#
photo_nums = np.zeros(len(PCB_Data_Sets))
nums = np.zeros(len(classes))
def convert_annotation(image_id, list_file):
    in_file = open(os.path.join(PCB_DataSet_path, 'Annotations/%s.xml' % (image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult') != None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1

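# Editor's sketch (not part of the original file): every annotation line
# written below has the form
#   <abs_path>/JPEGImages/<image_id>.jpg x1,y1,x2,y2,cls_id x1,y1,x2,y2,cls_id ...
# with one comma-separated group per object that survives the difficult/class
# filter above. The image id here is a made-up placeholder:
#
#   import io
#   buf = io.StringIO()
#   convert_annotation("000001", buf)   # requires Annotations/000001.xml on disk
#   print(buf.getvalue())               # e.g. " 48,240,195,371,0 8,12,352,498,1"
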
if __name__ == "__main__":
    random.seed(0)
    if " " in os.path.abspath(PCB_DataSet_path):
        raise ValueError("Neither the dataset folder path nor the image names may contain spaces; this would break training, please rename them.")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(PCB_DataSet_path, 'Annotations')
        saveBasePath = os.path.join(PCB_DataSet_path, 'ImageSets')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        indices = range(num)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in indices:
            name = total_xml[i][:-4] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate trainval.txt and test.txt for training.")
        type_index = 0
        for image_set in PCB_Data_Sets:
            image_ids = open(os.path.join(PCB_DataSet_path, 'ImageSets/%s.txt' % (image_set)), encoding='utf-8').read().strip().split()
            list_file = open(os.path.join(PCB_DataSet_path, '%s.txt' % (image_set)), 'w', encoding='utf-8')  # holds the train/test annotation lines
            for image_id in image_ids:
                list_file.write('%s/JPEGImages/%s.jpg' % (os.path.abspath(PCB_DataSet_path), image_id))  # write the image path

                convert_annotation(image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate trainval.txt and test.txt for training done.")

        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0] * len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is quite small; use a larger number of epochs to get enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset; make sure classes_path matches your dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset; make sure classes_path matches your dataset and that the label names are correct, otherwise training will have no effect!")
            print("(Repeated three times because it matters.)")

--------------------------------------------------------------------------------