├── LICENSE
├── README.md
├── dataset_prepare
│   ├── augument_with_label.py
│   ├── datasets_prepare.py
│   ├── rename_txt.py
│   ├── rotate_with_label.py
│   ├── synthetic_fog.py
│   └── use.md
├── label_format_conversion
│   ├── coco_split_trainVal.py
│   ├── coco_visulize.py
│   ├── generate_persudo_json.py
│   ├── make_voc.py
│   ├── readme.md
│   ├── voc_split_trainVal.py
│   ├── voc_to_coco_v1.py
│   ├── voc_to_coco_v2.py
│   ├── voc_to_yoloV3.py
│   └── voc_to_yoloV5.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   ├── common.cpython-310.pyc
│   │   ├── experimental.cpython-310.pyc
│   │   └── yolo.cpython-310.pyc
│   ├── common.py
│   ├── experimental.py
│   ├── hub
│   │   ├── anchors.yaml
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov3-tiny.yaml
│   │   ├── yolov3.yaml
│   │   ├── yolov5-bifpn.yaml
│   │   ├── yolov5-fpn.yaml
│   │   ├── yolov5-p2.yaml
│   │   ├── yolov5-p34.yaml
│   │   ├── yolov5-p6.yaml
│   │   ├── yolov5-p7.yaml
│   │   ├── yolov5-panet.yaml
│   │   ├── yolov5l6.yaml
│   │   ├── yolov5m6.yaml
│   │   ├── yolov5n6.yaml
│   │   ├── yolov5s-LeakyReLU.yaml
│   │   ├── yolov5s-ghost.yaml
│   │   ├── yolov5s-transformer.yaml
│   │   ├── yolov5s6.yaml
│   │   └── yolov5x6.yaml
│   ├── readme.md
│   ├── segment
│   │   ├── yolov5l-seg.yaml
│   │   ├── yolov5m-seg.yaml
│   │   ├── yolov5n-seg.yaml
│   │   ├── yolov5s-seg.yaml
│   │   └── yolov5x-seg.yaml
│   ├── tf.py
│   ├── yolo.py
│   ├── yolov5_ghost_attention.yaml
│   ├── yolov5l.yaml
│   ├── yolov5l_2.yaml
│   ├── yolov5m.yaml
│   ├── yolov5m_2.yaml
│   ├── yolov5n.yaml
│   ├── yolov5s-ghost_dw.yaml
│   ├── yolov5s-transformer_dw.yaml
│   ├── yolov5s.yaml
│   ├── yolov5s_dw_se.yaml
│   ├── yolov5s_dw_se_c3ghost.yaml
│   ├── yolov5s_dw_se_c3spp_c3ghost.yaml
│   ├── yolov5s_dw_spp.yaml
│   ├── yolov5s_dw_x.yaml
│   ├── yolov5s_raw.yaml
│   ├── yolov5x.yaml
│   └── yolov5x_raw.yaml
└── yolov5_gradcam.py

/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2024, lcd955

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Insulator_defect-nest_detection
Research on a YOLOv5-based fault-detection model for power transmission and distribution lines; this project is built on top of YOLOv5.
## 1. Insulator defect detection

The individual network model definitions are stored in the models folder.

## 2. Annotation format conversion

See the label_format_conversion folder.

## 3. Consolidation and preprocessing of publicly available datasets, including image fogging, noise injection, and random cropping

See the dataset_prepare folder for the dataset preprocessing methods.

## 4. The open-source dataset:
PaddlePaddle AI Studio: https://aistudio.baidu.com/datasetdetail/270697/0

## 5. Feature-map visualization

Refer to yolov5_gradcam.py.

--------------------------------------------------------------------------------
/dataset_prepare/augument_with_label.py:
--------------------------------------------------------------------------------
import cv2
import os
import glob
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from tqdm import tqdm

def load_yolo_boxes(filename, shape):
    with open(filename) as f:
        lines = f.readlines()

    boxes = []
    for line in lines:
        class_id, x_center, y_center, width, height = map(float, line.split())
        x1 = (x_center - width / 2) * shape[1]
        y1 = (y_center - height / 2) * shape[0]
        x2 = (x_center + width / 2) * shape[1]
        y2 = (y_center + height / 2) * shape[0]

        boxes.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2, label=class_id))

    return BoundingBoxesOnImage(boxes, shape=shape)

def save_yolo_boxes(bbs, filename, shape):
    with open(filename, 'w') as f:
        for bb in bbs.bounding_boxes:
            x_center = (bb.x1 + bb.x2) / 2 / shape[1]
            y_center = (bb.y1 + bb.y2) / 2 / shape[0]
            width = (bb.x2 - bb.x1) / shape[1]
            height = (bb.y2 - bb.y1) / shape[0]

            f.write(f'{int(bb.label)} {x_center} {y_center} {width} {height}\n')

# source_dir = 'path/to/source'
# output_dir = 'path/to/output'
source_dir = r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\testdata'

output_dir = r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\testdata\1'

images = glob.glob(os.path.join(source_dir, '*.jpg'))

seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontal flips
    iaa.Crop(percent=(0, 0.1)),  # random crops
    # Even at low probability, be sure to combine several other augmentations; this keeps the annotations consistent with the images
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
    iaa.Sometimes(0.7, iaa.Affine(scale={"x": (0.8, 1.2), "y": (0.8, 1.2)})),
    iaa.Sometimes(0.9, iaa.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)})),
])

for image_path in tqdm(images):
    image = cv2.imread(image_path)
    bbs = load_yolo_boxes(image_path.replace('.jpg', '.txt'), image.shape)

    image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)

    cv2.imwrite(os.path.join(output_dir, 'augument_random' + os.path.basename(image_path)), image_aug)
    save_yolo_boxes(bbs_aug, os.path.join(output_dir, 'augument_random' + os.path.basename(image_path).replace('.jpg', '.txt')), image_aug.shape)

print('Done')
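
One caveat about the pipeline above: the random crops and affine translations can push boxes partially or fully outside the frame, and save_yolo_boxes() writes them back unchanged, which yields out-of-range YOLO coordinates. A minimal cleanup sketch using imgaug's own helpers (remove_out_of_image() and clip_out_of_image() are part of the BoundingBoxesOnImage API; the wrapper name is illustrative):

# Sketch: drop boxes that left the frame entirely, clip the rest.
def sanitize_boxes(bbs_aug):
    # remove_out_of_image() drops boxes fully outside the image;
    # clip_out_of_image() trims partially visible ones to the image rectangle.
    return bbs_aug.remove_out_of_image().clip_out_of_image()

# Usage inside the loop above, between augmentation and saving:
#   image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)
#   bbs_aug = sanitize_boxes(bbs_aug)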
--------------------------------------------------------------------------------
/dataset_prepare/datasets_prepare.py:
--------------------------------------------------------------------------------
import os
import shutil
import numpy as np
import re
import random

# Set the random seeds so that every run produces the same dataset split
random.seed(0)
np.random.seed(0)
os.environ['PYTHONHASHSEED'] = '0'

# Folder paths of the raw dataset
img_dir = "/root/autodl-tmp/merged_insulator_data_new/img"  # "./data/images"
label_dir = '/root/autodl-tmp/merged_insulator_data_new/labels'  # "./data/labels"


# Destination folder paths
dataset_dir = "/root/autodl-tmp/datasets_0512"  # "./dataset"
img_train_dir = os.path.join(dataset_dir, "images/train")
img_val_dir = os.path.join(dataset_dir, "images/val")
label_train_dir = os.path.join(dataset_dir, "labels/train")
label_val_dir = os.path.join(dataset_dir, "labels/val")

# Create the required folders
os.makedirs(img_train_dir, exist_ok=True)
os.makedirs(img_val_dir, exist_ok=True)
os.makedirs(label_train_dir, exist_ok=True)
os.makedirs(label_val_dir, exist_ok=True)

# Collect all image files
img_files = [f for f in os.listdir(img_dir) if os.path.isfile(os.path.join(img_dir, f))]

# Shuffle randomly
np.random.shuffle(img_files)

# Compute the train/val split point
split_idx = int(len(img_files) * 0.7)

# Split the files into a training set and a validation set
train_files = img_files[:split_idx]
val_files = img_files[split_idx:]

# Extension matching, so that .jpg, .JPG, .jpeg and .JPEG are all handled
# correctly (escaped dot, both alternatives anchored to the end of the name)
ext_pattern = r'\.[jJ][pP][gG]$|\.[jJ][pP][eE][gG]$'
for f in train_files:
    shutil.copy(os.path.join(img_dir, f), os.path.join(img_train_dir, f))
    shutil.copy(os.path.join(label_dir, re.sub(ext_pattern, '.txt', f)), os.path.join(label_train_dir, re.sub(ext_pattern, '.txt', f)))

for f in val_files:
    shutil.copy(os.path.join(img_dir, f), os.path.join(img_val_dir, f))
    shutil.copy(os.path.join(label_dir, re.sub(ext_pattern, '.txt', f)), os.path.join(label_val_dir, re.sub(ext_pattern, '.txt', f)))

--------------------------------------------------------------------------------
/dataset_prepare/rename_txt.py:
--------------------------------------------------------------------------------
import os
import glob

# Collect all txt files in the current directory
# filepath=[r"D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_labels/"]

folder1 = r"D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_labels/"  # directory of the txt files to be renamed
folder2 = r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_img/'  # directory of the reference jpg files

# Collect all .txt and .jpg files
txt_files = sorted(glob.glob(os.path.join(folder1, "*.txt")))
jpg_files = sorted(glob.glob(os.path.join(folder2, "*.jpg")))

# Check that the two folders contain the same number of files
if len(txt_files) != len(jpg_files):
    print("File counts do not match!")
else:
    # Walk through each txt file and rename it after the matching jpg file
    for txt_file, jpg_file in zip(txt_files, jpg_files):
        # Extract the jpg file's base name (without extension)
        base_name = os.path.splitext(os.path.basename(jpg_file))[0]
        # Build the new txt file name
        new_name = "{}.txt".format(base_name)
        new_name_path = os.path.join(folder1, new_name)
        # Rename the txt file
        os.rename(txt_file, new_name_path)
--------------------------------------------------------------------------------
/dataset_prepare/rotate_with_label.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import math
import os
from tqdm import tqdm
#
# # Rotate the image and its label boxes together
# def rotate_image_and_boxes(img, boxes, angle, scale=1.):
#     w, h = img.shape[1], img.shape[0]
#     cx, cy = w // 2, h // 2
#
#     M = cv2.getRotationMatrix2D((cx, cy), angle, scale)
#     rotated_img = cv2.warpAffine(img, M, (w, h))
#
# 
rotated_boxes = [] 16 | # for box in boxes: 17 | # label,x, y, w, h = box 18 | # corners = np.array([ 19 | # [x-w/2, y-h/2], 20 | # [x-w/2, y+h/2], 21 | # [x+w/2, y-h/2], 22 | # [x+w/2, y+h/2] 23 | # ]) 24 | # 25 | # corners = np.hstack((corners, np.ones((4, 1)))) 26 | # corners = np.dot(M, corners.T).T 27 | # x_min, y_min = corners.min(axis=0)[:2] 28 | # x_max, y_max = corners.max(axis=0)[:2] 29 | # 30 | # rotated_boxes.append([label,x_min + (x_max - x_min) / 2, y_min + (y_max - y_min) / 2, x_max - x_min, y_max - y_min]) 31 | # 32 | # return rotated_img, rotated_boxes 33 | # 34 | # # 读取标记文件 35 | # def read_annotation_file(file_path): 36 | # boxes = [] 37 | # with open(file_path, 'r') as file: 38 | # lines = file.readlines() 39 | # for line in lines: 40 | # items = line.strip().split(" ") 41 | # class_id = int(items[0]) 42 | # x, y, w, h = map(float, items[1:]) 43 | # boxes.append([class_id, x, y, w, h]) 44 | # return boxes 45 | # 46 | # img = cv2.imread(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\images\001.jpg') 47 | # boxes = read_annotation_file(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\labels\worktxt\001.txt') 48 | # 49 | # rotated_img, rotated_boxes = rotate_image_and_boxes(img, boxes, angle=30) 50 | # 51 | # cv2.imwrite(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\labels\rotated_image.jpg', rotated_img) 52 | # with open(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\labels\rotated_image.txt', 'w') as file: 53 | # for box in rotated_boxes: 54 | # file.write(" ".join(map(str, box)) + "\n") 55 | 56 | def rotate_image_and_boxes(image, boxes): 57 | (h, w) = image.shape[:2] 58 | center = (w / 2, h / 2) 59 | 60 | M = cv2.getRotationMatrix2D(center, 30, 1.0) 61 | rotated = cv2.warpAffine(image, M, (w, h)) 62 | 63 | # 转换成点并应用旋转和转换反馈到bounding box的格式 64 | new_boxes = [] 65 | for box in boxes: 66 | points = np.int0(cv2.transform(np.array([[ 67 | [box[0], box[1]], 68 | [box[0] + box[2], box[1]], 69 | [box[0] + box[2], box[1] + box[3]], 70 | [box[0], box[1] + box[3]] 71 | ]]), M)) 72 | new_box = cv2.boundingRect(points) 73 | new_boxes.append(new_box) 74 | 75 | return rotated, new_boxes 76 | 77 | for image_file in tqdm(os.listdir(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\images/')): 78 | image = cv2.imread(fr'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators/images/{image_file}') 79 | base_name = os.path.splitext(image_file)[0] 80 | box_file = fr'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\worktxt/{base_name}.txt' 81 | with open(box_file, 'r') as f: 82 | boxes = [] 83 | for line in f: 84 | elements = line.strip().split() 85 | x_center, y_center, box_w, box_h = map(float, elements[1:]) 86 | x1 = (x_center - box_w / 2) * image.shape[1] 87 | y1 = (y_center - box_h / 2) * image.shape[0] 88 | x2 = x1 + box_w * image.shape[1] 89 | y2 = y1 + box_h * image.shape[0] 90 | boxes.append([x1, y1, x2-x1, y2-y1]) 91 | rotated_image, new_boxes = rotate_image_and_boxes(image, boxes) 92 | # 保存旋转后的图像 93 | img_path=fr'D:\desk\yolov5\dataset_raw\rotated_images_30/{base_name}_rotated_30.jpg' 94 | base_img_path = os.path.dirname(img_path) 95 | if not os.path.exists(base_img_path): 96 | os.makedirs(base_img_path) # 如果不存在,创建路径 97 | cv2.imwrite(img_path, rotated_image) 98 | # 保存旋转后的框,您可能会想要将这些框转换回YOLO格式 99 | out_file = fr'D:\desk\yolov5\dataset_raw\rotated_boxes_30/{base_name}_rotated_30.txt' 
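# --- Editor's sketch, not part of the original script: cv2.boundingRect on the
# rotated corners can yield boxes that extend past the image border, which
# later produces YOLO values outside [0, 1]. A small clamp before the save
# step below keeps them valid; `clip_box` is an illustrative name:
#
#     def clip_box(box, img_w, img_h):
#         x, y, bw, bh = box
#         x1, y1 = max(0, x), max(0, y)
#         x2, y2 = min(img_w, x + bw), min(img_h, y + bh)
#         return x1, y1, max(0, x2 - x1), max(0, y2 - y1)
#
#     new_boxes = [clip_box(b, rotated_image.shape[1], rotated_image.shape[0])
#                  for b in new_boxes]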
100 | 101 | base_out_path= os.path.dirname(out_file) 102 | 103 | if not os.path.exists(base_out_path): 104 | os.makedirs(base_out_path) # 如果不存在,创建路径 105 | with open(out_file, 'w') as f: 106 | for box in new_boxes: 107 | x_center = (box[0] + box[2] / 2) / rotated_image.shape[1] 108 | y_center = (box[1] + box[3] / 2) / rotated_image.shape[0] 109 | box_w = box[2] / rotated_image.shape[1] 110 | box_h = box[3] / rotated_image.shape[0] 111 | # 写入类别标签,这里假设类别不变仍为0 112 | f.write(f'0 {x_center} {y_center} {box_w} {box_h}\n') -------------------------------------------------------------------------------- /dataset_prepare/synthetic_fog.py: -------------------------------------------------------------------------------- 1 | """ 2 | 直接运行程序可以测试合成雾气效果 3 | Produced by: zhangzhengde@sjtu.edu.cn 4 | """ 5 | import os, sys 6 | from pathlib import Path 7 | import argparse 8 | import math 9 | import cv2 10 | import copy 11 | import time 12 | from pathlib import Path 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | pydir = Path(os.path.abspath(__file__)).parent 17 | if f'{pydir.parent}' not in sys.path: 18 | sys.path.insert(0, f'{pydir.parent}') 19 | os.chdir(f'{pydir.parent}') 20 | 21 | 22 | class SyntheticFog(object): 23 | def __init__(self): 24 | pass 25 | 26 | def __call__(self,speed_up, img_path , out_path): 27 | img_path = img_path 28 | # img_path = '../sources/IMG_6685.JPG' 29 | assert os.path.exists(img_path), f'error: img does not exists, {img_path}' 30 | img = copy.copy(cv2.imread(img_path)) 31 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 32 | print(img.shape) 33 | img = img/255.0 34 | print(f'fogging...') 35 | t0 = time.time() 36 | br = 0.7 37 | th = 0.05 38 | fogged_img = self.fogging_img( 39 | img, brightness=br, thickness=th, 40 | high_efficiency = speed_up) 41 | print(f'fogging time: {(time.time()-t0)*1000:.4f}ms') 42 | rf = 1 # resize factor 43 | img = cv2.resize(img, (int(img.shape[1]*rf), int(img.shape[0]*rf))) 44 | fogged_img = cv2.resize(fogged_img, ((int(fogged_img.shape[1]*rf)), (int(fogged_img.shape[0]*rf)))) 45 | fogged_img = np.array(fogged_img*255, dtype=np.uint8) 46 | # cv2.imshow('src', img) 47 | # cv2.imshow('fogged', fogged_img) 48 | # cv2.waitKey(0) 49 | save = True if out_path else False 50 | if save: 51 | cv2.imwrite(out_path+f'{Path(img_path).stem}_br{br}_th{th}.jpg', fogged_img) 52 | else: 53 | h, w, c = img.shape 54 | fig, ax = plt.subplots(1, 2, figsize=(w/100, h/100)) 55 | ax[0].imshow(img) 56 | ax[1].imshow(fogged_img) 57 | plt.show() 58 | 59 | cv2.imshow('src', img) 60 | cv2.imshow('fogged', fogged_img) 61 | cv2.waitKey(0) 62 | 63 | 64 | def fogging_img(self, img, brightness=0.7, thickness=0.06, high_efficiency = False): 65 | """ 66 | fogging image 67 | :param img: src img 68 | :param brightness: brightness 69 | :param thickness: fog thickness, without fog when 0, max 0.1, 70 | :param high_efficiency: use matrix to improve fogging speed when high_efficiency is True, else use loops 71 | low efficiency: about 4000ms, high efficiency: about 80ms, tested in (864, 1152, 3) img 72 | :return: fogged image 73 | """ 74 | assert 0 <= brightness <= 1 75 | assert 0 <= thickness <= 0.1 76 | fogged_img = img.copy() 77 | h, w, c = fogged_img.shape 78 | if not high_efficiency: # use default loop to fogging, low efficiency 79 | size = np.sqrt(np.max(fogged_img.shape[:2])) # 雾化尺寸 80 | center = (h // 2, w // 2) # 雾化中心 81 | # print(f'shape: {img.shape} center: {center} size: {size}') # 33 82 | # d_list = [] 83 | for j in range(h): # 84 | for l in range(w): 85 | d = 
-0.04 * math.sqrt((j - center[0]) ** 2 + (l - center[1]) ** 2) + size 86 | # print(f'd {d}') 87 | td = math.exp(-thickness * d) 88 | # d_list.append(td) 89 | fogged_img[j][l][:] = fogged_img[j][l][:] * td + brightness * (1 - td) 90 | # x = np.arange(len(d_list)) 91 | # plt.plot(x, d_list, 'o') 92 | # if j == 5: 93 | # break 94 | else: # use matrix # TODO: 直接使用像素坐标,距离参数不适用于大分辨率图像,会变成鱼眼镜头的样子. done. 95 | use_pixel = True 96 | size = np.sqrt(np.max(fogged_img.shape[:2])) if use_pixel else 1 # 雾化尺寸,sqrt(w), (w, h, 3) 97 | h, w, c = fogged_img.shape 98 | hc, wc = h // 2, w // 2 99 | mask = self.get_mask(h=h, w=w, hc=hc, wc=wc, pixel=use_pixel) # (h, w, 2) # O(max(w, h)) 100 | d = -0.04 * np.linalg.norm(mask, axis=2) + size # (h, w, 2) -> (h, w), O(h*w) 101 | 102 | td = np.exp(-thickness * d) 103 | 104 | for cc in range(c): 105 | fogged_img[..., cc] = fogged_img[..., cc] * td + brightness*(1-td) 106 | 107 | # a = np.linalg.norm(mask, axis=2) 108 | # print(f'size: {fogged_img.shape} a: {a} max: {np.max(fogged_img)} {np.min(fogged_img)}') 109 | 110 | fogged_img = np.clip(fogged_img, 0, 1) # 解决黑白噪点的问题 111 | # print(f'mask: {mask[:, :, 1]} {mask.shape}') 112 | # print(f'd: {d} {d.shape}') 113 | 114 | return fogged_img 115 | 116 | def get_mask(self, h, w, hc, wc, pixel=True): 117 | mask = np.zeros((h, w, 2), dtype=np.float32) 118 | if pixel: 119 | mask[:, :, 0] = np.repeat(np.arange(h).reshape((h, 1)), w, axis=1) - hc # loop o(h) 120 | mask[:, :, 1] = np.repeat(np.arange(w).reshape((1, w)), h, axis=0) - wc # loop o(w) 121 | else: 122 | mask[:, :, 0] = np.repeat(np.linspace(0, 1, h).reshape(h, 1), w, axis=1) - 0.5 123 | mask[:, :, 1] = np.repeat(np.linspace(0, 1, w).reshape((1, w)), h, axis=0) - 0.5 124 | return mask 125 | 126 | 127 | # if __name__ == '__main__': 128 | # parser = argparse.ArgumentParser(prog='synthetic_fog.py') 129 | # parser.add_argument('--speed_up', action='store_true', default=False, help='matrix optimization') 130 | # parser.add_argument('--source', type=str, default= 'data/SFID_demo/images/train/001040.jpg', help='source img path') 131 | # parser.add_argument('--save-dir', type=str, default=None, help='output img path') 132 | # opt = parser.parse_args() 133 | # print(opt) 134 | # synf = SyntheticFog() 135 | # synf(opt.speed_up,opt.source, opt.save_dir) 136 | 137 | 138 | if __name__ == '__main__': 139 | parser = argparse.ArgumentParser(prog='synthetic_fog.py') 140 | parser.add_argument('--speed_up', action='store_true', default=True, help='matrix optimization') 141 | parser.add_argument('--source', type=str, default= r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\images/', help='source img path') 142 | parser.add_argument('--save-dir', type=str, default=r"D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_img/", help='output img path') 143 | opt = parser.parse_args() 144 | print(opt) 145 | synf = SyntheticFog() 146 | 147 | # Check if source is a directory 148 | if os.path.isdir(opt.source): 149 | # Iterate over every image in the source directory 150 | for img_file in os.listdir(opt.source): 151 | img_path = os.path.join(opt.source, img_file) 152 | 153 | # Check if it's a file 154 | if os.path.isfile(img_path): 155 | # Apply fog to the image 156 | synf(opt.speed_up, img_path, opt.save_dir) 157 | else: 158 | print("Provided source is not a valid directory!") 159 | 160 | 161 | -------------------------------------------------------------------------------- /dataset_prepare/use.md: 
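
(Before the usage notes below, a remark on synthetic_fog.py above.) The vectorized branch of fogging_img() implements a simple atmospheric-scattering blend: the depth term d falls off linearly with distance from the image center, the transmission is td = exp(-thickness * d), and each pixel becomes img * td + brightness * (1 - td). A compact standalone restatement, using the script's default constants (br=0.7, th=0.05, the -0.04 falloff and size = sqrt(max(h, w))); a readability sketch, not the author's exact code:

import numpy as np

def fog(img, brightness=0.7, thickness=0.05):
    # img: float HxWx3 in [0, 1]
    h, w = img.shape[:2]
    size = np.sqrt(max(h, w))                  # fog scale, as in fogging_img()
    ys, xs = np.mgrid[0:h, 0:w]
    dist = np.hypot(ys - h // 2, xs - w // 2)  # pixel distance to the center
    d = -0.04 * dist + size                    # depth proxy
    td = np.exp(-thickness * d)[..., None]     # per-pixel transmission map
    return np.clip(img * td + brightness * (1 - td), 0, 1)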
-------------------------------------------------------------------------------- 1 | # 数据集准备相关代码文件 2 | 本文件夹主要是制作电网线路绝缘子与鸟巢检测的数据集所用到的相关代码,包括雾化,yolo格式的带标注框旋转,随机裁剪,划分数据集 3 | -------------------------------------------------------------------------------- /label_format_conversion/coco_split_trainVal.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | json_dir="demo/coco/annotations/annotations.json" 5 | with open(json_dir) as f: 6 | json_file = json.load(f) 7 | print('所有图片的数量:', len(json_file['images'])) 8 | print('所有标注的数量:', len(json_file['annotations'])) 9 | 10 | 11 | def get_key(images, image_id): 12 | for image in images: 13 | if image["id"] == image_id: # 根据anno的id反推图像的名称 14 | return image["file_name"] 15 | 16 | background=[] 17 | obj=[] 18 | # read box info for csv format 19 | annotations = json_file['annotations'] 20 | images = json_file['images'] 21 | 22 | all_images=[] 23 | for image in images: 24 | all_images.append(image["file_name"]) 25 | 26 | for annotation in annotations: 27 | key = annotation["image_id"] # 图像的名字 28 | im_id=get_key(images,key) 29 | if im_id not in obj: 30 | obj.append(im_id) 31 | 32 | #value = annotation["bbox"] + annotation["category_id"] 33 | 34 | #删除背景图像 35 | print('原始图像数量:', len(images)) 36 | 37 | print('有标注的图像数量:', len(obj)) 38 | 39 | for img in images: 40 | if img["file_name"] not in obj: 41 | background.append(img) 42 | 43 | for i in background: 44 | images.remove(i) 45 | print('删除背景后的图像数量',len(images))# 46 | #根据obj筛选图片 47 | image_dir='demo/coco/images' 48 | #dst_dir='/home/limzero/clear_images' 49 | #for name in obj: 50 | #shutil.copy(os.path.join(image_dir,name),os.path.join(dst_dir,name)) 51 | 52 | json_file['images']=images 53 | with open('demo/coco/annotations/annotations_washed.json', 'w') as f: 54 | json.dump(json_file, f) 55 | 56 | #分割训练集和验证集 57 | import random 58 | val = random.sample(obj, int(len(images)*0.1)) 59 | train=[] 60 | for o in obj: 61 | if o not in val: 62 | train.append(o) 63 | 64 | # 65 | train_dir='demo/coco/train2017' 66 | val_dir='demo/coco/val2017' 67 | if not os.path.exists(train_dir): 68 | os.makedirs(train_dir) 69 | if not os.path.exists(val_dir): 70 | os.makedirs(val_dir) 71 | for v in val: 72 | shutil.copy(os.path.join(image_dir,v),os.path.join(val_dir,v)) 73 | for t in train: 74 | shutil.copy(os.path.join(image_dir,t),os.path.join(train_dir,t)) 75 | 76 | 77 | #annotations 78 | 79 | val_images=images[:] 80 | train_images=images[:] 81 | val_annotations=annotations[:] 82 | train_annotations=annotations[:] 83 | 84 | print('images:',len(images),'val:',len(val),'train',len(train)) 85 | c=0 86 | for img in images: 87 | if img['file_name'] in train: 88 | c=c+1 89 | val_images.remove(img) 90 | else: 91 | train_images.remove(img) 92 | print('len(images):',len(images)) 93 | print("c:",c) 94 | print('val_images:',len(val_images),'train_images:',len(train_images)) 95 | 96 | def get_id(images,name): 97 | for image in images: 98 | if image['file_name']==name: 99 | return image['id'] 100 | for t in train: 101 | id=get_id(images,t) 102 | for ann in annotations: 103 | if ann['image_id']==id: 104 | val_annotations.remove(ann) 105 | for v in val: 106 | id=get_id(images,v) 107 | for ann in annotations: 108 | if ann['image_id']==id: 109 | train_annotations.remove(ann) 110 | print('train_ann:',len(train_annotations),'val_ann:',len(val_annotations)) 111 | 112 | json_train=json_file.copy() 113 | json_val=json_file.copy() 114 | json_train['images']=train_images 115 
| json_train['annotations']=train_annotations 116 | json_val['images']=val_images 117 | json_val['annotations']=val_annotations 118 | 119 | #reindex 120 | for idx in range(len(json_train['annotations'])): 121 | json_train['annotations'][idx]['id'] = idx 122 | 123 | for idx in range(len(json_val['annotations'])): 124 | json_val['annotations'][idx]['id'] = idx 125 | 126 | #write in json file 127 | with open('demo/coco/annotations/train2017.json', 'w') as f: 128 | json.dump(json_train, f) 129 | 130 | with open('demo/coco/annotations/val2017.json', 'w') as f: 131 | json.dump(json_val, f) 132 | 133 | 134 | -------------------------------------------------------------------------------- /label_format_conversion/coco_visulize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | 4 | from pycocotools.coco import COCO 5 | 6 | json_file = '/home/trojanjet/baidu_qyl/tianma/detect/mmdetection/data/coco/annotations/instances_val2017.json' 7 | dataset_dir = '/home/trojanjet/baidu_qyl/tianma/detect/mmdetection/data/coco/val2017/' 8 | coco = COCO(json_file) 9 | imgIds = coco.getImgIds() # 10 | for i in range(len(imgIds)): 11 | img = coco.loadImgs(imgIds[i])[0] 12 | image = cv2.imread(dataset_dir + img['file_name']) 13 | annIds = coco.getAnnIds(imgIds=img['id']) 14 | annos = coco.loadAnns(annIds) 15 | for ann in annos: 16 | bbox = ann['bbox'] 17 | x, y, w, h = bbox 18 | anno_image = cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 255), 2) 19 | cv2.imwrite('demo.jpg', anno_image) 20 | break 21 | 22 | -------------------------------------------------------------------------------- /label_format_conversion/generate_persudo_json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import sys 4 | import os 5 | import codecs 6 | import cv2 7 | import json 8 | underwater_classes = ['holothurian', 'echinus', 'scallop', 'starfish'] 9 | #!/usr/bin/env python 10 | # -*- coding: utf-8 -*- 11 | import os 12 | # 批量重命名文件 13 | 14 | 15 | def interpr_json(): 16 | test_json_raw = json.load(open("../../data/train/annotations/testA.json", "r")) 17 | test_json = json.load(open("../../results/cas_r50.bbox.json" , "r")) 18 | img_dir='../../data/test-A-image' 19 | root = '../../data/persudo/' 20 | img = test_json_raw['images'] 21 | images = [] 22 | imgid2anno = {} 23 | imgid2name = {} 24 | for imageinfo in test_json_raw['images']: 25 | imgid = imageinfo['id'] 26 | imgid2name[imgid] = imageinfo['file_name'] 27 | for anno in test_json: 28 | img_id = anno['image_id'] 29 | if img_id not in imgid2anno: 30 | imgid2anno[img_id] = [] 31 | imgid2anno[img_id].append(anno) 32 | for imgid, annos in imgid2anno.items(): 33 | image_name = imgid2name[imgid] 34 | image_id = image_name.split('.')[0] 35 | image_path = os.path.join(img_dir, image_id + '.jpg') 36 | img = cv2.imread(image_path) 37 | height, width ,depth= img.shape 38 | with codecs.open(root+ image_id + '_test.xml', 'w', 'utf-8') as xml: 39 | xml.write('\n') 40 | xml.write('\t' + image_id + '_test' + '\n') 41 | xml.write('\t\n') 42 | xml.write('\t\t' + str(width) + '\n') 43 | xml.write('\t\t' + str(height) + '\n') 44 | xml.write('\t\t' + str(depth) + '\n') 45 | xml.write('\t\n') 46 | cnt=0 47 | for anno in annos: 48 | xmin, ymin, w, h = anno['bbox'] 49 | xmax = xmin + w 50 | ymax = ymin + h 51 | xmin = int(xmin) 52 | ymin = int(ymin) 53 | xmax = int(xmax) 54 | ymax = int(ymax) 55 | confidence = anno['score'] 56 
| class_id = int(anno['category_id']) 57 | class_name = underwater_classes[class_id - 1] 58 | image_name = imgid2name[imgid] 59 | image_id = image_name.split('.')[0] 60 | # 61 | if cnt==0: 62 | xml.write('\t\n') 63 | xml.write('\t\t' + class_name + '\n') 64 | xml.write('\t\t\n') 65 | xml.write('\t\t\t' + str(xmin) + '\n') 66 | xml.write('\t\t\t' + str(ymin) + '\n') 67 | xml.write('\t\t\t' + str(xmax) + '\n') 68 | xml.write('\t\t\t' + str(ymax) + '\n') 69 | xml.write('\t\t\n') 70 | xml.write('\t\n') 71 | cnt+=1 72 | if confidence>0.4: 73 | cnt+=1 74 | xml.write('\t\n') 75 | xml.write('\t\t'+class_name+'\n') 76 | xml.write('\t\t\n') 77 | xml.write('\t\t\t' + str(xmin) + '\n') 78 | xml.write('\t\t\t' + str(ymin) + '\n') 79 | xml.write('\t\t\t' + str(xmax) + '\n') 80 | xml.write('\t\t\t' + str(ymax) + '\n') 81 | xml.write('\t\t\n') 82 | xml.write('\t\n') 83 | assert cnt>0 84 | xml.write('') 85 | 86 | interpr_json() 87 | -------------------------------------------------------------------------------- /label_format_conversion/make_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml 3 | import json 4 | import codecs 5 | import cv2 6 | import shutil 7 | from config import Config 8 | 9 | obstacles_classes = ['施工围挡', '路障', '锥桶', '告示牌1','告示牌2','施工痕迹','施工机械','工地正门'] 10 | opt=Config() 11 | rawImgDir=opt.raw_data_dir 12 | rawLabelDir=opt.raw_json 13 | anno_dir='../demo/voc/annotations/' 14 | image_dir='../demo/voc/JPEGImages' 15 | if not os.path.exists(anno_dir): 16 | os.makedirs(anno_dir) 17 | if not os.path.exists(image_dir): 18 | os.makedirs(image_dir) 19 | with open(rawLabelDir) as f: 20 | d=json.load(f) 21 | # 22 | annos=d['annotations'] 23 | for anno in annos: 24 | status=anno['status'] 25 | frames=anno['frames'] 26 | imgId = anno['id'] 27 | if status==3: 28 | for frame in frames: 29 | if 'obstacles' not in frame: 30 | continue 31 | obstacles=frame['obstacles'] 32 | bboxs=[item['bbox'] for item in obstacles] 33 | frame_name=frame['frame_name'] 34 | imgId_frame_name=imgId+'_'+frame_name 35 | image_path=os.path.join(rawImgDir, imgId, frame_name) 36 | shutil.copy(os.path.join(rawImgDir, imgId, frame_name), os.path.join(image_dir, imgId_frame_name)) 37 | img = cv2.imread(image_path) 38 | height, width, depth = img.shape 39 | with codecs.open(anno_dir + imgId_frame_name[:-4] + '.xml', 'w', 'utf-8') as xml: 40 | xml.write('\n') 41 | xml.write('\t' + imgId_frame_name + '\n') 42 | xml.write('\t\n') 43 | xml.write('\t\t' + str(width) + '\n') 44 | xml.write('\t\t' + str(height) + '\n') 45 | xml.write('\t\t' + str(depth) + '\n') 46 | xml.write('\t\n') 47 | cnt = 0 48 | for bbox in bboxs: 49 | xmin, ymin, xmax, ymax = bbox 50 | class_name = 'obstacles' 51 | # 52 | xml.write('\t\n') 53 | xml.write('\t\t' + class_name + '\n') 54 | xml.write('\t\t\n') 55 | xml.write('\t\t\t' + str(xmin) + '\n') 56 | xml.write('\t\t\t' + str(ymin) + '\n') 57 | xml.write('\t\t\t' + str(xmax) + '\n') 58 | xml.write('\t\t\t' + str(ymax) + '\n') 59 | xml.write('\t\t\n') 60 | xml.write('\t\n') 61 | cnt += 1 62 | assert cnt > 0 63 | xml.write('') 64 | -------------------------------------------------------------------------------- /label_format_conversion/readme.md: -------------------------------------------------------------------------------- 1 | # 目标检测标注格式转换 2 | 文件夹内主要是voc格式标注的.xml文件与yolo标注的txt格式相互转换 3 | -------------------------------------------------------------------------------- /label_format_conversion/voc_split_trainVal.py: 
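
(Before the split script below, a remark on the two XML writers above.) In this dump, the literal tag strings inside the xml.write(...) calls of generate_persudo_json.py and make_voc.py have been stripped, so lines such as xml.write('\t' + image_id + '_test' + '\n') originally wrapped their values in VOC tags like <filename>...</filename>. Rather than guessing every string back, the same annotation files can be produced with xml.etree.ElementTree; a hedged equivalent following the standard VOC layout (the helper name is illustrative):

import xml.etree.ElementTree as ET

def write_voc_xml(path, filename, width, height, boxes, depth=3):
    # boxes: iterable of (class_name, xmin, ymin, xmax, ymax)
    root = ET.Element('annotation')
    ET.SubElement(root, 'filename').text = filename
    size = ET.SubElement(root, 'size')
    for tag, val in (('width', width), ('height', height), ('depth', depth)):
        ET.SubElement(size, tag).text = str(val)
    for name, xmin, ymin, xmax, ymax in boxes:
        obj = ET.SubElement(root, 'object')
        ET.SubElement(obj, 'name').text = name
        bnd = ET.SubElement(obj, 'bndbox')
        for tag, val in (('xmin', xmin), ('ymin', ymin), ('xmax', xmax), ('ymax', ymax)):
            ET.SubElement(bnd, tag).text = str(int(val))
    ET.ElementTree(root).write(path, encoding='utf-8')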
-------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import sys 4 | 5 | 6 | root_path = 'demo/voc' 7 | 8 | xmlfilepath = root_path + '/Annotations' 9 | 10 | txtsavepath = root_path + '/ImageSets/Main' 11 | 12 | 13 | if not os.path.exists(txtsavepath): 14 | os.makedirs(txtsavepath) 15 | 16 | trainval_percent = 1 17 | train_percent = 0.9 18 | total_xml = os.listdir(xmlfilepath) 19 | num = len(total_xml) 20 | list = range(num) 21 | tv = int(num * trainval_percent) 22 | tr = int(tv * train_percent) 23 | trainval = random.sample(list, tv) 24 | train = random.sample(trainval, tr) 25 | 26 | print("train and val size:", tv) 27 | print("train size:", tr) 28 | 29 | ftrainval = open(txtsavepath + '/trainval.txt', 'w') 30 | ftest = open(txtsavepath + '/test.txt', 'w') 31 | ftrain = open(txtsavepath + '/train.txt', 'w') 32 | fval = open(txtsavepath + '/val.txt', 'w') 33 | 34 | for i in list: 35 | name = total_xml[i][:-4] + '\n' 36 | if i in trainval: 37 | ftrainval.write(name) 38 | if i in train: 39 | ftrain.write(name) 40 | else: 41 | fval.write(name) 42 | else: 43 | ftest.write(name) 44 | 45 | ftrainval.close() 46 | ftrain.close() 47 | fval.close() 48 | ftest.close() 49 | -------------------------------------------------------------------------------- /label_format_conversion/voc_to_coco_v1.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | #!/usr/bin/python 3 | 4 | import sys 5 | import os 6 | import shutil 7 | import numpy as np 8 | import json 9 | import xml.etree.ElementTree as ET 10 | import mmcv 11 | # 检测框的ID起始值 12 | START_BOUNDING_BOX_ID = 1 13 | # 类别列表无必要预先创建,程序中会根据所有图像中包含的ID来创建并更新 14 | PRE_DEFINE_CATEGORIES = {} 15 | # If necessary, pre-define category and its id 16 | # PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4, 17 | # "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9, 18 | # "cow": 10, "diningtable": 11, "dog": 12, "horse": 13, 19 | # "motorbike": 14, "person": 15, "pottedplant": 16, 20 | # "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20} 21 | 22 | 23 | def get(root, name): 24 | vars = root.findall(name) 25 | return vars 26 | 27 | 28 | def get_and_check(root, name, length): 29 | vars = root.findall(name) 30 | if len(vars) == 0: 31 | raise NotImplementedError('Can not find %s in %s.'%(name, root.tag)) 32 | if length > 0 and len(vars) != length: 33 | raise NotImplementedError('The size of %s is supposed to be %d, but is %d.'%(name, length, len(vars))) 34 | if length == 1: 35 | vars = vars[0] 36 | return vars 37 | 38 | 39 | 40 | def convert(xml_list, xml_dir, json_file): 41 | ''' 42 | :param xml_list: 需要转换的XML文件列表 43 | :param xml_dir: XML的存储文件夹 44 | :param json_file: 导出json文件的路径 45 | :return: None 46 | ''' 47 | list_fp = xml_list 48 | image_id=1 49 | # 标注基本结构 50 | json_dict = {"images":[], 51 | "type": "instances", 52 | "annotations": [], 53 | "categories": []} 54 | categories = PRE_DEFINE_CATEGORIES 55 | bnd_id = START_BOUNDING_BOX_ID 56 | for line in list_fp: 57 | line = line.strip() 58 | print(" Processing {}".format(line)) 59 | # 解析XML 60 | xml_f = os.path.join(xml_dir, line) 61 | tree = ET.parse(xml_f) 62 | root = tree.getroot() 63 | filename = root.find('filename').text 64 | # 取出图片名字 65 | image_id+=1 66 | size = get_and_check(root, 'size', 1) 67 | # 图片的基本信息 68 | width = int(get_and_check(size, 'width', 1).text) 69 | height = int(get_and_check(size, 'height', 1).text) 70 | image = {'file_name': filename, 71 | 'height': 
height, 72 | 'width': width, 73 | 'id':image_id} 74 | json_dict['images'].append(image) 75 | # 处理每个标注的检测框 76 | for obj in get(root, 'object'): 77 | # 取出检测框类别名称 78 | category = get_and_check(obj, 'name', 1).text 79 | # 更新类别ID字典 80 | if category not in categories: 81 | new_id = len(categories) 82 | categories[category] = new_id 83 | category_id = categories[category] 84 | bndbox = get_and_check(obj, 'bndbox', 1) 85 | xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1 86 | ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1 87 | xmax = int(get_and_check(bndbox, 'xmax', 1).text) 88 | ymax = int(get_and_check(bndbox, 'ymax', 1).text) 89 | assert(xmax > xmin) 90 | assert(ymax > ymin) 91 | o_width = abs(xmax - xmin) 92 | o_height = abs(ymax - ymin) 93 | annotation = dict() 94 | annotation['area'] = o_width*o_height 95 | annotation['iscrowd'] = 0 96 | annotation['image_id'] = image_id 97 | annotation['bbox'] = [xmin, ymin, o_width, o_height] 98 | annotation['category_id'] = category_id 99 | annotation['id'] = bnd_id 100 | annotation['ignore'] = 0 101 | # 设置分割数据,点的顺序为逆时针方向 102 | annotation['segmentation'] = [[xmin,ymin,xmin,ymax,xmax,ymax,xmax,ymin]] 103 | 104 | json_dict['annotations'].append(annotation) 105 | bnd_id = bnd_id + 1 106 | 107 | # 写入类别ID字典 108 | for cate, cid in categories.items(): 109 | cat = {'supercategory': 'none', 'id': cid, 'name': cate} 110 | json_dict['categories'].append(cat) 111 | # 导出到json 112 | #mmcv.dump(json_dict, json_file) 113 | print(type(json_dict)) 114 | json_data = json.dumps(json_dict) 115 | with open(json_file, 'w') as w: 116 | w.write(json_data) 117 | 118 | 119 | if __name__ == '__main__': 120 | root_path = './demo' 121 | 122 | if not os.path.exists(os.path.join(root_path,'coco/annotations')): 123 | os.makedirs(os.path.join(root_path,'coco/annotations')) 124 | if not os.path.exists(os.path.join(root_path, 'coco/train2014')): 125 | os.makedirs(os.path.join(root_path, 'coco/train2014')) 126 | if not os.path.exists(os.path.join(root_path, 'coco/val2014')): 127 | os.makedirs(os.path.join(root_path, 'coco/val2014')) 128 | xml_dir = os.path.join(root_path,'voc/Annotations') #已知的voc的标注 129 | 130 | xml_labels = os.listdir(xml_dir) 131 | np.random.shuffle(xml_labels) 132 | split_point = int(len(xml_labels)/10) 133 | 134 | # validation data 135 | xml_list = xml_labels[0:split_point] 136 | json_file = os.path.join(root_path,'coco/annotations/instances_val2014.json') 137 | convert(xml_list, xml_dir, json_file) 138 | for xml_file in xml_list: 139 | img_name = xml_file[:-4] + '.jpg' 140 | shutil.copy(os.path.join(root_path, 'voc/JPEGImages', img_name), 141 | os.path.join(root_path, 'coco/val2014', img_name)) 142 | # train data 143 | xml_list = xml_labels[split_point:] 144 | json_file = os.path.join(root_path,'coco/annotations/instances_train2014.json') 145 | convert(xml_list, xml_dir, json_file) 146 | for xml_file in xml_list: 147 | img_name = xml_file[:-4] + '.jpg' 148 | shutil.copy(os.path.join(root_path, 'voc/JPEGImages', img_name), 149 | os.path.join(root_path, 'coco/train2014', img_name)) 150 | -------------------------------------------------------------------------------- /label_format_conversion/voc_to_coco_v2.py: -------------------------------------------------------------------------------- 1 | 2 | import os.path as osp 3 | import xml.etree.ElementTree as ET 4 | 5 | import mmcv 6 | import os 7 | 8 | from glob import glob 9 | from tqdm import tqdm 10 | from PIL import Image 11 | def object_classes():#这里定义了自己的数据集的目标类别 12 | return ['window_shielding', 
'multi_signs', 'non_traffic_sign'] 13 | label_ids = {name: i + 1 for i, name in enumerate(object_classes())} 14 | print(label_ids) 15 | 16 | def get_segmentation(points): 17 | 18 | return [points[0], points[1], points[2] + points[0], points[1], 19 | points[2] + points[0], points[3] + points[1], points[0], points[3] + points[1]] 20 | 21 | 22 | def parse_xml(xml_path, img_id, anno_id): 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | annotation = [] 26 | for obj in root.findall('object'): 27 | name = obj.find('name').text 28 | if name == 'xxx':#当要忽略某一个类别时 29 | continue 30 | category_id = label_ids[name] 31 | bnd_box = obj.find('bndbox') 32 | xmin = int(bnd_box.find('xmin').text) 33 | ymin = int(bnd_box.find('ymin').text) 34 | xmax = int(bnd_box.find('xmax').text) 35 | ymax = int(bnd_box.find('ymax').text) 36 | w = xmax - xmin + 1 37 | h = ymax - ymin + 1 38 | area = w*h 39 | segmentation = get_segmentation([xmin, ymin, w, h]) 40 | annotation.append({ 41 | "segmentation": segmentation, 42 | "area": area, 43 | "iscrowd": 0, 44 | "image_id": img_id, 45 | "bbox": [xmin, ymin, w, h], 46 | "category_id": category_id, 47 | "id": anno_id, 48 | "ignore": 0}) 49 | anno_id += 1 50 | return annotation, anno_id 51 | 52 | 53 | def cvt_annotations(img_path, xml_path, out_file): 54 | images = [] 55 | annotations = [] 56 | 57 | # xml_paths = glob(xml_path + '/*.xml') 58 | img_id = 1 59 | anno_id = 1 60 | for img_path in tqdm(glob(img_path + '/*.jpg')): 61 | w, h = Image.open(img_path).size 62 | img_name = osp.basename(img_path) 63 | img = {"file_name": img_name, "height": int(h), "width": int(w), "id": img_id} 64 | images.append(img) 65 | 66 | xml_file_name = img_name.split('.')[0] + '.xml' 67 | xml_file_path = osp.join(xml_path, xml_file_name) 68 | annos, anno_id = parse_xml(xml_file_path, img_id, anno_id) 69 | annotations.extend(annos) 70 | img_id += 1 71 | 72 | categories = [] 73 | for k,v in label_ids.items(): 74 | categories.append({"name": k, "id": v}) 75 | final_result = {"images": images, "annotations": annotations, "categories": categories} 76 | mmcv.dump(final_result, out_file) 77 | return annotations 78 | 79 | 80 | def main(): 81 | 82 | xml_path = 'demo/voc/Annotations' 83 | img_path = 'demo/voc/JPEGImages' 84 | print('processing {} ...'.format("xml format annotations")) 85 | cvt_annotations(img_path, xml_path, 'demo/coco/annotations/annotations.json') 86 | print('Done!') 87 | 88 | 89 | if __name__ == '__main__': 90 | root_path='./demo' 91 | if not os.path.exists(os.path.join(root_path,'coco/annotations')): 92 | os.makedirs(os.path.join(root_path,'coco/annotations')) 93 | main() 94 | -------------------------------------------------------------------------------- /label_format_conversion/voc_to_yoloV3.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import os 3 | import cv2 4 | classes = ['window_shielding', 'multi_signs', 'non_traffic_sign'] 5 | 6 | def convert_annotation(image_id): 7 | in_file = open('demo/voc/Annotations/%s.xml' % image_id) 8 | 9 | if not os.path.exists('demo/yolov3/custom/labels/'): 10 | os.makedirs('demo/yolov3/custom/labels/') 11 | out_file_img = open('demo/yolov3/custom/trainval.txt', 'a') # 生成txt格式文件 12 | 13 | out_file_label = open('demo/yolov3/custom/labels/%s.txt' % image_id,'a') # 生成txt格式文件 14 | 15 | tree = ET.parse(in_file) 16 | root = tree.getroot() 17 | size = root.find('size') 18 | voc_img_dir='demo/voc/JPEGImages/{}.jpg'.format(image_id) 19 | out_file_img.write(voc_img_dir) 
20 | out_file_img.write("\n") 21 | img=cv2.imread(voc_img_dir) 22 | dh = 1. / img.shape[0] 23 | dw = 1. / img.shape[1] 24 | cnt=len(root.findall('object')) 25 | if cnt==0: 26 | print('nulll null null.....') 27 | print(image_id) 28 | cc=0 29 | for obj in root.iter('object'): 30 | cc+=1 31 | cls = obj.find('name').text 32 | if cls not in classes: 33 | continue 34 | cls_id = classes.index(cls) 35 | xmlbox = obj.find('bndbox') 36 | if dw*float(xmlbox.find('xmin').text)<0. or dw*float(xmlbox.find('xmax').text)<0. or dh*float(xmlbox.find('ymin').text)<0. or dh*float(xmlbox.find('ymax').text)<0.: 37 | print(image_id) 38 | 39 | b = (dw*float(xmlbox.find('xmin').text), dw*float(xmlbox.find('xmax').text), dh*float(xmlbox.find('ymin').text), 40 | dh*float(xmlbox.find('ymax').text)) 41 | out_file_label.write(str(cls_id)+ " " + str((b[0]+b[1])/2) + " " + str((b[2]+b[3])/2) + " " + str(b[1]-b[0]) + " " + str(b[3]-b[2])) 42 | if cc=1: 27 | w=0.99 28 | if h>=1: 29 | h=0.99 30 | return (x,y,w,h) 31 | 32 | def convert_annotation(rootpath,xmlname): 33 | xmlpath = rootpath + '/Annotations' 34 | xmlfile = os.path.join(xmlpath,xmlname) 35 | with open(xmlfile, "r", encoding='UTF-8') as in_file: 36 | txtname = xmlname[:-4]+'.txt' 37 | print(txtname) 38 | txtpath = rootpath + '/worktxt'#生成的.txt文件会被保存在worktxt目录下 39 | if not os.path.exists(txtpath): 40 | os.makedirs(txtpath) 41 | txtfile = os.path.join(txtpath,txtname) 42 | with open(txtfile, "w+" ,encoding='UTF-8') as out_file: 43 | tree=ET.parse(in_file) 44 | root = tree.getroot() 45 | size = root.find('size') 46 | w = int(size.find('width').text) 47 | h = int(size.find('height').text) 48 | out_file.truncate() 49 | for obj in root.iter('object'): 50 | difficult = obj.find('difficult').text 51 | cls = obj.find('name').text 52 | if cls not in classes or int(difficult)==1: 53 | continue 54 | cls_id = classes.index(cls) 55 | xmlbox = obj.find('bndbox') 56 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 57 | bb = convert((w,h), b) 58 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 59 | 60 | 61 | if __name__ == "__main__": 62 | rootpath='demo/voc/' 63 | xmlpath=rootpath+'/Annotations' 64 | list=os.listdir(xmlpath) 65 | for i in range(0,len(list)) : 66 | path = os.path.join(xmlpath,list[i]) 67 | if ('.xml' in path)or('.XML' in path): 68 | convert_annotation(rootpath,list[i]) 69 | print('done', i) 70 | else: 71 | print('not xml file',i) 72 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__init__.py -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/common.cpython-310.pyc: -------------------------------------------------------------------------------- 
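
(Stepping back from the bytecode listings to the two conversion scripts above.) The dump is truncated between voc_to_yoloV3.py and voc_to_yoloV5.py, cutting off the header of voc_to_yoloV5.py's convert() function; only its clamping tail (w=0.99 / h=0.99) survives. Both scripts rely on the same VOC-to-YOLO mapping from absolute corner coordinates to normalized center/size values. A hedged reconstruction of that standard helper, consistent with the surviving lines and the call bb = convert((w, h), b):

def convert(size, box):
    # size: (img_w, img_h); box: (xmin, xmax, ymin, ymax) in pixels
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0 * dw   # normalized box-center x
    y = (box[2] + box[3]) / 2.0 * dh   # normalized box-center y
    w = (box[1] - box[0]) * dw         # normalized width
    h = (box[3] - box[2]) * dh         # normalized height
    if w >= 1:
        w = 0.99                       # clamp, matching the surviving fragment
    if h >= 1:
        h = 0.99
    return (x, y, w, h)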
https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/common.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/experimental.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/experimental.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/yolo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/yolo.cpython-310.pyc -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from utils.downloads import attempt_download 12 | 13 | 14 | class Sum(nn.Module): 15 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 16 | def __init__(self, n, weight=False): # n: number of inputs 17 | super().__init__() 18 | self.weight = weight # apply weights boolean 19 | self.iter = range(n - 1) # iter object 20 | if weight: 21 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights 22 | 23 | def forward(self, x): 24 | y = x[0] # no weight 25 | if self.weight: 26 | w = torch.sigmoid(self.w) * 2 27 | for i in self.iter: 28 | y = y + x[i + 1] * w[i] 29 | else: 30 | for i in self.iter: 31 | y = y + x[i + 1] 32 | return y 33 | 34 | 35 | class MixConv2d(nn.Module): 36 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 37 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy 38 | super().__init__() 39 | n = len(k) # number of convolutions 40 | if equal_ch: # equal c_ per group 41 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices 42 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels 43 | else: # equal weight.numel() per group 44 | b = [c2] + [0] * n 45 | a = np.eye(n + 1, n, k=-1) 46 | a -= np.roll(a, 1, axis=1) 47 | a *= np.array(k) ** 2 48 | a[0] = 1 49 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 50 | 51 | self.m = nn.ModuleList([ 52 | nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) 53 | self.bn = nn.BatchNorm2d(c2) 54 | self.act = nn.SiLU() 55 | 56 | def forward(self, x): 57 | return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 58 | 59 | 60 | class Ensemble(nn.ModuleList): 61 | # Ensemble of models 62 | def __init__(self): 63 | super().__init__() 64 | 65 | def forward(self, x, augment=False, profile=False, visualize=False): 66 | y = [module(x, augment, profile, visualize)[0] for module in self] 67 | # y = torch.stack(y).max(0)[0] # max ensemble 68 | # y = torch.stack(y).mean(0) # mean ensemble 69 | y = torch.cat(y, 1) # nms ensemble 70 | return y, None # inference, train output 71 | 72 | 73 | def attempt_load(weights, device=None, inplace=True, fuse=True): 74 | # Loads an ensemble of models weights=[a,b,c] 
or a single model weights=[a] or weights=a 75 | from models.yolo import Detect, Model 76 | 77 | model = Ensemble() 78 | for w in weights if isinstance(weights, list) else [weights]: 79 | ckpt = torch.load(attempt_download(w), map_location='cpu') # load 80 | ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model 81 | 82 | # Model compatibility updates 83 | if not hasattr(ckpt, 'stride'): 84 | ckpt.stride = torch.tensor([32.]) 85 | if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)): 86 | ckpt.names = dict(enumerate(ckpt.names)) # convert to dict 87 | 88 | model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode 89 | 90 | # Module updates 91 | for m in model.modules(): 92 | t = type(m) 93 | if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model): 94 | m.inplace = inplace 95 | if t is Detect and not isinstance(m.anchor_grid, list): 96 | delattr(m, 'anchor_grid') 97 | setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) 98 | elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): 99 | m.recompute_scale_factor = None # torch 1.11.0 compatibility 100 | 101 | # Return model 102 | if len(model) == 1: 103 | return model[-1] 104 | 105 | # Return detection ensemble 106 | print(f'Ensemble created with {weights}\n') 107 | for k in 'names', 'nc', 'yaml': 108 | setattr(model, k, getattr(model[0], k)) 109 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 110 | assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' 111 | return model 112 | -------------------------------------------------------------------------------- /models/hub/anchors.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | # Default anchors for COCO data 3 | 4 | 5 | # P5 ------------------------------------------------------------------------------------------------------------------- 6 | # P5-640: 7 | anchors_p5_640: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | 13 | # P6 ------------------------------------------------------------------------------------------------------------------- 14 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 15 | anchors_p6_640: 16 | - [9,11, 21,19, 17,41] # P3/8 17 | - [43,32, 39,70, 86,64] # P4/16 18 | - [65,131, 134,130, 120,265] # P5/32 19 | - [282,180, 247,354, 512,387] # P6/64 20 | 21 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 22 | anchors_p6_1280: 23 | - [19,27, 44,40, 38,94] # P3/8 24 | - [96,68, 86,152, 180,137] # P4/16 25 | - [140,301, 303,264, 238,542] # P5/32 26 | - [436,615, 739,380, 925,792] # P6/64 27 | 28 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 29 | anchors_p6_1920: 30 | - [28,41, 67,59, 57,141] # P3/8 31 | - [144,103, 129,227, 270,205] # P4/16 32 | - 
[209,452, 455,396, 358,812] # P5/32 33 | - [653,922, 1109,570, 1387,1187] # P6/64 34 | 35 | 36 | # P7 ------------------------------------------------------------------------------------------------------------------- 37 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 38 | anchors_p7_640: 39 | - [11,11, 13,30, 29,20] # P3/8 40 | - [30,46, 61,38, 39,92] # P4/16 41 | - [78,80, 146,66, 79,163] # P5/32 42 | - [149,150, 321,143, 157,303] # P6/64 43 | - [257,402, 359,290, 524,372] # P7/128 44 | 45 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 46 | anchors_p7_1280: 47 | - [19,22, 54,36, 32,77] # P3/8 48 | - [70,83, 138,71, 75,173] # P4/16 49 | - [165,159, 148,334, 375,151] # P5/32 50 | - [334,317, 251,626, 499,474] # P6/64 51 | - [750,326, 534,814, 1079,818] # P7/128 52 | 53 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 54 | anchors_p7_1920: 55 | - [29,34, 81,55, 47,115] # P3/8 56 | - [105,124, 207,107, 113,259] # P4/16 57 | - [247,238, 222,500, 563,227] # P5/32 58 | - [501,476, 376,939, 749,711] # P6/64 59 | - [1126,489, 801,1222, 1618,1227] # P7/128 60 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # 
Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-bifpn.yaml: 
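
(A note on reading these model definitions, before the remaining YAMLs.) Every backbone/head row is [from, number, module, args]: `from` indexes the input layer(s), with -1 meaning the previous layer; `number` is a repeat count scaled by depth_multiple; and `args` are constructor arguments whose leading channel count is scaled by width_multiple. A minimal sketch of that interpretation (illustrative only; the real logic lives in parse_model() in models/yolo.py):

import math

def interpret_row(row, depth_multiple, width_multiple):
    frm, number, module, args = row
    # repeats scale with depth_multiple, but never drop below 1
    n = max(round(number * depth_multiple), 1) if number > 1 else number
    if args and isinstance(args[0], int):
        # output channels scale with width_multiple, kept divisible by 8
        c2 = math.ceil(args[0] * width_multiple / 8) * 8
        args = [c2] + list(args[1:])
    return frm, n, module, args

# e.g. a yolov5s-style scaling of the row [-1, 3, C3, [128]]:
print(interpret_row([-1, 3, 'C3', [128]], depth_multiple=0.33, width_multiple=0.50))
# -> (-1, 1, 'C3', [64])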
-------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 BiFPN head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 FPN head 28 | head: 29 | [[-1, 3, C3, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, C3, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, C3, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /models/hub/yolov5-p2.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # AutoAnchor 
evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [1024]], 21 | [-1, 1, SPPF, [1024, 5]], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs 25 | head: 26 | [[-1, 1, Conv, [512, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 29 | [-1, 3, C3, [512, False]], # 13 30 | 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 2], 1, Concat, [1]], # cat backbone P2 39 | [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall) 40 | 41 | [-1, 1, Conv, [128, 3, 2]], 42 | [[-1, 18], 1, Concat, [1]], # cat head P3 43 | [-1, 3, C3, [256, False]], # 24 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 14], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 27 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 10], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [1024, False]], # 30 (P5/32-large) 52 | 53 | [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5) 54 | ] 55 | -------------------------------------------------------------------------------- /models/hub/yolov5-p34.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2 13 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 14 | [ -1, 3, C3, [ 128 ] ], 15 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 16 | [ -1, 6, C3, [ 256 ] ], 17 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 18 | [ -1, 9, C3, [ 512 ] ], 19 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32 20 | [ -1, 3, C3, [ 1024 ] ], 21 | [ -1, 1, SPPF, [ 1024, 5 ] ], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head with (P3, P4) outputs 25 | head: 26 | [ [ -1, 1, Conv, [ 512, 1, 1 ] ], 27 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 28 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 29 | [ -1, 3, C3, [ 512, False ] ], # 13 30 | 31 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 34 | [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small) 35 | 36 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 37 | [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4 38 | [ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium) 39 | 40 | [ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth 
multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, SPPF, [1024, 5]], # 11 24 | ] 25 | 26 | # YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs 27 | head: 28 | [[-1, 1, Conv, [768, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 31 | [-1, 3, C3, [768, False]], # 15 32 | 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 36 | [-1, 3, C3, [512, False]], # 19 37 | 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 41 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 42 | 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, 20], 1, Concat, [1]], # cat head P4 45 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 46 | 47 | [-1, 1, Conv, [512, 3, 2]], 48 | [[-1, 16], 1, Concat, [1]], # cat head P5 49 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 50 | 51 | [-1, 1, Conv, [768, 3, 2]], 52 | [[-1, 12], 1, Concat, [1]], # cat head P6 53 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 54 | 55 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 56 | ] 57 | -------------------------------------------------------------------------------- /models/hub/yolov5-p7.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128 24 | [-1, 3, C3, [1280]], 25 | [-1, 1, SPPF, [1280, 5]], # 13 26 | ] 27 | 28 | # YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs 29 | head: 30 | [[-1, 1, Conv, [1024, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 10], 1, Concat, [1]], # cat backbone P6 33 | [-1, 3, C3, [1024, False]], # 17 34 | 35 | [-1, 1, Conv, [768, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 38 | [-1, 3, C3, [768, False]], # 21 39 | 40 | [-1, 1, Conv, [512, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 43 | [-1, 3, C3, [512, False]], # 25 44 | 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 47 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 48 | [-1, 3, C3, [256, False]], # 29 (P3/8-small) 
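# Note: with 'anchors: 3' (see Parameters above), AutoAnchor evolves 3 anchors per output layer (P3..P7 here) at train time; the anchors_p7_640/1280/1920 presets in hub/anchors.yaml are precomputed alternatives for 640/1280/1920 inputs.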
49 | 50 | [-1, 1, Conv, [256, 3, 2]], 51 | [[-1, 26], 1, Concat, [1]], # cat head P4 52 | [-1, 3, C3, [512, False]], # 32 (P4/16-medium) 53 | 54 | [-1, 1, Conv, [512, 3, 2]], 55 | [[-1, 22], 1, Concat, [1]], # cat head P5 56 | [-1, 3, C3, [768, False]], # 35 (P5/32-large) 57 | 58 | [-1, 1, Conv, [768, 3, 2]], 59 | [[-1, 18], 1, Concat, [1]], # cat head P6 60 | [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge) 61 | 62 | [-1, 1, Conv, [1024, 3, 2]], 63 | [[-1, 14], 1, Concat, [1]], # cat head P7 64 | [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge) 65 | 66 | [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7) 67 | ] 68 | -------------------------------------------------------------------------------- /models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/hub/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 
'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - 
[96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5s-LeakyReLU.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model 6 | depth_multiple: 0.33 # model depth multiple 7 | width_multiple: 0.50 # layer channel multiple 8 | anchors: 9 | - [10,13, 16,30, 33,23] # P3/8 10 | - [30,61, 62,45, 59,119] # P4/16 11 | - [116,90, 156,198, 373,326] # P5/32 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [1024]], 25 | [-1, 1, SPPF, [1024, 5]], # 9 26 | ] 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | [[-1, 1, Conv, [512, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3, [512, False]], # 13 34 | 35 | [-1, 1, Conv, [256, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 39 | 40 | [-1, 1, Conv, [256, 3, 2]], 41 | [[-1, 14], 1, Concat, [1]], # cat head P4 42 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 43 | 44 | [-1, 1, Conv, [512, 3, 2]], 45 | [[-1, 10], 1, Concat, [1]], # cat head P5 46 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 47 | 48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | 
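# Note: the 'activation:' key above replaces the default SiLU for every Conv() in the model; models/yolo.py eval()s the expression while parsing the config. A minimal sketch of another override (assumption: any torch.nn activation expression is accepted), left commented out:
# activation: nn.Hardswish()  # hypothetical alternative; tf.py's activations() also maps Hardswish for TF export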
-------------------------------------------------------------------------------- /models/hub/yolov5s-ghost.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3Ghost, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, GhostConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3Ghost, [512, False]], # 13 33 | 34 | [-1, 1, GhostConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, GhostConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, GhostConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/hub/yolov5s-transformer.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ]
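# Note: relative to yolov5s.yaml, the only change above is backbone layer 8, where C3 is replaced by C3TR (a C3 variant from models/common.py whose bottlenecks are swapped for a TransformerBlock). A sketch of applying the same swap at P4/16 (assumption: the usual [from, number, module, args] row format), left commented out:
#   [-1, 9, C3TR, [512]],  # hypothetical replacement for the layer-6 C3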
49 | -------------------------------------------------------------------------------- /models/hub/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5x6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # 
cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/readme.md: -------------------------------------------------------------------------------- 1 | The files in this folder are mainly the yaml files of the improved yolov5 models 2 | -------------------------------------------------------------------------------- /models/segment/yolov5l-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/segment/yolov5m-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1,
1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/segment/yolov5n-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/segment/yolov5s-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.5 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 
25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/segment/yolov5x-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/tf.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | """ 3 | TensorFlow, Keras and TFLite versions of YOLOv5 4 | Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127 5 | 6 | Usage: 7 | $ python models/tf.py --weights yolov5s.pt 8 | 9 | Export: 10 | $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs 11 | """ 12 | 13 | import argparse 14 | import sys 15 | from copy import deepcopy 16 | from pathlib import Path 17 | 18 | FILE = Path(__file__).resolve() 19 | ROOT = FILE.parents[1] # YOLOv5 root directory 20 | if str(ROOT) not in sys.path: 21 | sys.path.append(str(ROOT)) # add ROOT to PATH 22 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 23 | 24 | import numpy as np 25 | import tensorflow as tf 26 | import torch 27 | import torch.nn as nn 28 | from tensorflow import keras 29 | 30 | from 
models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, 31 | DWConvTranspose2d, Focus, autopad) 32 | from models.experimental import MixConv2d, attempt_load 33 | from models.yolo import Detect, Segment 34 | from utils.activations import SiLU 35 | from utils.general import LOGGER, make_divisible, print_args 36 | 37 | 38 | class TFBN(keras.layers.Layer): 39 | # TensorFlow BatchNormalization wrapper 40 | def __init__(self, w=None): 41 | super().__init__() 42 | self.bn = keras.layers.BatchNormalization( 43 | beta_initializer=keras.initializers.Constant(w.bias.numpy()), 44 | gamma_initializer=keras.initializers.Constant(w.weight.numpy()), 45 | moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()), 46 | moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()), 47 | epsilon=w.eps) 48 | 49 | def call(self, inputs): 50 | return self.bn(inputs) 51 | 52 | 53 | class TFPad(keras.layers.Layer): 54 | # Pad inputs in spatial dimensions 1 and 2 55 | def __init__(self, pad): 56 | super().__init__() 57 | if isinstance(pad, int): 58 | self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]]) 59 | else: # tuple/list 60 | self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]) 61 | 62 | def call(self, inputs): 63 | return tf.pad(inputs, self.pad, mode='constant', constant_values=0) 64 | 65 | 66 | class TFConv(keras.layers.Layer): 67 | # Standard convolution 68 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): 69 | # ch_in, ch_out, weights, kernel, stride, padding, groups 70 | super().__init__() 71 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" 72 | # TensorFlow convolution padding is inconsistent with PyTorch (e.g. 
k=3 s=2 'SAME' padding) 73 | # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch 74 | conv = keras.layers.Conv2D( 75 | filters=c2, 76 | kernel_size=k, 77 | strides=s, 78 | padding='SAME' if s == 1 else 'VALID', 79 | use_bias=not hasattr(w, 'bn'), 80 | kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), 81 | bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) 82 | self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) 83 | self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity 84 | self.act = activations(w.act) if act else tf.identity 85 | 86 | def call(self, inputs): 87 | return self.act(self.bn(self.conv(inputs))) 88 | 89 | 90 | class TFDWConv(keras.layers.Layer): 91 | # Depthwise convolution 92 | def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None): 93 | # ch_in, ch_out, weights, kernel, stride, padding, groups 94 | super().__init__() 95 | assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels' 96 | conv = keras.layers.DepthwiseConv2D( 97 | kernel_size=k, 98 | depth_multiplier=c2 // c1, 99 | strides=s, 100 | padding='SAME' if s == 1 else 'VALID', 101 | use_bias=not hasattr(w, 'bn'), 102 | depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), 103 | bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) 104 | self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) 105 | self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity 106 | self.act = activations(w.act) if act else tf.identity 107 | 108 | def call(self, inputs): 109 | return self.act(self.bn(self.conv(inputs))) 110 | 111 | 112 | class TFDWConvTranspose2d(keras.layers.Layer): 113 | # Depthwise ConvTranspose2d 114 | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None): 115 | # ch_in, ch_out, weights, kernel, stride, padding, groups 116 | super().__init__() 117 | assert c1 == c2, f'TFDWConv() output={c2} must be equal to input={c1} channels' 118 | assert k == 4 and p1 == 1, 'TFDWConv() only valid for k=4 and p1=1' 119 | weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy() 120 | self.c1 = c1 121 | self.conv = [ 122 | keras.layers.Conv2DTranspose(filters=1, 123 | kernel_size=k, 124 | strides=s, 125 | padding='VALID', 126 | output_padding=p2, 127 | use_bias=True, 128 | kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]), 129 | bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)] 130 | 131 | def call(self, inputs): 132 | return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1] 133 | 134 | 135 | class TFFocus(keras.layers.Layer): 136 | # Focus wh information into c-space 137 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): 138 | # ch_in, ch_out, kernel, stride, padding, groups 139 | super().__init__() 140 | self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) 141 | 142 | def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) 143 | # inputs = inputs / 255 # normalize 0-255 to 0-1 144 | inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]] 145 | return self.conv(tf.concat(inputs, 3)) 146 | 147 | 148 | class TFBottleneck(keras.layers.Layer): 149 | # Standard bottleneck 150 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # 
ch_in, ch_out, shortcut, groups, expansion 151 | super().__init__() 152 | c_ = int(c2 * e) # hidden channels 153 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 154 | self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2) 155 | self.add = shortcut and c1 == c2 156 | 157 | def call(self, inputs): 158 | return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) 159 | 160 | 161 | class TFCrossConv(keras.layers.Layer): 162 | # Cross Convolution 163 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None): 164 | super().__init__() 165 | c_ = int(c2 * e) # hidden channels 166 | self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1) 167 | self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2) 168 | self.add = shortcut and c1 == c2 169 | 170 | def call(self, inputs): 171 | return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) 172 | 173 | 174 | class TFConv2d(keras.layers.Layer): 175 | # Substitution for PyTorch nn.Conv2D 176 | def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None): 177 | super().__init__() 178 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" 179 | self.conv = keras.layers.Conv2D(filters=c2, 180 | kernel_size=k, 181 | strides=s, 182 | padding='VALID', 183 | use_bias=bias, 184 | kernel_initializer=keras.initializers.Constant( 185 | w.weight.permute(2, 3, 1, 0).numpy()), 186 | bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None) 187 | 188 | def call(self, inputs): 189 | return self.conv(inputs) 190 | 191 | 192 | class TFBottleneckCSP(keras.layers.Layer): 193 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 194 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 195 | # ch_in, ch_out, number, shortcut, groups, expansion 196 | super().__init__() 197 | c_ = int(c2 * e) # hidden channels 198 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 199 | self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2) 200 | self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3) 201 | self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4) 202 | self.bn = TFBN(w.bn) 203 | self.act = lambda x: keras.activations.swish(x) 204 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) 205 | 206 | def call(self, inputs): 207 | y1 = self.cv3(self.m(self.cv1(inputs))) 208 | y2 = self.cv2(inputs) 209 | return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3)))) 210 | 211 | 212 | class TFC3(keras.layers.Layer): 213 | # CSP Bottleneck with 3 convolutions 214 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 215 | # ch_in, ch_out, number, shortcut, groups, expansion 216 | super().__init__() 217 | c_ = int(c2 * e) # hidden channels 218 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 219 | self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) 220 | self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3) 221 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) 222 | 223 | def call(self, inputs): 224 | return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) 225 | 226 | 227 | class TFC3x(keras.layers.Layer): 228 | # 3 module with cross-convolutions 229 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): 230 | # ch_in, ch_out, number, shortcut, groups, expansion 231 | super().__init__() 232 | c_ = int(c2 * e) # hidden channels 233 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 234 | self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) 235 | self.cv3 = TFConv(2 * c_, c2, 1, 1, 
w=w.cv3) 236 | self.m = keras.Sequential([ 237 | TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]) 238 | 239 | def call(self, inputs): 240 | return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) 241 | 242 | 243 | class TFSPP(keras.layers.Layer): 244 | # Spatial pyramid pooling layer used in YOLOv3-SPP 245 | def __init__(self, c1, c2, k=(5, 9, 13), w=None): 246 | super().__init__() 247 | c_ = c1 // 2 # hidden channels 248 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 249 | self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2) 250 | self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k] 251 | 252 | def call(self, inputs): 253 | x = self.cv1(inputs) 254 | return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3)) 255 | 256 | 257 | class TFSPPF(keras.layers.Layer): 258 | # Spatial pyramid pooling-Fast layer 259 | def __init__(self, c1, c2, k=5, w=None): 260 | super().__init__() 261 | c_ = c1 // 2 # hidden channels 262 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) 263 | self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2) 264 | self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME') 265 | 266 | def call(self, inputs): 267 | x = self.cv1(inputs) 268 | y1 = self.m(x) 269 | y2 = self.m(y1) 270 | return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3)) 271 | 272 | 273 | class TFDetect(keras.layers.Layer): 274 | # TF YOLOv5 Detect layer 275 | def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer 276 | super().__init__() 277 | self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32) 278 | self.nc = nc # number of classes 279 | self.no = nc + 5 # number of outputs per anchor 280 | self.nl = len(anchors) # number of detection layers 281 | self.na = len(anchors[0]) // 2 # number of anchors 282 | self.grid = [tf.zeros(1)] * self.nl # init grid 283 | self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32) 284 | self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2]) 285 | self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] 286 | self.training = False # set to False after building model 287 | self.imgsz = imgsz 288 | for i in range(self.nl): 289 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] 290 | self.grid[i] = self._make_grid(nx, ny) 291 | 292 | def call(self, inputs): 293 | z = [] # inference output 294 | x = [] 295 | for i in range(self.nl): 296 | x.append(self.m[i](inputs[i])) 297 | # x(bs,20,20,255) to x(bs,3,20,20,85) 298 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] 299 | x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no]) 300 | 301 | if not self.training: # inference 302 | y = x[i] 303 | grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5 304 | anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4 305 | xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy 306 | wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid 307 | # Normalize xywh to 0-1 to reduce calibration error 308 | xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) 309 | wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) 310 | y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1) 311 | z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) 312 | 313 | return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), ) 314 | 
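# Note (worked example with assumed values): the inference decode above mirrors the PyTorch Detect head,
#   xy = (2*sigmoid(t_xy) + grid_xy - 0.5) * stride,   wh = (2*sigmoid(t_wh))**2 * anchor,
# with the -0.5 folded into `grid` and the factor of 4 pre-multiplied into `anchor_grid`.
# E.g. stride=8, grid cell (10, 12), anchor (10, 13), raw outputs t=0 (sigmoid=0.5):
#   xy = (1 + (10, 12) - 0.5) * 8 = (84.0, 100.0) px,   wh = 0.25 * (4 * (10, 13)) = (10.0, 13.0) px,
# and both are then divided by imgsz so the exported model emits 0-1 normalized coordinates.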
315 | @staticmethod 316 | def _make_grid(nx=20, ny=20): 317 | # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 318 | # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 319 | xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) 320 | return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) 321 | 322 | 323 | class TFSegment(TFDetect): 324 | # YOLOv5 Segment head for segmentation models 325 | def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): 326 | super().__init__(nc, anchors, ch, imgsz, w) 327 | self.nm = nm # number of masks 328 | self.npr = npr # number of protos 329 | self.no = 5 + nc + self.nm # number of outputs per anchor 330 | self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv 331 | self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos 332 | self.detect = TFDetect.call 333 | 334 | def call(self, x): 335 | p = self.proto(x[0]) 336 | # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos 337 | p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160) 338 | x = self.detect(self, x) 339 | return (x, p) if self.training else (x[0], p) 340 | 341 | 342 | class TFProto(keras.layers.Layer): 343 | 344 | def __init__(self, c1, c_=256, c2=32, w=None): 345 | super().__init__() 346 | self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) 347 | self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') 348 | self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) 349 | self.cv3 = TFConv(c_, c2, w=w.cv3) 350 | 351 | def call(self, inputs): 352 | return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) 353 | 354 | 355 | class TFUpsample(keras.layers.Layer): 356 | # TF version of torch.nn.Upsample() 357 | def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' 358 | super().__init__() 359 | assert scale_factor % 2 == 0, 'scale_factor must be multiple of 2' 360 | self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode) 361 | # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode) 362 | # with default arguments: align_corners=False, half_pixel_centers=False 363 | # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x, 364 | # size=(x.shape[1] * 2, x.shape[2] * 2)) 365 | 366 | def call(self, inputs): 367 | return self.upsample(inputs) 368 | 369 | 370 | class TFConcat(keras.layers.Layer): 371 | # TF version of torch.concat() 372 | def __init__(self, dimension=1, w=None): 373 | super().__init__() 374 | assert dimension == 1, 'convert only NCHW to NHWC concat' 375 | self.d = 3 376 | 377 | def call(self, inputs): 378 | return tf.concat(inputs, self.d) 379 | 380 | 381 | def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) 382 | LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") 383 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 384 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 385 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 386 | 387 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 388 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 389 | m_str = m 390 | m = eval(m) if isinstance(m, str) else m # eval strings 391 | for j, a in enumerate(args): 392 | 
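# Note: after YAML loading, args may contain strings such as 'nc', 'anchors' or 'nearest'; eval() below
# resolves those that name variables in this scope, while the NameError raised by purely literal strings
# like 'nearest' is deliberately swallowed so they pass through unchanged.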
try: 393 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 394 | except NameError: 395 | pass 396 | 397 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 398 | if m in [ 399 | nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv, 400 | BottleneckCSP, C3, C3x]: 401 | c1, c2 = ch[f], args[0] 402 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 403 | 404 | args = [c1, c2, *args[1:]] 405 | if m in [BottleneckCSP, C3, C3x]: 406 | args.insert(2, n) 407 | n = 1 408 | elif m is nn.BatchNorm2d: 409 | args = [ch[f]] 410 | elif m is Concat: 411 | c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) 412 | elif m in [Detect, Segment]: 413 | args.append([ch[x + 1] for x in f]) 414 | if isinstance(args[1], int): # number of anchors 415 | args[1] = [list(range(args[1] * 2))] * len(f) 416 | if m is Segment: 417 | args[3] = make_divisible(args[3] * gw, 8) 418 | args.append(imgsz) 419 | else: 420 | c2 = ch[f] 421 | 422 | tf_m = eval('TF' + m_str.replace('nn.', '')) 423 | m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \ 424 | else tf_m(*args, w=model.model[i]) # module 425 | 426 | torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module 427 | t = str(m)[8:-2].replace('__main__.', '') # module type 428 | np = sum(x.numel() for x in torch_m_.parameters()) # number params 429 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 430 | LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print 431 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 432 | layers.append(m_) 433 | ch.append(c2) 434 | return keras.Sequential(layers), sorted(save) 435 | 436 | 437 | class TFModel: 438 | # TF YOLOv5 model 439 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes 440 | super().__init__() 441 | if isinstance(cfg, dict): 442 | self.yaml = cfg # model dict 443 | else: # is *.yaml 444 | import yaml # for torch hub 445 | self.yaml_file = Path(cfg).name 446 | with open(cfg) as f: 447 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 448 | 449 | # Define model 450 | if nc and nc != self.yaml['nc']: 451 | LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}") 452 | self.yaml['nc'] = nc # override yaml value 453 | self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz) 454 | 455 | def predict(self, 456 | inputs, 457 | tf_nms=False, 458 | agnostic_nms=False, 459 | topk_per_class=100, 460 | topk_all=100, 461 | iou_thres=0.45, 462 | conf_thres=0.25): 463 | y = [] # outputs 464 | x = inputs 465 | for m in self.model.layers: 466 | if m.f != -1: # if not from previous layer 467 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 468 | 469 | x = m(x) # run 470 | y.append(x if m.i in self.savelist else None) # save output 471 | 472 | # Add TensorFlow NMS 473 | if tf_nms: 474 | boxes = self._xywh2xyxy(x[0][..., :4]) 475 | probs = x[0][:, :, 4:5] 476 | classes = x[0][:, :, 5:] 477 | scores = probs * classes 478 | if agnostic_nms: 479 | nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres) 480 | else: 481 | boxes = tf.expand_dims(boxes, 2) 482 | nms = tf.image.combined_non_max_suppression(boxes, 483 | scores, 484 | topk_per_class, 485 | topk_all, 486 | iou_thres, 487 | conf_thres, 488 | clip_boxes=False) 
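# Note: tf.image.combined_non_max_suppression runs per-class NMS and returns
# (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections); clip_boxes=False leaves the normalized
# coordinates unclipped. The agnostic branch above instead maps AgnosticNMS over the batch, padding each
# image's detections to topk_all so output shapes stay static.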
489 | return (nms, ) 490 | return x # output [1,6300,85] = [xywh, conf, class0, class1, ...] 491 | # x = x[0] # [x(1,6300,85), ...] to x(6300,85) 492 | # xywh = x[..., :4] # x(6300,4) boxes 493 | # conf = x[..., 4:5] # x(6300,1) confidences 494 | # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes 495 | # return tf.concat([conf, cls, xywh], 1) 496 | 497 | @staticmethod 498 | def _xywh2xyxy(xywh): 499 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 500 | x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1) 501 | return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1) 502 | 503 | 504 | class AgnosticNMS(keras.layers.Layer): 505 | # TF Agnostic NMS 506 | def call(self, input, topk_all, iou_thres, conf_thres): 507 | # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450 508 | return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), 509 | input, 510 | fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32), 511 | name='agnostic_nms') 512 | 513 | @staticmethod 514 | def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS 515 | boxes, classes, scores = x 516 | class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32) 517 | scores_inp = tf.reduce_max(scores, -1) 518 | selected_inds = tf.image.non_max_suppression(boxes, 519 | scores_inp, 520 | max_output_size=topk_all, 521 | iou_threshold=iou_thres, 522 | score_threshold=conf_thres) 523 | selected_boxes = tf.gather(boxes, selected_inds) 524 | padded_boxes = tf.pad(selected_boxes, 525 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]], 526 | mode='CONSTANT', 527 | constant_values=0.0) 528 | selected_scores = tf.gather(scores_inp, selected_inds) 529 | padded_scores = tf.pad(selected_scores, 530 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], 531 | mode='CONSTANT', 532 | constant_values=-1.0) 533 | selected_classes = tf.gather(class_inds, selected_inds) 534 | padded_classes = tf.pad(selected_classes, 535 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], 536 | mode='CONSTANT', 537 | constant_values=-1.0) 538 | valid_detections = tf.shape(selected_inds)[0] 539 | return padded_boxes, padded_scores, padded_classes, valid_detections 540 | 541 | 542 | def activations(act=nn.SiLU): 543 | # Returns TF activation from input PyTorch activation 544 | if isinstance(act, nn.LeakyReLU): 545 | return lambda x: keras.activations.relu(x, alpha=0.1) 546 | elif isinstance(act, nn.Hardswish): 547 | return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667 548 | elif isinstance(act, (nn.SiLU, SiLU)): 549 | return lambda x: keras.activations.swish(x) 550 | else: 551 | raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}') 552 | 553 | 554 | def representative_dataset_gen(dataset, ncalib=100): 555 | # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays 556 | for n, (path, img, im0s, vid_cap, string) in enumerate(dataset): 557 | im = np.transpose(img, [1, 2, 0]) 558 | im = np.expand_dims(im, axis=0).astype(np.float32) 559 | im /= 255 560 | yield [im] 561 | if n >= ncalib: 562 | break 563 | 564 | 565 | def run( 566 | weights=ROOT / 'yolov5s.pt', # weights path 567 | imgsz=(640, 640), # inference size h,w 568 | batch_size=1, # batch size 569 | dynamic=False, # dynamic batch size 570 | ): 571 | # PyTorch model 572 | im = torch.zeros((batch_size, 3, 
*imgsz)) # BCHW image 573 | model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False) 574 | _ = model(im) # inference 575 | model.info() 576 | 577 | # TensorFlow model 578 | im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image 579 | tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) 580 | _ = tf_model.predict(im) # inference 581 | 582 | # Keras model 583 | im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size) 584 | keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im)) 585 | keras_model.summary() 586 | 587 | LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.') 588 | 589 | 590 | def parse_opt(): 591 | parser = argparse.ArgumentParser() 592 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path') 593 | parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') 594 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 595 | parser.add_argument('--dynamic', action='store_true', help='dynamic batch size') 596 | opt = parser.parse_args() 597 | opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand 598 | print_args(vars(opt)) 599 | return opt 600 | 601 | 602 | def main(opt): 603 | run(**vars(opt)) 604 | 605 | 606 | if __name__ == '__main__': 607 | opt = parse_opt() 608 | main(opt) 609 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | """ 3 | YOLO-specific modules 4 | 5 | Usage: 6 | $ python models/yolo.py --cfg yolov5s.yaml 7 | """ 8 | 9 | import argparse 10 | import contextlib 11 | import os 12 | import platform 13 | import sys 14 | from copy import deepcopy 15 | from pathlib import Path 16 | 17 | FILE = Path(__file__).resolve() 18 | ROOT = FILE.parents[1] # YOLOv5 root directory 19 | if str(ROOT) not in sys.path: 20 | sys.path.append(str(ROOT)) # add ROOT to PATH 21 | if platform.system() != 'Windows': 22 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative 23 | 24 | from models.common import * # noqa 25 | from models.experimental import * # noqa 26 | from utils.autoanchor import check_anchor_order 27 | from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args 28 | from utils.plots import feature_visualization 29 | from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, 30 | time_sync) 31 | 32 | try: 33 | import thop # for FLOPs computation 34 | except ImportError: 35 | thop = None 36 | 37 | 38 | class Detect(nn.Module): 39 | # YOLOv5 Detect head for detection models 40 | stride = None # strides computed during build 41 | dynamic = False # force grid reconstruction 42 | export = False # export mode 43 | 44 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer 45 | super().__init__() 46 | self.nc = nc # number of classes 47 | self.no = nc + 5 # number of outputs per anchor 48 | self.nl = len(anchors) # number of detection layers 49 | self.na = len(anchors[0]) // 2 # number of anchors 50 | self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid 51 | self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid 52 | self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # 
shape(nl,na,2) 53 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 54 | self.inplace = inplace # use inplace ops (e.g. slice assignment) 55 | 56 | def forward(self, x): 57 | z = [] # inference output 58 | for i in range(self.nl): 59 | x[i] = self.m[i](x[i]) # conv 60 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 61 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 62 | 63 | if not self.training: # inference 64 | if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]: 65 | self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) 66 | 67 | if isinstance(self, Segment): # (boxes + masks) 68 | xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4) 69 | xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy 70 | wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh 71 | y = torch.cat((xy, wh, conf.sigmoid(), mask), 4) 72 | else: # Detect (boxes only) 73 | xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4) 74 | xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy 75 | wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh 76 | y = torch.cat((xy, wh, conf), 4) 77 | z.append(y.view(bs, self.na * nx * ny, self.no)) 78 | 79 | return x if self.training else (torch.cat(z, 1), ) if self.export else (torch.cat(z, 1), x) 80 | 81 | def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')): 82 | d = self.anchors[i].device 83 | t = self.anchors[i].dtype 84 | shape = 1, self.na, ny, nx, 2 # grid shape 85 | y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t) 86 | yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility 87 | grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. 
y = 2.0 * x - 0.5 88 | anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape) 89 | return grid, anchor_grid 90 | 91 | 92 | class Segment(Detect): 93 | # YOLOv5 Segment head for segmentation models 94 | def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): 95 | super().__init__(nc, anchors, ch, inplace) 96 | self.nm = nm # number of masks 97 | self.npr = npr # number of protos 98 | self.no = 5 + nc + self.nm # number of outputs per anchor 99 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 100 | self.proto = Proto(ch[0], self.npr, self.nm) # protos 101 | self.detect = Detect.forward 102 | 103 | def forward(self, x): 104 | p = self.proto(x[0]) 105 | x = self.detect(self, x) 106 | return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1]) 107 | 108 | 109 | class BaseModel(nn.Module): 110 | # YOLOv5 base model 111 | def forward(self, x, profile=False, visualize=False): 112 | return self._forward_once(x, profile, visualize) # single-scale inference, train 113 | 114 | def _forward_once(self, x, profile=False, visualize=False): 115 | y, dt = [], [] # outputs 116 | for m in self.model: 117 | if m.f != -1: # if not from previous layer 118 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 119 | if profile: 120 | self._profile_one_layer(m, x, dt) 121 | x = m(x) # run 122 | y.append(x if m.i in self.save else None) # save output 123 | if visualize: 124 | feature_visualization(x, m.type, m.i, save_dir=visualize) 125 | return x 126 | 127 | def _profile_one_layer(self, m, x, dt): 128 | c = m == self.model[-1] # is final layer, copy input as inplace fix 129 | o = thop.profile(m, inputs=(x.copy() if c else x, ), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs 130 | t = time_sync() 131 | for _ in range(10): 132 | m(x.copy() if c else x) 133 | dt.append((time_sync() - t) * 100) 134 | if m == self.model[0]: 135 | LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module") 136 | LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') 137 | if c: 138 | LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") 139 | 140 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 141 | LOGGER.info('Fusing layers... 
') 142 | for m in self.model.modules(): 143 | if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): 144 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 145 | delattr(m, 'bn') # remove batchnorm 146 | m.forward = m.forward_fuse # update forward 147 | self.info() 148 | return self 149 | 150 | def info(self, verbose=False, img_size=640): # print model information 151 | model_info(self, verbose, img_size) 152 | 153 | def _apply(self, fn): 154 | # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers 155 | self = super()._apply(fn) 156 | m = self.model[-1] # Detect() 157 | if isinstance(m, (Detect, Segment)): 158 | m.stride = fn(m.stride) 159 | m.grid = list(map(fn, m.grid)) 160 | if isinstance(m.anchor_grid, list): 161 | m.anchor_grid = list(map(fn, m.anchor_grid)) 162 | return self 163 | 164 | 165 | class DetectionModel(BaseModel): 166 | # YOLOv5 detection model 167 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 168 | super().__init__() 169 | if isinstance(cfg, dict): 170 | self.yaml = cfg # model dict 171 | else: # is *.yaml 172 | import yaml # for torch hub 173 | self.yaml_file = Path(cfg).name 174 | with open(cfg, encoding='ascii', errors='ignore') as f: 175 | self.yaml = yaml.safe_load(f) # model dict 176 | 177 | # Define model 178 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 179 | if nc and nc != self.yaml['nc']: 180 | LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 181 | self.yaml['nc'] = nc # override yaml value 182 | if anchors: 183 | LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') 184 | self.yaml['anchors'] = round(anchors) # override yaml value 185 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 186 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 187 | self.inplace = self.yaml.get('inplace', True) 188 | 189 | # Build strides, anchors 190 | m = self.model[-1] # Detect() 191 | if isinstance(m, (Detect, Segment)): 192 | s = 256 # 2x min stride 193 | m.inplace = self.inplace 194 | forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) 195 | m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward 196 | check_anchor_order(m) 197 | m.anchors /= m.stride.view(-1, 1, 1) 198 | self.stride = m.stride 199 | self._initialize_biases() # only run once 200 | 201 | # Init weights, biases 202 | initialize_weights(self) 203 | self.info() 204 | LOGGER.info('') 205 | 206 | def forward(self, x, augment=False, profile=False, visualize=False): 207 | if augment: 208 | return self._forward_augment(x) # augmented inference, None 209 | return self._forward_once(x, profile, visualize) # single-scale inference, train 210 | 211 | def _forward_augment(self, x): 212 | img_size = x.shape[-2:] # height, width 213 | s = [1, 0.83, 0.67] # scales 214 | f = [None, 3, None] # flips (2-ud, 3-lr) 215 | y = [] # outputs 216 | for si, fi in zip(s, f): 217 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 218 | yi = self._forward_once(xi)[0] # forward 219 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 220 | yi = self._descale_pred(yi, fi, si, img_size) 221 | y.append(yi) 222 | y = self._clip_augmented(y) # clip augmented tails 223 | return torch.cat(y, 1), None # augmented inference, train 224 | 225 | def _descale_pred(self, p, flips, 
scale, img_size): 226 | # de-scale predictions following augmented inference (inverse operation) 227 | if self.inplace: 228 | p[..., :4] /= scale # de-scale 229 | if flips == 2: 230 | p[..., 1] = img_size[0] - p[..., 1] # de-flip ud 231 | elif flips == 3: 232 | p[..., 0] = img_size[1] - p[..., 0] # de-flip lr 233 | else: 234 | x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale 235 | if flips == 2: 236 | y = img_size[0] - y # de-flip ud 237 | elif flips == 3: 238 | x = img_size[1] - x # de-flip lr 239 | p = torch.cat((x, y, wh, p[..., 4:]), -1) 240 | return p 241 | 242 | def _clip_augmented(self, y): 243 | # Clip YOLOv5 augmented inference tails 244 | nl = self.model[-1].nl # number of detection layers (P3-P5) 245 | g = sum(4 ** x for x in range(nl)) # grid points 246 | e = 1 # exclude layer count 247 | i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices 248 | y[0] = y[0][:, :-i] # large 249 | i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices 250 | y[-1] = y[-1][:, i:] # small 251 | return y 252 | 253 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 254 | # https://arxiv.org/abs/1708.02002 section 3.3 255 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 256 | m = self.model[-1] # Detect() module 257 | for mi, s in zip(m.m, m.stride): # from 258 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 259 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 260 | b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls 261 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 262 | 263 | 264 | Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility 265 | 266 | 267 | class SegmentationModel(DetectionModel): 268 | # YOLOv5 segmentation model 269 | def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None): 270 | super().__init__(cfg, ch, nc, anchors) 271 | 272 | 273 | class ClassificationModel(BaseModel): 274 | # YOLOv5 classification model 275 | def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index 276 | super().__init__() 277 | self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg) 278 | 279 | def _from_detection_model(self, model, nc=1000, cutoff=10): 280 | # Create a YOLOv5 classification model from a YOLOv5 detection model 281 | if isinstance(model, DetectMultiBackend): 282 | model = model.model # unwrap DetectMultiBackend 283 | model.model = model.model[:cutoff] # backbone 284 | m = model.model[-1] # last layer 285 | ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module 286 | c = Classify(ch, nc) # Classify() 287 | c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type 288 | model.model[-1] = c # replace 289 | self.model = model.model 290 | self.stride = model.stride 291 | self.save = [] 292 | self.nc = nc 293 | 294 | def _from_yaml(self, cfg): 295 | # Create a YOLOv5 classification model from a *.yaml file 296 | self.model = None 297 | 298 | 299 | def parse_model(d, ch): # model_dict, input_channels(3) 300 | # Parse a YOLOv5 model.yaml dictionary 301 | LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") 302 | anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], 
d.get('activation') 303 | if act: 304 | Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() 305 | LOGGER.info(f"{colorstr('activation:')} {act}") # print 306 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 307 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 308 | 309 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 310 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 311 | m = eval(m) if isinstance(m, str) else m # eval strings 312 | for j, a in enumerate(args): 313 | with contextlib.suppress(NameError): 314 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 315 | 316 | n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain 317 | if m in { 318 | Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, 319 | BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, SElayer}: 320 | c1, c2 = ch[f], args[0] 321 | if c2 != no: # if not output 322 | c2 = make_divisible(c2 * gw, 8) 323 | 324 | args = [c1, c2, *args[1:]] 325 | if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}: 326 | args.insert(2, n) # number of repeats 327 | n = 1 328 | elif m is nn.BatchNorm2d: 329 | args = [ch[f]] 330 | elif m is Concat: 331 | c2 = sum(ch[x] for x in f) 332 | # TODO: channel, gw, gd 333 | elif m in {Detect, Segment}: 334 | args.append([ch[x] for x in f]) 335 | if isinstance(args[1], int): # number of anchors 336 | args[1] = [list(range(args[1] * 2))] * len(f) 337 | if m is Segment: 338 | args[3] = make_divisible(args[3] * gw, 8) 339 | elif m is Contract: 340 | c2 = ch[f] * args[0] ** 2 341 | elif m is Expand: 342 | c2 = ch[f] // args[0] ** 2 343 | else: 344 | c2 = ch[f] 345 | 346 | m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module 347 | t = str(m)[8:-2].replace('__main__.', '') # module type 348 | np = sum(x.numel() for x in m_.parameters()) # number params 349 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 350 | LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print 351 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 352 | layers.append(m_) 353 | if i == 0: 354 | ch = [] 355 | ch.append(c2) 356 | return nn.Sequential(*layers), sorted(save) 357 | 358 | 359 | if __name__ == '__main__': 360 | parser = argparse.ArgumentParser() 361 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 362 | parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs') 363 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 364 | parser.add_argument('--profile', action='store_true', help='profile model speed') 365 | parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer') 366 | parser.add_argument('--test', action='store_true', help='test all yolo*.yaml') 367 | opt = parser.parse_args() 368 | opt.cfg = check_yaml(opt.cfg) # check YAML 369 | print_args(vars(opt)) 370 | device = select_device(opt.device) 371 | 372 | # Create model 373 | im = torch.rand(opt.batch_size, 3, 640, 640).to(device) 374 | model = Model(opt.cfg).to(device) 375 | 376 | # Options 377 | if opt.line_profile: # profile layer by layer 378 | model(im, profile=True) 379 | 380 | elif opt.profile: # profile forward-backward 381 | results = profile(input=im, ops=[model], n=3) 382 | 383 | elif opt.test: # test all models 384 | for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'): 385 | try: 386 | _ = Model(cfg) 387 | except Exception as e: 388 | print(f'Error in {cfg}: {e}') 389 | 390 | else: # report fused model summary 391 | model.fuse() 392 | -------------------------------------------------------------------------------- /models/yolov5_ghost_attention.yaml: -------------------------------------------------------------------------------- 1 | nc: 3 # number of classes 2 | depth_multiple: 0.33 # model depth multiple 3 | width_multiple: 0.50 # layer channel multiple 4 | anchors: 5 | - [10,13, 16,30, 33,23] # P3/8 6 | - [30,61, 62,45, 59,119] # P4/16 7 | - [116,90, 156,198, 373,326] # P5/32 8 | 9 | # YOLOv5 with GhostNet backbone 10 | backbone: 11 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 12 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 13 | [-1, 3, C3Ghost, [128]], 14 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 15 | [-1, 6, C3Ghost, [256]], 16 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 17 | [-1, 9, C3Ghost, [512]], 18 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 19 | [-1, 3, C3Ghost, [1024]], 20 | [-1, 1, SPPF, [1024, 5]], # 9, SPP can replace SPPF here for robustness to different input resolutions 21 | ] 22 | 23 | head: 24 | [[-1, 1, DWConv, [512, 1, 1]], 25 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 26 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 27 | [-1, 3, C3_seblock, [512, False]], # 13 28 | 29 | [-1, 1, DWConv, [256, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 32 | [-1, 3, C3_seblock, [256, False]], # 17 (P3/8-small) 33 | 34 | [-1, 1, DWConv, [256, 3, 2]], 35 | [[-1, 14], 1, Concat, [1]], # cat head P4 36 | [-1, 3, C3_seblock, [512, False]], # 20 (P4/16-medium) 37 | 38 | [-1, 1, GhostConv, [512, 3, 2]], 39 | [[-1, 10], 1, Concat, [1]], # cat head P5 40 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 41 | 42 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 43 | ] -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, 
Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5l_2.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, DWConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, DWConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, DWConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, DWConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, 
[1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m_2.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, DWConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, DWConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, DWConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, DWConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 
1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s-ghost_dw.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3Ghost, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, GhostConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3Ghost, [512, False]], # 13 33 | 34 | [-1, 1, GhostConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, GhostConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, GhostConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s-transformer_dw.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 
1, DWConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, DWConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, DWConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, DWConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, DWConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, DWConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, DWConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, DWConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_dw_se.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 3 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SElayer, [1024]], 25 | [-1, 1, SPPF, [1024, 5]], # 9 26 | ] 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | [[-1, 1, DWConv, [512, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 
'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3, [512, False]], # 13 34 | 35 | [-1, 1, DWConv, [256, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 39 | 40 | [-1, 1, DWConv, [256, 3, 2]], 41 | [[-1, 14], 1, Concat, [1]], # cat head P4 42 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 43 | 44 | [-1, 1, DWConv, [512, 3, 2]], 45 | [[-1, 10], 1, Concat, [1]], # cat head P5 46 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 47 | 48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/yolov5s_dw_se_c3ghost.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 3 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | # [-1, 3, C3x, [1024]], 24 | [-1, 1, SElayer, [1024]], 25 | [-1, 1, SPPF, [1024, 5]], # 9 26 | ] 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | [[-1, 1, DWConv, [512, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3Ghost, [512, False]], # 13 34 | 35 | [-1, 1, DWConv, [256, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 39 | 40 | [-1, 1, DWConv, [256, 3, 2]], 41 | [[-1, 14], 1, Concat, [1]], # cat head P4 42 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 43 | 44 | [-1, 1, DWConv, [512, 3, 2]], 45 | [[-1, 10], 1, Concat, [1]], # cat head P5 46 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 47 | 48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/yolov5s_dw_se_c3spp_c3ghost.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 3 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3SPP, [1024]], 24 | [-1, 1, SElayer, [1024]], 25 | [-1, 1, SPPF, [1024, 5]], # 9 26 | ] 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | [[-1, 1, DWConv, 
[512, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3Ghost, [512, False]], # 13 34 | 35 | [-1, 1, DWConv, [256, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 39 | 40 | [-1, 1, DWConv, [256, 3, 2]], 41 | [[-1, 14], 1, Concat, [1]], # cat head P4 42 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 43 | 44 | [-1, 1, DWConv, [512, 3, 2]], 45 | [[-1, 10], 1, Concat, [1]], # cat head P5 46 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 47 | 48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/yolov5s_dw_spp.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 3 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3SPP, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, DWConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, DWConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, DWConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, DWConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_dw_x.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 3 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3x, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, DWConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | 
[[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, DWConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, DWConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, DWConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_raw.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | # first_layer = DepthwiseSeparableConv(in_channels=3, out_channels=64, kernel_size=6, stride=2, padding=2) 16 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [1024]], 25 | [-1, 1, SPPF, [1024, 5]], # 9 26 | ] 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | [[-1, 1, DWConv, [512, 1, 1]], 31 | [-1, 1, 
nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3, [512, False]], # 13 34 | 35 | [-1, 1, DWConv, [256, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 39 | 40 | [-1, 1, DWConv, [256, 3, 2]], 41 | [[-1, 14], 1, Concat, [1]], # cat head P4 42 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 43 | 44 | [-1, 1, DWConv, [512, 3, 2]], 45 | [[-1, 10], 1, Concat, [1]], # cat head P5 46 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 47 | 48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/yolov5x_raw.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5_gradcam.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings('ignore') 3 | warnings.simplefilter('ignore') 4 | import torch, yaml, cv2, os, shutil 5 | import numpy as np 6 | np.random.seed(0) 7 | import matplotlib.pyplot as plt 8 | from tqdm import trange 9 | from PIL import Image 10 | from models.yolo import Model 11 | from utils.general import intersect_dicts 12 | from utils.augmentations import letterbox 13 | from utils.general import xywh2xyxy 14 | from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM 15 | from pytorch_grad_cam.utils.image import show_cam_on_image 16 | from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients 17 | 18 | class yolov5_heatmap: 19 | def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio): 20 | device = torch.device(device) 21 | ckpt = torch.load(weight) 22 | model_names = ckpt['model'].names 23 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 24 | model = 
Model(cfg, ch=3, nc=len(model_names)).to(device) 25 | csd = intersect_dicts(csd, model.state_dict(), exclude=['anchor']) # intersect 26 | model.load_state_dict(csd, strict=False) # load 27 | # model.fuse().eval() 28 | model.fuse().eval() 29 | print(f'Transferred {len(csd)}/{len(model.state_dict())} items') 30 | 31 | target_layers = [eval(layer)] 32 | method = eval(method) 33 | 34 | colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.float16) 35 | self.__dict__.update(locals()) 36 | 37 | def post_process(self, result): 38 | logits_ = result[..., 4:] 39 | boxes_ = result[..., :4] 40 | sorted, indices = torch.sort(logits_[..., 0], descending=True) 41 | return logits_[0][indices[0]], xywh2xyxy(boxes_[0][indices[0]]).cpu().detach().numpy() 42 | 43 | def draw_detections(self, box, color, name, img): 44 | xmin, ymin, xmax, ymax = list(map(int, list(box))) 45 | cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2) 46 | cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2, lineType=cv2.LINE_AA) 47 | return img 48 | 49 | def __call__(self, img_path, save_path): 50 | # remove dir if exist 51 | if os.path.exists(save_path): 52 | shutil.rmtree(save_path) 53 | # make dir if not exist 54 | os.makedirs(save_path, exist_ok=True) 55 | 56 | # img process 57 | img = cv2.imread(img_path) 58 | img = letterbox(img)[0] 59 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 60 | img = np.float32(img) / 255.0 61 | tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device) 62 | 63 | # init ActivationsAndGradients 64 | grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None) 65 | 66 | # get ActivationsAndResult 67 | result = grads(tensor) 68 | activations = grads.activations[0].cpu().detach().numpy() 69 | 70 | # postprocess to yolo output 71 | post_result, post_boxes = self.post_process(result[0]) 72 | for i in trange(int(post_result.size(0) * self.ratio)): 73 | if post_result[i][0] < self.conf_threshold: 74 | break 75 | 76 | self.model.zero_grad() 77 | if self.backward_type == 'conf': 78 | post_result[i, 0].backward(retain_graph=True) 79 | else: 80 | # get max probability for this prediction 81 | score = post_result[i, 1:].max() 82 | score.backward(retain_graph=True) 83 | 84 | # process heatmap 85 | gradients = grads.gradients[0] 86 | b, k, u, v = gradients.size() 87 | weights = self.method.get_cam_weights(self.method, None, None, None, activations, gradients.detach().numpy()) 88 | weights = weights.reshape((b, k, 1, 1)) 89 | saliency_map = np.sum(weights * activations, axis=1) 90 | saliency_map = np.squeeze(np.maximum(saliency_map, 0)) 91 | saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2))) 92 | saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max() 93 | if (saliency_map_max - saliency_map_min) == 0: 94 | continue 95 | saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min) 96 | 97 | # add heatmap and box to image 98 | cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True) 99 | cam_image = self.draw_detections(post_boxes[i], self.colors[int(post_result[i, 1:].argmax())], f'{self.model_names[int(post_result[i, 1:].argmax())]} {post_result[i][0]:.2f}', cam_image) 100 | cam_image = Image.fromarray(cam_image) 101 | cam_image.save(f'{save_path}/{i}.png') 102 | 103 | def get_params(): 104 | params = { 105 | 'weight': '/root/yolov5/runs/train/s_w16_b32_e200/weights/best.pt', 
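# NOTE: the 'weight' path above comes from the author's own training run; 'cfg' below
# must describe the same architecture the checkpoint was trained with, since
# intersect_dicts() keeps only tensors with matching names and shapes and the
# strict=False load silently drops the rest, leaving a half-initialized model.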
106 | 'cfg': 'models/yolov5s.yaml', 107 | 'device': 'cuda:0', 108 | 'method': 'XGradCAM', # GradCAMPlusPlus, GradCAM, XGradCAM 109 | 'layer': 'model.model[-2]', 110 | 'backward_type': 'class', # class or conf 111 | 'conf_threshold': 0.6, # 0.6 112 | 'ratio': 0.02 # 0.02-0.1 113 | } 114 | return params 115 | 116 | if __name__ == '__main__': 117 | model = yolov5_heatmap(**get_params()) 118 | model("/root/autodl-tmp/datasets_new/images/val/000_br0.7_th0.05.jpg", 'result') 119 | --------------------------------------------------------------------------------