├── LICENSE
├── README.md
├── dataset_prepare
│   ├── augument_with_label.py
│   ├── datasets_prepare.py
│   ├── rename_txt.py
│   ├── rotate_with_label.py
│   ├── synthetic_fog.py
│   └── use.md
├── label_format_conversion
│   ├── coco_split_trainVal.py
│   ├── coco_visulize.py
│   ├── generate_persudo_json.py
│   ├── make_voc.py
│   ├── readme.md
│   ├── voc_split_trainVal.py
│   ├── voc_to_coco_v1.py
│   ├── voc_to_coco_v2.py
│   ├── voc_to_yoloV3.py
│   └── voc_to_yoloV5.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   ├── common.cpython-310.pyc
│   │   ├── experimental.cpython-310.pyc
│   │   └── yolo.cpython-310.pyc
│   ├── common.py
│   ├── experimental.py
│   ├── hub
│   │   ├── anchors.yaml
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov3-tiny.yaml
│   │   ├── yolov3.yaml
│   │   ├── yolov5-bifpn.yaml
│   │   ├── yolov5-fpn.yaml
│   │   ├── yolov5-p2.yaml
│   │   ├── yolov5-p34.yaml
│   │   ├── yolov5-p6.yaml
│   │   ├── yolov5-p7.yaml
│   │   ├── yolov5-panet.yaml
│   │   ├── yolov5l6.yaml
│   │   ├── yolov5m6.yaml
│   │   ├── yolov5n6.yaml
│   │   ├── yolov5s-LeakyReLU.yaml
│   │   ├── yolov5s-ghost.yaml
│   │   ├── yolov5s-transformer.yaml
│   │   ├── yolov5s6.yaml
│   │   └── yolov5x6.yaml
│   ├── readme.md
│   ├── segment
│   │   ├── yolov5l-seg.yaml
│   │   ├── yolov5m-seg.yaml
│   │   ├── yolov5n-seg.yaml
│   │   ├── yolov5s-seg.yaml
│   │   └── yolov5x-seg.yaml
│   ├── tf.py
│   ├── yolo.py
│   ├── yolov5_ghost_attention.yaml
│   ├── yolov5l.yaml
│   ├── yolov5l_2.yaml
│   ├── yolov5m.yaml
│   ├── yolov5m_2.yaml
│   ├── yolov5n.yaml
│   ├── yolov5s-ghost_dw.yaml
│   ├── yolov5s-transformer_dw.yaml
│   ├── yolov5s.yaml
│   ├── yolov5s_dw_se.yaml
│   ├── yolov5s_dw_se_c3ghost.yaml
│   ├── yolov5s_dw_se_c3spp_c3ghost.yaml
│   ├── yolov5s_dw_spp.yaml
│   ├── yolov5s_dw_x.yaml
│   ├── yolov5s_raw.yaml
│   ├── yolov5x.yaml
│   └── yolov5x_raw.yaml
└── yolov5_gradcam.py
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2024, lcd955
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Insulator_defect-nest_detection
2 | Research on fault detection models for power transmission and distribution lines based on the YOLOv5 network; this project is built on top of yolov5.
3 | ## 1. Insulator defect detection
4 |
5 | The individual network model configurations are stored in the models folder.
6 |
7 | ## 2. Annotation format conversion
8 |
9 | See the label_format_conversion folder.
10 |
11 | ## 3. Integration and preprocessing of public datasets, including fog synthesis, noise injection, and random cropping
12 |
13 | See the dataset_prepare folder for the dataset preprocessing methods.
14 |
15 | ## 4. Open-source dataset
16 | PaddlePaddle AI Studio: https://aistudio.baidu.com/datasetdetail/270697/0
17 |
18 | ## 5. Feature map visualization
19 |
20 | See the yolov5_gradcam.py script.
21 |
22 |
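23 | ## 6. Quick example
24 |
25 | A minimal sketch of calling the fog synthesis from Python (the `SyntheticFog.__call__(speed_up, img_path, out_path)` signature comes from dataset_prepare/synthetic_fog.py; the paths are placeholders):
26 |
27 | ```python
28 | from dataset_prepare.synthetic_fog import SyntheticFog
29 |
30 | synf = SyntheticFog()
31 | # speed_up=True selects the vectorized fogging path; out_path is the output directory
32 | synf(True, 'path/to/image.jpg', 'path/to/output/')
33 | ```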
--------------------------------------------------------------------------------
/dataset_prepare/augument_with_label.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import glob
4 | import numpy as np
5 | import imgaug as ia
6 | import imgaug.augmenters as iaa
7 | from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
8 | from tqdm import tqdm
9 |
10 | def load_yolo_boxes(filename, shape):
11 | with open(filename) as f:
12 | lines = f.readlines()
13 |
14 | boxes = []
15 | for line in lines:
16 | class_id, x_center, y_center, width, height = map(float, line.split())
17 | x1 = (x_center - width / 2) * shape[1]
18 | y1 = (y_center - height / 2) * shape[0]
19 | x2 = (x_center + width / 2) * shape[1]
20 | y2 = (y_center + height / 2) * shape[0]
21 |
22 | boxes.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2, label=class_id))
23 |
24 | return BoundingBoxesOnImage(boxes, shape=shape)
25 |
26 | def save_yolo_boxes(bbs, filename, shape):
27 | with open(filename, 'w') as f:
28 | for bb in bbs.bounding_boxes:
29 | x_center = (bb.x1 + bb.x2) / 2 / shape[1]
30 | y_center = (bb.y1 + bb.y2) / 2 / shape[0]
31 | width = (bb.x2 - bb.x1) / shape[1]
32 | height = (bb.y2 - bb.y1) / shape[0]
33 |
34 | f.write(f'{int(bb.label)} {x_center} {y_center} {width} {height}\n')
35 |
36 | # source_dir = 'path/to/source'
37 | # output_dir = 'path/to/output'
38 | source_dir = r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\testdata'
39 |
40 | output_dir = r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\testdata\1'
41 | os.makedirs(output_dir, exist_ok=True)  # make sure the output directory exists before writing
42 | images = glob.glob(os.path.join(source_dir, '*.jpg'))
43 |
44 | seq = iaa.Sequential([
45 | iaa.Fliplr(0.5), # horizontal flips
46 | iaa.Crop(percent=(0, 0.1)), # random crops
47 |     # Even at low probability, be sure to combine a few other augmentations so the annotations stay consistent
48 | iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
49 | iaa.Sometimes(0.7, iaa.Affine(scale={"x": (0.8, 1.2), "y": (0.8, 1.2)})),
50 | iaa.Sometimes(0.9,iaa.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)})),
51 | ])
52 |
53 | for image_path in tqdm(images):
54 | image = cv2.imread(image_path)
55 | bbs = load_yolo_boxes(image_path.replace('.jpg', '.txt'), image.shape)
56 |
57 | image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)
58 |     bbs_aug = bbs_aug.remove_out_of_image().clip_out_of_image()  # drop/clip boxes pushed outside the image
59 | cv2.imwrite(os.path.join(output_dir, 'augument_random' + os.path.basename(image_path)), image_aug)
60 | save_yolo_boxes(bbs_aug, os.path.join(output_dir, 'augument_random' + os.path.basename(image_path).replace('.jpg', '.txt')), image_aug.shape)
61 |
62 | print('Done')
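63 |
64 | # Usage note: set source_dir / output_dir above, then run `python dataset_prepare/augument_with_label.py`.
65 | # Each image yields an 'augument_random'-prefixed copy plus a matching YOLO-format .txt label file.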
--------------------------------------------------------------------------------
/dataset_prepare/datasets_prepare.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import numpy as np
4 | import re
5 | import random
6 |
7 | # Set random seeds so the dataset split is deterministic across runs
8 | random.seed(0)
9 | np.random.seed(0)
10 | os.environ['PYTHONHASHSEED'] = '0'
11 |
12 | # Source dataset folder paths
13 | img_dir ="/root/autodl-tmp/merged_insulator_data_new/img" #"./data/images"
14 | label_dir ='/root/autodl-tmp/merged_insulator_data_new/labels' #"./data/labels"
15 |
16 |
17 | # Destination folder paths
18 | dataset_dir ="/root/autodl-tmp/datasets_0512" #"./dataset"
19 | img_train_dir = os.path.join(dataset_dir, "images/train")
20 | img_val_dir = os.path.join(dataset_dir, "images/val")
21 | label_train_dir = os.path.join(dataset_dir, "labels/train")
22 | label_val_dir = os.path.join(dataset_dir, "labels/val")
23 |
24 | # Create the required folders
25 | os.makedirs(img_train_dir, exist_ok=True)
26 | os.makedirs(img_val_dir, exist_ok=True)
27 | os.makedirs(label_train_dir, exist_ok=True)
28 | os.makedirs(label_val_dir, exist_ok=True)
29 |
30 | # Collect all image files
31 | img_files = [f for f in os.listdir(img_dir) if os.path.isfile(os.path.join(img_dir, f))]
32 |
33 | # Shuffle randomly
34 | np.random.shuffle(img_files)
35 |
36 | # Compute the train/val split point
37 | split_idx = int(len(img_files) * 0.7)
38 |
39 | # Split the files into train and val sets
40 | train_files = img_files[:split_idx]
41 | val_files = img_files[split_idx:]
42 |
43 | # Strip the image extension case-insensitively (.jpg/.JPG/.jpeg/.JPEG) to locate the matching label file
44 | for f in train_files:
45 |     shutil.copy(os.path.join(img_dir, f), os.path.join(img_train_dir, f))
46 |     shutil.copy(os.path.join(label_dir, re.sub(r'\.[jJ][pP][eE]?[gG]$', '.txt', f)), os.path.join(label_train_dir, re.sub(r'\.[jJ][pP][eE]?[gG]$', '.txt', f)))
47 |
48 | for f in val_files:
49 |     shutil.copy(os.path.join(img_dir, f), os.path.join(img_val_dir, f))
50 |     shutil.copy(os.path.join(label_dir, re.sub(r'\.[jJ][pP][eE]?[gG]$', '.txt', f)), os.path.join(label_val_dir, re.sub(r'\.[jJ][pP][eE]?[gG]$', '.txt', f)))
51 |
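52 | # Resulting layout: dataset_dir/images/{train,val} and dataset_dir/labels/{train,val}, a 70/30 split.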
--------------------------------------------------------------------------------
/dataset_prepare/rename_txt.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 |
4 | # Rename the txt label files so their names match the corresponding jpg files
5 | # filepath=[r"D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_labels/"]
6 |
7 | folder1 = r"D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_labels/"  # directory of the txt files to rename
8 | folder2 = r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_img/'  # directory of the reference jpg files
9 |
10 | # Collect all .txt and .jpg files (sorted, so the two lists are assumed to pair up by order)
11 | txt_files = sorted(glob.glob(os.path.join(folder1, "*.txt")))
12 | jpg_files = sorted(glob.glob(os.path.join(folder2, "*.jpg")))
13 |
14 | # Check that both folders contain the same number of files
15 | if len(txt_files) != len(jpg_files):
16 |     print("File counts do not match!")
17 | else:
18 |     # Rename each txt file after its corresponding jpg file
19 |     for txt_file, jpg_file in zip(txt_files, jpg_files):
20 |         # Base name of the jpg file (without extension)
21 |         base_name = os.path.splitext(os.path.basename(jpg_file))[0]
22 |         # Build the new txt file name
23 |         new_name = "{}.txt".format(base_name)
24 |         new_name_path = os.path.join(folder1, new_name)
25 |         # Rename the txt file
26 |         os.rename(txt_file, new_name_path)
--------------------------------------------------------------------------------
/dataset_prepare/rotate_with_label.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import math
4 | import os
5 | from tqdm import tqdm
6 | #
7 | # # Rotate the image together with its label boxes
8 | # def rotate_image_and_boxes(img, boxes, angle, scale=1.):
9 | # w, h = img.shape[1], img.shape[0]
10 | # cx, cy = w // 2, h // 2
11 | #
12 | # M = cv2.getRotationMatrix2D((cx, cy), angle, scale)
13 | # rotated_img = cv2.warpAffine(img, M, (w, h))
14 | #
15 | # rotated_boxes = []
16 | # for box in boxes:
17 | # label,x, y, w, h = box
18 | # corners = np.array([
19 | # [x-w/2, y-h/2],
20 | # [x-w/2, y+h/2],
21 | # [x+w/2, y-h/2],
22 | # [x+w/2, y+h/2]
23 | # ])
24 | #
25 | # corners = np.hstack((corners, np.ones((4, 1))))
26 | # corners = np.dot(M, corners.T).T
27 | # x_min, y_min = corners.min(axis=0)[:2]
28 | # x_max, y_max = corners.max(axis=0)[:2]
29 | #
30 | # rotated_boxes.append([label,x_min + (x_max - x_min) / 2, y_min + (y_max - y_min) / 2, x_max - x_min, y_max - y_min])
31 | #
32 | # return rotated_img, rotated_boxes
33 | #
34 | # # Read the annotation file
35 | # def read_annotation_file(file_path):
36 | # boxes = []
37 | # with open(file_path, 'r') as file:
38 | # lines = file.readlines()
39 | # for line in lines:
40 | # items = line.strip().split(" ")
41 | # class_id = int(items[0])
42 | # x, y, w, h = map(float, items[1:])
43 | # boxes.append([class_id, x, y, w, h])
44 | # return boxes
45 | #
46 | # img = cv2.imread(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\images\001.jpg')
47 | # boxes = read_annotation_file(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\labels\worktxt\001.txt')
48 | #
49 | # rotated_img, rotated_boxes = rotate_image_and_boxes(img, boxes, angle=30)
50 | #
51 | # cv2.imwrite(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\labels\rotated_image.jpg', rotated_img)
52 | # with open(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\labels\rotated_image.txt', 'w') as file:
53 | # for box in rotated_boxes:
54 | # file.write(" ".join(map(str, box)) + "\n")
55 |
56 | def rotate_image_and_boxes(image, boxes):
57 | (h, w) = image.shape[:2]
58 | center = (w / 2, h / 2)
59 |
60 | M = cv2.getRotationMatrix2D(center, 30, 1.0)
61 | rotated = cv2.warpAffine(image, M, (w, h))
62 |
63 |     # Convert each box to its four corners, rotate them, then take the axis-aligned bounding rect
64 |     new_boxes = []
65 |     for cls, bx, by, bw, bh in boxes:
66 |         points = np.int32(cv2.transform(np.array([[
67 |             [bx, by],
68 |             [bx + bw, by],
69 |             [bx + bw, by + bh],
70 |             [bx, by + bh]
71 |         ]], dtype=np.float32), M))
72 |         new_box = cv2.boundingRect(points)
73 |         new_boxes.append((cls, *new_box))
74 |
75 | return rotated, new_boxes
76 |
77 | for image_file in tqdm(os.listdir(r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\images/')):
78 | image = cv2.imread(fr'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators/images/{image_file}')
79 | base_name = os.path.splitext(image_file)[0]
80 | box_file = fr'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\Defective_Insulators\worktxt/{base_name}.txt'
81 | with open(box_file, 'r') as f:
82 | boxes = []
83 |         for line in f:
84 |             cls, *coords = line.strip().split()
85 |             x_center, y_center, box_w, box_h = map(float, coords)
86 |             x1 = (x_center - box_w / 2) * image.shape[1]
87 |             y1 = (y_center - box_h / 2) * image.shape[0]
88 |             x2 = x1 + box_w * image.shape[1]
89 |             y2 = y1 + box_h * image.shape[0]
90 |             boxes.append([int(cls), x1, y1, x2-x1, y2-y1])
91 | rotated_image, new_boxes = rotate_image_and_boxes(image, boxes)
92 |     # Save the rotated image
93 | img_path=fr'D:\desk\yolov5\dataset_raw\rotated_images_30/{base_name}_rotated_30.jpg'
94 | base_img_path = os.path.dirname(img_path)
95 | if not os.path.exists(base_img_path):
96 |         os.makedirs(base_img_path)  # create the directory if it does not exist
97 | cv2.imwrite(img_path, rotated_image)
98 |     # Save the rotated boxes, converted back to YOLO format
99 | out_file = fr'D:\desk\yolov5\dataset_raw\rotated_boxes_30/{base_name}_rotated_30.txt'
100 |
101 | base_out_path= os.path.dirname(out_file)
102 |
103 | if not os.path.exists(base_out_path):
104 |         os.makedirs(base_out_path)  # create the directory if it does not exist
105 | with open(out_file, 'w') as f:
106 |         for cls, bx, by, bw, bh in new_boxes:
107 |             x_center = (bx + bw / 2) / rotated_image.shape[1]
108 |             y_center = (by + bh / 2) / rotated_image.shape[0]
109 |             box_w = bw / rotated_image.shape[1]
110 |             box_h = bh / rotated_image.shape[0]
111 |             # write the class label carried through from the original annotation
112 |             f.write(f'{cls} {x_center} {y_center} {box_w} {box_h}\n')
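113 |
114 | # Note: each output box is the axis-aligned rectangle around the four rotated corners,
115 | # so rotated boxes are generally larger than the original annotations.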
--------------------------------------------------------------------------------
/dataset_prepare/synthetic_fog.py:
--------------------------------------------------------------------------------
1 | """
2 | Run this script directly to test the synthetic fog effect.
3 | Produced by: zhangzhengde@sjtu.edu.cn
4 | """
5 | import os, sys
6 | from pathlib import Path
7 | import argparse
8 | import math
9 | import cv2
10 | import copy
11 | import time
12 |
13 | import numpy as np
14 | import matplotlib.pyplot as plt
15 |
16 | pydir = Path(os.path.abspath(__file__)).parent
17 | if f'{pydir.parent}' not in sys.path:
18 | sys.path.insert(0, f'{pydir.parent}')
19 | os.chdir(f'{pydir.parent}')
20 |
21 |
22 | class SyntheticFog(object):
23 | def __init__(self):
24 | pass
25 |
26 |     def __call__(self, speed_up, img_path, out_path):
27 |         # img_path: path to one image; out_path: output directory (falsy -> display instead of save)
28 | # img_path = '../sources/IMG_6685.JPG'
29 | assert os.path.exists(img_path), f'error: img does not exists, {img_path}'
30 | img = copy.copy(cv2.imread(img_path))
31 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
32 | print(img.shape)
33 | img = img/255.0
34 | print(f'fogging...')
35 | t0 = time.time()
36 | br = 0.7
37 | th = 0.05
38 | fogged_img = self.fogging_img(
39 | img, brightness=br, thickness=th,
40 | high_efficiency = speed_up)
41 | print(f'fogging time: {(time.time()-t0)*1000:.4f}ms')
42 | rf = 1 # resize factor
43 | img = cv2.resize(img, (int(img.shape[1]*rf), int(img.shape[0]*rf)))
44 | fogged_img = cv2.resize(fogged_img, ((int(fogged_img.shape[1]*rf)), (int(fogged_img.shape[0]*rf))))
45 | fogged_img = np.array(fogged_img*255, dtype=np.uint8)
46 | # cv2.imshow('src', img)
47 | # cv2.imshow('fogged', fogged_img)
48 | # cv2.waitKey(0)
49 |         save = bool(out_path)
50 |         if save:
51 |             cv2.imwrite(os.path.join(out_path, f'{Path(img_path).stem}_br{br}_th{th}.jpg'), cv2.cvtColor(fogged_img, cv2.COLOR_RGB2BGR))  # convert back to BGR for OpenCV
52 | else:
53 | h, w, c = img.shape
54 | fig, ax = plt.subplots(1, 2, figsize=(w/100, h/100))
55 | ax[0].imshow(img)
56 | ax[1].imshow(fogged_img)
57 | plt.show()
58 |
59 |         # The extra cv2.imshow preview below would block on every image when fogging a whole
60 |         # directory (and the arrays are RGB, not BGR), so it is disabled:
61 |         # cv2.imshow('src', img); cv2.imshow('fogged', fogged_img); cv2.waitKey(0)
62 |
63 |
64 |     def fogging_img(self, img, brightness=0.7, thickness=0.06, high_efficiency=False):
65 | """
66 | fogging image
67 | :param img: src img
68 | :param brightness: brightness
69 | :param thickness: fog thickness, without fog when 0, max 0.1,
70 | :param high_efficiency: use matrix to improve fogging speed when high_efficiency is True, else use loops
71 | low efficiency: about 4000ms, high efficiency: about 80ms, tested in (864, 1152, 3) img
72 | :return: fogged image
73 | """
74 | assert 0 <= brightness <= 1
75 | assert 0 <= thickness <= 0.1
76 | fogged_img = img.copy()
77 | h, w, c = fogged_img.shape
78 |         if not high_efficiency:  # default per-pixel loop, low efficiency
79 |             size = np.sqrt(np.max(fogged_img.shape[:2]))  # fog size
80 |             center = (h // 2, w // 2)  # fog center
81 | # print(f'shape: {img.shape} center: {center} size: {size}') # 33
82 | # d_list = []
83 | for j in range(h): #
84 | for l in range(w):
85 | d = -0.04 * math.sqrt((j - center[0]) ** 2 + (l - center[1]) ** 2) + size
86 | # print(f'd {d}')
87 | td = math.exp(-thickness * d)
88 | # d_list.append(td)
89 | fogged_img[j][l][:] = fogged_img[j][l][:] * td + brightness * (1 - td)
90 | # x = np.arange(len(d_list))
91 | # plt.plot(x, d_list, 'o')
92 | # if j == 5:
93 | # break
94 |         else:  # use matrix ops  # TODO: with raw pixel coordinates the distance term didn't suit high-resolution images (fisheye-like look). done.
95 |             use_pixel = True
96 |             size = np.sqrt(np.max(fogged_img.shape[:2])) if use_pixel else 1  # fog size, sqrt(max(h, w))
97 | h, w, c = fogged_img.shape
98 | hc, wc = h // 2, w // 2
99 | mask = self.get_mask(h=h, w=w, hc=hc, wc=wc, pixel=use_pixel) # (h, w, 2) # O(max(w, h))
100 | d = -0.04 * np.linalg.norm(mask, axis=2) + size # (h, w, 2) -> (h, w), O(h*w)
101 |
102 | td = np.exp(-thickness * d)
103 |
104 | for cc in range(c):
105 | fogged_img[..., cc] = fogged_img[..., cc] * td + brightness*(1-td)
106 |
107 | # a = np.linalg.norm(mask, axis=2)
108 | # print(f'size: {fogged_img.shape} a: {a} max: {np.max(fogged_img)} {np.min(fogged_img)}')
109 |
110 |         fogged_img = np.clip(fogged_img, 0, 1)  # clip to remove black/white speckle noise
111 | # print(f'mask: {mask[:, :, 1]} {mask.shape}')
112 | # print(f'd: {d} {d.shape}')
113 |
114 | return fogged_img
115 |
116 | def get_mask(self, h, w, hc, wc, pixel=True):
117 | mask = np.zeros((h, w, 2), dtype=np.float32)
118 | if pixel:
119 | mask[:, :, 0] = np.repeat(np.arange(h).reshape((h, 1)), w, axis=1) - hc # loop o(h)
120 | mask[:, :, 1] = np.repeat(np.arange(w).reshape((1, w)), h, axis=0) - wc # loop o(w)
121 | else:
122 | mask[:, :, 0] = np.repeat(np.linspace(0, 1, h).reshape(h, 1), w, axis=1) - 0.5
123 | mask[:, :, 1] = np.repeat(np.linspace(0, 1, w).reshape((1, w)), h, axis=0) - 0.5
124 | return mask
125 |
126 |
127 | # if __name__ == '__main__':
128 | # parser = argparse.ArgumentParser(prog='synthetic_fog.py')
129 | # parser.add_argument('--speed_up', action='store_true', default=False, help='matrix optimization')
130 | # parser.add_argument('--source', type=str, default= 'data/SFID_demo/images/train/001040.jpg', help='source img path')
131 | # parser.add_argument('--save-dir', type=str, default=None, help='output img path')
132 | # opt = parser.parse_args()
133 | # print(opt)
134 | # synf = SyntheticFog()
135 | # synf(opt.speed_up,opt.source, opt.save_dir)
136 |
137 |
138 | if __name__ == '__main__':
139 | parser = argparse.ArgumentParser(prog='synthetic_fog.py')
140 |     parser.add_argument('--speed_up', action='store_true', default=True, help='matrix optimization (default True, so the fast path is always used)')
141 | parser.add_argument('--source', type=str, default= r'D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\images/', help='source img path')
142 | parser.add_argument('--save-dir', type=str, default=r"D:\desk\yolov5\insulator_defect\InsulatorDataSet-master\datasets\foggy_img/", help='output img path')
143 | opt = parser.parse_args()
144 | print(opt)
145 | synf = SyntheticFog()
146 |     if opt.save_dir: os.makedirs(opt.save_dir, exist_ok=True)  # make sure the output directory exists
147 | # Check if source is a directory
148 | if os.path.isdir(opt.source):
149 | # Iterate over every image in the source directory
150 | for img_file in os.listdir(opt.source):
151 | img_path = os.path.join(opt.source, img_file)
152 |
153 | # Check if it's a file
154 | if os.path.isfile(img_path):
155 | # Apply fog to the image
156 | synf(opt.speed_up, img_path, opt.save_dir)
157 | else:
158 | print("Provided source is not a valid directory!")
159 |
160 |
161 |
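162 | # Fog model in brief: for a pixel at distance r from the image center,
163 | # d = -0.04*r + sqrt(max(h, w)) and td = exp(-thickness*d); each channel becomes
164 | # pixel*td + brightness*(1-td), so pixels near the center are pulled hardest toward the fog.
165 | # Worked example (assumed values): an 864x1152 image gives size = sqrt(1152) ~ 33.9; at the
166 | # center d ~ 33.9, so with thickness=0.05, td = exp(-0.05*33.9) ~ 0.18 and a pixel value of
167 | # 0.5 maps to 0.5*0.18 + 0.7*0.82 ~ 0.66, i.e. mostly the fog brightness of 0.7.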
--------------------------------------------------------------------------------
/dataset_prepare/use.md:
--------------------------------------------------------------------------------
1 | # Dataset preparation scripts
2 | This folder contains the code used to build the power-line insulator and bird-nest detection datasets, including fog synthesis, rotation of images together with their YOLO-format boxes, random cropping, and train/val splitting.
3 |
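4 | The scripts here all read and write YOLO-format labels: one `class x_center y_center width height` line per box, normalized by the image size. A minimal sketch of the normalized-to-pixel conversion they apply before transforming images (names are illustrative):
5 |
6 | ```python
7 | def yolo_to_pixels(x_c, y_c, w, h, img_w, img_h):
8 |     """YOLO normalized box -> pixel-space corners (x1, y1, x2, y2)."""
9 |     return ((x_c - w / 2) * img_w, (y_c - h / 2) * img_h,
10 |             (x_c + w / 2) * img_w, (y_c + h / 2) * img_h)
11 |
12 | print(yolo_to_pixels(0.5, 0.5, 0.25, 0.5, 640, 480))  # (240.0, 120.0, 400.0, 360.0)
13 | ```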
--------------------------------------------------------------------------------
/label_format_conversion/coco_split_trainVal.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import shutil
4 | json_dir="demo/coco/annotations/annotations.json"
5 | with open(json_dir) as f:
6 | json_file = json.load(f)
7 | print('Total image count:', len(json_file['images']))
8 | print('Total annotation count:', len(json_file['annotations']))
9 |
10 |
11 | def get_key(images, image_id):
12 | for image in images:
13 |         if image["id"] == image_id:  # map the annotation's image_id back to the image file name
14 | return image["file_name"]
15 |
16 | background=[]
17 | obj=[]
18 | # read box info for csv format
19 | annotations = json_file['annotations']
20 | images = json_file['images']
21 |
22 | all_images=[]
23 | for image in images:
24 | all_images.append(image["file_name"])
25 |
26 | for annotation in annotations:
27 |     key = annotation["image_id"]  # id of the image this annotation belongs to
28 |     im_name = get_key(images, key)  # its file name
29 |     if im_name not in obj:
30 |         obj.append(im_name)
31 |
32 | #value = annotation["bbox"] + annotation["category_id"]
33 |
34 | # Remove background images (those without any annotation)
35 | print('Original image count:', len(images))
36 |
37 | print('Annotated image count:', len(obj))
38 |
39 | for img in images:
40 | if img["file_name"] not in obj:
41 | background.append(img)
42 |
43 | for i in background:
44 | images.remove(i)
45 | print('Image count after removing backgrounds:', len(images))
46 | # Select the images listed in obj
47 | image_dir='demo/coco/images'
48 | #dst_dir='/home/limzero/clear_images'
49 | #for name in obj:
50 | #shutil.copy(os.path.join(image_dir,name),os.path.join(dst_dir,name))
51 |
52 | json_file['images']=images
53 | with open('demo/coco/annotations/annotations_washed.json', 'w') as f:
54 | json.dump(json_file, f)
55 |
56 | # Split into train and val sets
57 | import random
58 | val = random.sample(obj, int(len(images)*0.1))
59 | train=[]
60 | for o in obj:
61 | if o not in val:
62 | train.append(o)
63 |
64 | #
65 | train_dir='demo/coco/train2017'
66 | val_dir='demo/coco/val2017'
67 | if not os.path.exists(train_dir):
68 | os.makedirs(train_dir)
69 | if not os.path.exists(val_dir):
70 | os.makedirs(val_dir)
71 | for v in val:
72 | shutil.copy(os.path.join(image_dir,v),os.path.join(val_dir,v))
73 | for t in train:
74 | shutil.copy(os.path.join(image_dir,t),os.path.join(train_dir,t))
75 |
76 |
77 | #annotations
78 |
79 | val_images=images[:]
80 | train_images=images[:]
81 | val_annotations=annotations[:]
82 | train_annotations=annotations[:]
83 |
84 | print('images:',len(images),'val:',len(val),'train',len(train))
85 | c=0
86 | for img in images:
87 | if img['file_name'] in train:
88 | c=c+1
89 | val_images.remove(img)
90 | else:
91 | train_images.remove(img)
92 | print('len(images):',len(images))
93 | print("c:",c)
94 | print('val_images:',len(val_images),'train_images:',len(train_images))
95 |
96 | def get_id(images,name):
97 | for image in images:
98 | if image['file_name']==name:
99 | return image['id']
100 | for t in train:
101 | id=get_id(images,t)
102 | for ann in annotations:
103 | if ann['image_id']==id:
104 | val_annotations.remove(ann)
105 | for v in val:
106 | id=get_id(images,v)
107 | for ann in annotations:
108 | if ann['image_id']==id:
109 | train_annotations.remove(ann)
110 | print('train_ann:',len(train_annotations),'val_ann:',len(val_annotations))
111 |
112 | json_train=json_file.copy()
113 | json_val=json_file.copy()
114 | json_train['images']=train_images
115 | json_train['annotations']=train_annotations
116 | json_val['images']=val_images
117 | json_val['annotations']=val_annotations
118 |
119 | #reindex
120 | for idx in range(len(json_train['annotations'])):
121 | json_train['annotations'][idx]['id'] = idx
122 |
123 | for idx in range(len(json_val['annotations'])):
124 | json_val['annotations'][idx]['id'] = idx
125 |
126 | #write in json file
127 | with open('demo/coco/annotations/train2017.json', 'w') as f:
128 | json.dump(json_train, f)
129 |
130 | with open('demo/coco/annotations/val2017.json', 'w') as f:
131 | json.dump(json_val, f)
132 |
133 |
134 |
--------------------------------------------------------------------------------
/label_format_conversion/coco_visulize.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 |
4 | from pycocotools.coco import COCO
5 |
6 | json_file = '/home/trojanjet/baidu_qyl/tianma/detect/mmdetection/data/coco/annotations/instances_val2017.json'
7 | dataset_dir = '/home/trojanjet/baidu_qyl/tianma/detect/mmdetection/data/coco/val2017/'
8 | coco = COCO(json_file)
9 | imgIds = coco.getImgIds()  # all image ids in the annotation file
10 | for i in range(len(imgIds)):
11 | img = coco.loadImgs(imgIds[i])[0]
12 | image = cv2.imread(dataset_dir + img['file_name'])
13 | annIds = coco.getAnnIds(imgIds=img['id'])
14 | annos = coco.loadAnns(annIds)
15 |     for ann in annos:
16 |         bbox = ann['bbox']
17 |         x, y, w, h = bbox
18 |         cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 255), 2)  # draws in place
19 |     cv2.imwrite('demo.jpg', image)
20 |     break  # only visualize the first image
21 |
22 |
--------------------------------------------------------------------------------
/label_format_conversion/generate_persudo_json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | import sys
4 | import os
5 | import codecs
6 | import cv2
7 | import json
8 | underwater_classes = ['holothurian', 'echinus', 'scallop', 'starfish']
9 |
10 |
11 |
12 | # Generate pseudo-label VOC XML files for the test set from a detection-results json
13 |
14 |
15 | def interpr_json():
16 | test_json_raw = json.load(open("../../data/train/annotations/testA.json", "r"))
17 | test_json = json.load(open("../../results/cas_r50.bbox.json" , "r"))
18 | img_dir='../../data/test-A-image'
19 | root = '../../data/persudo/'
20 | img = test_json_raw['images']
21 | images = []
22 | imgid2anno = {}
23 | imgid2name = {}
24 | for imageinfo in test_json_raw['images']:
25 | imgid = imageinfo['id']
26 | imgid2name[imgid] = imageinfo['file_name']
27 | for anno in test_json:
28 | img_id = anno['image_id']
29 | if img_id not in imgid2anno:
30 | imgid2anno[img_id] = []
31 | imgid2anno[img_id].append(anno)
32 | for imgid, annos in imgid2anno.items():
33 | image_name = imgid2name[imgid]
34 | image_id = image_name.split('.')[0]
35 | image_path = os.path.join(img_dir, image_id + '.jpg')
36 | img = cv2.imread(image_path)
37 | height, width ,depth= img.shape
38 | with codecs.open(root+ image_id + '_test.xml', 'w', 'utf-8') as xml:
39 |             xml.write('<annotation>\n')
40 |             xml.write('\t<filename>' + image_id + '_test' + '</filename>\n')
41 |             xml.write('\t<size>\n')
42 |             xml.write('\t\t<width>' + str(width) + '</width>\n')
43 |             xml.write('\t\t<height>' + str(height) + '</height>\n')
44 |             xml.write('\t\t<depth>' + str(depth) + '</depth>\n')
45 |             xml.write('\t</size>\n')
46 | cnt=0
47 | for anno in annos:
48 | xmin, ymin, w, h = anno['bbox']
49 | xmax = xmin + w
50 | ymax = ymin + h
51 | xmin = int(xmin)
52 | ymin = int(ymin)
53 | xmax = int(xmax)
54 | ymax = int(ymax)
55 | confidence = anno['score']
56 | class_id = int(anno['category_id'])
57 | class_name = underwater_classes[class_id - 1]
58 | image_name = imgid2name[imgid]
59 | image_id = image_name.split('.')[0]
60 | #
61 |             if cnt==0:
62 |                 xml.write('\t<object>\n'
63 |                           '\t\t<name>' + class_name + '</name>\n'
64 |                           '\t\t<bndbox>\n'
65 |                           '\t\t\t<xmin>' + str(xmin) + '</xmin>\n'
66 |                           '\t\t\t<ymin>' + str(ymin) + '</ymin>\n'
67 |                           '\t\t\t<xmax>' + str(xmax) + '</xmax>\n'
68 |                           '\t\t\t<ymax>' + str(ymax) + '</ymax>\n'
69 |                           '\t\t</bndbox>\n'
70 |                           '\t</object>\n')
71 |                 cnt+=1
72 |             if confidence>0.4:
73 |                 cnt+=1
74 |                 xml.write('\t<object>\n'
75 |                           '\t\t<name>' + class_name + '</name>\n'
76 |                           '\t\t<bndbox>\n'
77 |                           '\t\t\t<xmin>' + str(xmin) + '</xmin>\n'
78 |                           '\t\t\t<ymin>' + str(ymin) + '</ymin>\n'
79 |                           '\t\t\t<xmax>' + str(xmax) + '</xmax>\n'
80 |                           '\t\t\t<ymax>' + str(ymax) + '</ymax>\n'
81 |                           '\t\t</bndbox>\n'
82 |                           '\t</object>\n')
83 |         assert cnt>0
84 |     xml.write('</annotation>')
85 |
86 | interpr_json()
87 |
--------------------------------------------------------------------------------
/label_format_conversion/make_voc.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import json
4 | import codecs
5 | import cv2
6 | import shutil
7 | from config import Config
8 |
9 | obstacles_classes = ['施工围挡', '路障', '锥桶', '告示牌1','告示牌2','施工痕迹','施工机械','工地正门']
10 | opt=Config()
11 | rawImgDir=opt.raw_data_dir
12 | rawLabelDir=opt.raw_json
13 | anno_dir='../demo/voc/annotations/'
14 | image_dir='../demo/voc/JPEGImages'
15 | if not os.path.exists(anno_dir):
16 | os.makedirs(anno_dir)
17 | if not os.path.exists(image_dir):
18 | os.makedirs(image_dir)
19 | with open(rawLabelDir) as f:
20 | d=json.load(f)
21 | #
22 | annos=d['annotations']
23 | for anno in annos:
24 | status=anno['status']
25 | frames=anno['frames']
26 | imgId = anno['id']
27 | if status==3:
28 | for frame in frames:
29 | if 'obstacles' not in frame:
30 | continue
31 | obstacles=frame['obstacles']
32 | bboxs=[item['bbox'] for item in obstacles]
33 | frame_name=frame['frame_name']
34 | imgId_frame_name=imgId+'_'+frame_name
35 | image_path=os.path.join(rawImgDir, imgId, frame_name)
36 | shutil.copy(os.path.join(rawImgDir, imgId, frame_name), os.path.join(image_dir, imgId_frame_name))
37 | img = cv2.imread(image_path)
38 | height, width, depth = img.shape
39 | with codecs.open(anno_dir + imgId_frame_name[:-4] + '.xml', 'w', 'utf-8') as xml:
40 |                 xml.write('<annotation>\n')
41 |                 xml.write('\t<filename>' + imgId_frame_name + '</filename>\n')
42 |                 xml.write('\t<size>\n')
43 |                 xml.write('\t\t<width>' + str(width) + '</width>\n')
44 |                 xml.write('\t\t<height>' + str(height) + '</height>\n')
45 |                 xml.write('\t\t<depth>' + str(depth) + '</depth>\n')
46 |                 xml.write('\t</size>\n')
47 | cnt = 0
48 | for bbox in bboxs:
49 | xmin, ymin, xmax, ymax = bbox
50 | class_name = 'obstacles'
51 | #
52 |                     xml.write('\t<object>\n'
53 |                               '\t\t<name>' + class_name + '</name>\n'
54 |                               '\t\t<bndbox>\n'
55 |                               '\t\t\t<xmin>' + str(xmin) + '</xmin>\n'
56 |                               '\t\t\t<ymin>' + str(ymin) + '</ymin>\n'
57 |                               '\t\t\t<xmax>' + str(xmax) + '</xmax>\n'
58 |                               '\t\t\t<ymax>' + str(ymax) + '</ymax>\n'
59 |                               '\t\t</bndbox>\n'
60 |                               '\t</object>\n')
61 |                     cnt += 1
62 |                 assert cnt > 0
63 |                 xml.write('</annotation>')
64 |
--------------------------------------------------------------------------------
/label_format_conversion/readme.md:
--------------------------------------------------------------------------------
1 | # Object detection annotation format conversion
2 | The scripts in this folder mainly convert between VOC-format .xml annotations and YOLO-format .txt annotations.
3 |
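4 | For reference, the two formats describe the same box differently: VOC stores pixel corners (`xmin, ymin, xmax, ymax`) while YOLO stores a normalized center and size. A minimal sketch of the conversion performed by voc_to_yoloV5.py (the numbers below are made up):
5 |
6 | ```python
7 | def voc_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
8 |     """VOC pixel corners -> YOLO normalized (x_center, y_center, w, h)."""
9 |     return ((xmin + xmax) / 2 / img_w, (ymin + ymax) / 2 / img_h,
10 |             (xmax - xmin) / img_w, (ymax - ymin) / img_h)
11 |
12 | # a 200x100-pixel box at (100, 50) in a 640x480 image:
13 | print(voc_to_yolo(100, 50, 300, 150, 640, 480))  # (0.3125, 0.2083..., 0.3125, 0.2083...)
14 | ```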
--------------------------------------------------------------------------------
/label_format_conversion/voc_split_trainVal.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import sys
4 |
5 |
6 | root_path = 'demo/voc'
7 |
8 | xmlfilepath = root_path + '/Annotations'
9 |
10 | txtsavepath = root_path + '/ImageSets/Main'
11 |
12 |
13 | if not os.path.exists(txtsavepath):
14 | os.makedirs(txtsavepath)
15 |
16 | trainval_percent = 1
17 | train_percent = 0.9
18 | total_xml = os.listdir(xmlfilepath)
19 | num = len(total_xml)
20 | indices = range(num)
21 | tv = int(num * trainval_percent)
22 | tr = int(tv * train_percent)
23 | trainval = random.sample(indices, tv)
24 | train = random.sample(trainval, tr)
25 |
26 | print("train and val size:", tv)
27 | print("train size:", tr)
28 |
29 | ftrainval = open(txtsavepath + '/trainval.txt', 'w')
30 | ftest = open(txtsavepath + '/test.txt', 'w')
31 | ftrain = open(txtsavepath + '/train.txt', 'w')
32 | fval = open(txtsavepath + '/val.txt', 'w')
33 |
34 | for i in indices:
35 | name = total_xml[i][:-4] + '\n'
36 | if i in trainval:
37 | ftrainval.write(name)
38 | if i in train:
39 | ftrain.write(name)
40 | else:
41 | fval.write(name)
42 | else:
43 | ftest.write(name)
44 |
45 | ftrainval.close()
46 | ftrain.close()
47 | fval.close()
48 | ftest.close()
49 |
--------------------------------------------------------------------------------
/label_format_conversion/voc_to_coco_v1.py:
--------------------------------------------------------------------------------
1 | # -*- coding=utf-8 -*-
2 | #!/usr/bin/python
3 |
4 | import sys
5 | import os
6 | import shutil
7 | import numpy as np
8 | import json
9 | import xml.etree.ElementTree as ET
10 | # import mmcv  # only needed for the mmcv.dump alternative below
11 | # Starting ID for bounding boxes
12 | START_BOUNDING_BOX_ID = 1
13 | # The category list need not be predefined; it is built and updated from the classes found in the XML files
14 | PRE_DEFINE_CATEGORIES = {}
15 | # If necessary, pre-define category and its id
16 | # PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
17 | # "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
18 | # "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
19 | # "motorbike": 14, "person": 15, "pottedplant": 16,
20 | # "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}
21 |
22 |
23 | def get(root, name):
24 | vars = root.findall(name)
25 | return vars
26 |
27 |
28 | def get_and_check(root, name, length):
29 | vars = root.findall(name)
30 | if len(vars) == 0:
31 | raise NotImplementedError('Can not find %s in %s.'%(name, root.tag))
32 | if length > 0 and len(vars) != length:
33 | raise NotImplementedError('The size of %s is supposed to be %d, but is %d.'%(name, length, len(vars)))
34 | if length == 1:
35 | vars = vars[0]
36 | return vars
37 |
38 |
39 |
40 | def convert(xml_list, xml_dir, json_file):
41 | '''
42 |     :param xml_list: list of XML files to convert
43 |     :param xml_dir: directory containing the XML files
44 |     :param json_file: output path of the exported json file
45 | :return: None
46 | '''
47 | list_fp = xml_list
48 | image_id=1
49 |     # Basic annotation structure
50 | json_dict = {"images":[],
51 | "type": "instances",
52 | "annotations": [],
53 | "categories": []}
54 | categories = PRE_DEFINE_CATEGORIES
55 | bnd_id = START_BOUNDING_BOX_ID
56 | for line in list_fp:
57 | line = line.strip()
58 | print(" Processing {}".format(line))
59 |         # Parse the XML
60 | xml_f = os.path.join(xml_dir, line)
61 | tree = ET.parse(xml_f)
62 | root = tree.getroot()
63 | filename = root.find('filename').text
64 |         # file name extracted above; now allocate the image id
65 | image_id+=1
66 | size = get_and_check(root, 'size', 1)
67 |         # Basic image info
68 | width = int(get_and_check(size, 'width', 1).text)
69 | height = int(get_and_check(size, 'height', 1).text)
70 | image = {'file_name': filename,
71 | 'height': height,
72 | 'width': width,
73 | 'id':image_id}
74 | json_dict['images'].append(image)
75 |         # Process each annotated detection box
76 | for obj in get(root, 'object'):
77 |             # Class name of the detection box
78 | category = get_and_check(obj, 'name', 1).text
79 |             # Update the category-ID dict
80 | if category not in categories:
81 | new_id = len(categories)
82 | categories[category] = new_id
83 | category_id = categories[category]
84 | bndbox = get_and_check(obj, 'bndbox', 1)
85 | xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
86 | ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
87 | xmax = int(get_and_check(bndbox, 'xmax', 1).text)
88 | ymax = int(get_and_check(bndbox, 'ymax', 1).text)
89 | assert(xmax > xmin)
90 | assert(ymax > ymin)
91 | o_width = abs(xmax - xmin)
92 | o_height = abs(ymax - ymin)
93 | annotation = dict()
94 | annotation['area'] = o_width*o_height
95 | annotation['iscrowd'] = 0
96 | annotation['image_id'] = image_id
97 | annotation['bbox'] = [xmin, ymin, o_width, o_height]
98 | annotation['category_id'] = category_id
99 | annotation['id'] = bnd_id
100 | annotation['ignore'] = 0
101 |             # Segmentation polygon from the box corners, counter-clockwise order
102 | annotation['segmentation'] = [[xmin,ymin,xmin,ymax,xmax,ymax,xmax,ymin]]
103 |
104 | json_dict['annotations'].append(annotation)
105 | bnd_id = bnd_id + 1
106 |
107 |     # Write the category-ID dict into the json
108 | for cate, cid in categories.items():
109 | cat = {'supercategory': 'none', 'id': cid, 'name': cate}
110 | json_dict['categories'].append(cat)
111 |     # Export to json
112 |     #mmcv.dump(json_dict, json_file)
113 |
114 | json_data = json.dumps(json_dict)
115 | with open(json_file, 'w') as w:
116 | w.write(json_data)
117 |
118 |
119 | if __name__ == '__main__':
120 | root_path = './demo'
121 |
122 | if not os.path.exists(os.path.join(root_path,'coco/annotations')):
123 | os.makedirs(os.path.join(root_path,'coco/annotations'))
124 | if not os.path.exists(os.path.join(root_path, 'coco/train2014')):
125 | os.makedirs(os.path.join(root_path, 'coco/train2014'))
126 | if not os.path.exists(os.path.join(root_path, 'coco/val2014')):
127 | os.makedirs(os.path.join(root_path, 'coco/val2014'))
128 |     xml_dir = os.path.join(root_path,'voc/Annotations')  # existing VOC annotations
129 |
130 | xml_labels = os.listdir(xml_dir)
131 | np.random.shuffle(xml_labels)
132 | split_point = int(len(xml_labels)/10)
133 |
134 | # validation data
135 | xml_list = xml_labels[0:split_point]
136 | json_file = os.path.join(root_path,'coco/annotations/instances_val2014.json')
137 | convert(xml_list, xml_dir, json_file)
138 | for xml_file in xml_list:
139 | img_name = xml_file[:-4] + '.jpg'
140 | shutil.copy(os.path.join(root_path, 'voc/JPEGImages', img_name),
141 | os.path.join(root_path, 'coco/val2014', img_name))
142 | # train data
143 | xml_list = xml_labels[split_point:]
144 | json_file = os.path.join(root_path,'coco/annotations/instances_train2014.json')
145 | convert(xml_list, xml_dir, json_file)
146 | for xml_file in xml_list:
147 | img_name = xml_file[:-4] + '.jpg'
148 | shutil.copy(os.path.join(root_path, 'voc/JPEGImages', img_name),
149 | os.path.join(root_path, 'coco/train2014', img_name))
150 |
--------------------------------------------------------------------------------
/label_format_conversion/voc_to_coco_v2.py:
--------------------------------------------------------------------------------
1 |
2 | import os.path as osp
3 | import xml.etree.ElementTree as ET
4 |
5 | import mmcv
6 | import os
7 |
8 | from glob import glob
9 | from tqdm import tqdm
10 | from PIL import Image
11 | def object_classes():  # define your dataset's object classes here
12 | return ['window_shielding', 'multi_signs', 'non_traffic_sign']
13 | label_ids = {name: i + 1 for i, name in enumerate(object_classes())}
14 | print(label_ids)
15 |
16 | def get_segmentation(points):
17 |
18 | return [points[0], points[1], points[2] + points[0], points[1],
19 | points[2] + points[0], points[3] + points[1], points[0], points[3] + points[1]]
20 |
21 |
22 | def parse_xml(xml_path, img_id, anno_id):
23 | tree = ET.parse(xml_path)
24 | root = tree.getroot()
25 | annotation = []
26 | for obj in root.findall('object'):
27 | name = obj.find('name').text
28 |         if name == 'xxx':  # skip any class you want to ignore
29 | continue
30 | category_id = label_ids[name]
31 | bnd_box = obj.find('bndbox')
32 | xmin = int(bnd_box.find('xmin').text)
33 | ymin = int(bnd_box.find('ymin').text)
34 | xmax = int(bnd_box.find('xmax').text)
35 | ymax = int(bnd_box.find('ymax').text)
36 | w = xmax - xmin + 1
37 | h = ymax - ymin + 1
38 | area = w*h
39 | segmentation = get_segmentation([xmin, ymin, w, h])
40 | annotation.append({
41 |             "segmentation": [segmentation],  # COCO expects a list of polygons
42 | "area": area,
43 | "iscrowd": 0,
44 | "image_id": img_id,
45 | "bbox": [xmin, ymin, w, h],
46 | "category_id": category_id,
47 | "id": anno_id,
48 | "ignore": 0})
49 | anno_id += 1
50 | return annotation, anno_id
51 |
52 |
53 | def cvt_annotations(img_path, xml_path, out_file):
54 | images = []
55 | annotations = []
56 |
57 | # xml_paths = glob(xml_path + '/*.xml')
58 | img_id = 1
59 | anno_id = 1
60 | for img_path in tqdm(glob(img_path + '/*.jpg')):
61 | w, h = Image.open(img_path).size
62 | img_name = osp.basename(img_path)
63 | img = {"file_name": img_name, "height": int(h), "width": int(w), "id": img_id}
64 | images.append(img)
65 |
66 | xml_file_name = img_name.split('.')[0] + '.xml'
67 | xml_file_path = osp.join(xml_path, xml_file_name)
68 | annos, anno_id = parse_xml(xml_file_path, img_id, anno_id)
69 | annotations.extend(annos)
70 | img_id += 1
71 |
72 | categories = []
73 | for k,v in label_ids.items():
74 | categories.append({"name": k, "id": v})
75 | final_result = {"images": images, "annotations": annotations, "categories": categories}
76 | mmcv.dump(final_result, out_file)
77 | return annotations
78 |
79 |
80 | def main():
81 |
82 | xml_path = 'demo/voc/Annotations'
83 | img_path = 'demo/voc/JPEGImages'
84 | print('processing {} ...'.format("xml format annotations"))
85 | cvt_annotations(img_path, xml_path, 'demo/coco/annotations/annotations.json')
86 | print('Done!')
87 |
88 |
89 | if __name__ == '__main__':
90 | root_path='./demo'
91 | if not os.path.exists(os.path.join(root_path,'coco/annotations')):
92 | os.makedirs(os.path.join(root_path,'coco/annotations'))
93 | main()
94 |
--------------------------------------------------------------------------------
/label_format_conversion/voc_to_yoloV3.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | import os
3 | import cv2
4 | classes = ['window_shielding', 'multi_signs', 'non_traffic_sign']
5 |
6 | def convert_annotation(image_id):
7 | in_file = open('demo/voc/Annotations/%s.xml' % image_id)
8 |
9 | if not os.path.exists('demo/yolov3/custom/labels/'):
10 | os.makedirs('demo/yolov3/custom/labels/')
11 |     out_file_img = open('demo/yolov3/custom/trainval.txt', 'a')  # the image-list txt file
12 |
13 |     out_file_label = open('demo/yolov3/custom/labels/%s.txt' % image_id,'a')  # the per-image label txt file
14 |
15 | tree = ET.parse(in_file)
16 | root = tree.getroot()
17 | size = root.find('size')
18 | voc_img_dir='demo/voc/JPEGImages/{}.jpg'.format(image_id)
19 | out_file_img.write(voc_img_dir)
20 | out_file_img.write("\n")
21 | img=cv2.imread(voc_img_dir)
22 | dh = 1. / img.shape[0]
23 | dw = 1. / img.shape[1]
24 | cnt=len(root.findall('object'))
25 | if cnt==0:
26 |         print('no object annotations found:')
27 | print(image_id)
28 | cc=0
29 | for obj in root.iter('object'):
30 | cc+=1
31 | cls = obj.find('name').text
32 | if cls not in classes:
33 | continue
34 | cls_id = classes.index(cls)
35 | xmlbox = obj.find('bndbox')
36 | if dw*float(xmlbox.find('xmin').text)<0. or dw*float(xmlbox.find('xmax').text)<0. or dh*float(xmlbox.find('ymin').text)<0. or dh*float(xmlbox.find('ymax').text)<0.:
37 | print(image_id)
38 |
39 | b = (dw*float(xmlbox.find('xmin').text), dw*float(xmlbox.find('xmax').text), dh*float(xmlbox.find('ymin').text),
40 | dh*float(xmlbox.find('ymax').text))
41 | out_file_label.write(str(cls_id)+ " " + str((b[0]+b[1])/2) + " " + str((b[2]+b[3])/2) + " " + str(b[1]-b[0]) + " " + str(b[3]-b[2]))
42 |         if cc < cnt:
43 |             out_file_label.write('\n')
--------------------------------------------------------------------------------
/label_format_conversion/voc_to_yoloV5.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as ET
2 | import os
3 |
4 | # object classes (assumed to match voc_to_yoloV3.py)
5 | classes = ['window_shielding', 'multi_signs', 'non_traffic_sign']
6 |
7 |
8 | def convert(size, box):
9 |     # size = (width, height) in pixels; box = (xmin, xmax, ymin, ymax) in pixels
10 |     # returns the normalized YOLO (x_center, y_center, w, h)
11 |     dw = 1./size[0]
12 |     dh = 1./size[1]
13 |     x = (box[0] + box[1])/2.0
14 |     y = (box[2] + box[3])/2.0
15 |     w = box[1] - box[0]
16 |     h = box[3] - box[2]
17 |     x = x*dw
18 |     w = w*dw
19 |     y = y*dh
20 |     h = h*dh
21 |     # clamp values that land on the image border
22 |     if x>=1:
23 |         x=0.99
24 |     if y>=1:
25 |         y=0.99
26 |     if w>=1:
27 | w=0.99
28 | if h>=1:
29 | h=0.99
30 | return (x,y,w,h)
31 |
32 | def convert_annotation(rootpath,xmlname):
33 | xmlpath = rootpath + '/Annotations'
34 | xmlfile = os.path.join(xmlpath,xmlname)
35 | with open(xmlfile, "r", encoding='UTF-8') as in_file:
36 | txtname = xmlname[:-4]+'.txt'
37 | print(txtname)
38 |         txtpath = rootpath + '/worktxt'  # the generated .txt files are saved under the worktxt directory
39 | if not os.path.exists(txtpath):
40 | os.makedirs(txtpath)
41 | txtfile = os.path.join(txtpath,txtname)
42 | with open(txtfile, "w+" ,encoding='UTF-8') as out_file:
43 | tree=ET.parse(in_file)
44 | root = tree.getroot()
45 | size = root.find('size')
46 | w = int(size.find('width').text)
47 | h = int(size.find('height').text)
48 | out_file.truncate()
49 | for obj in root.iter('object'):
50 | difficult = obj.find('difficult').text
51 | cls = obj.find('name').text
52 | if cls not in classes or int(difficult)==1:
53 | continue
54 | cls_id = classes.index(cls)
55 | xmlbox = obj.find('bndbox')
56 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
57 | bb = convert((w,h), b)
58 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
59 |
60 |
61 | if __name__ == "__main__":
62 |     rootpath='demo/voc'
63 |     xmlpath=rootpath+'/Annotations'
64 |     files = os.listdir(xmlpath)
65 |     for i in range(len(files)):
66 |         path = os.path.join(xmlpath, files[i])
67 |         if ('.xml' in path) or ('.XML' in path):
68 |             convert_annotation(rootpath, files[i])
69 |             print('done', i)
70 |         else:
71 |             print('not xml file', i)
72 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__init__.py
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/models/__pycache__/common.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/common.cpython-310.pyc
--------------------------------------------------------------------------------
/models/__pycache__/experimental.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/experimental.cpython-310.pyc
--------------------------------------------------------------------------------
/models/__pycache__/yolo.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcd955/Insulator_defect-nest_detection/f01a20bfebaca6af3c9e6d68d15a6bc17af18916/models/__pycache__/yolo.cpython-310.pyc
--------------------------------------------------------------------------------
/models/experimental.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 | """
3 | Experimental modules
4 | """
5 | import math
6 |
7 | import numpy as np
8 | import torch
9 | import torch.nn as nn
10 |
11 | from utils.downloads import attempt_download
12 |
13 |
14 | class Sum(nn.Module):
15 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
16 | def __init__(self, n, weight=False): # n: number of inputs
17 | super().__init__()
18 | self.weight = weight # apply weights boolean
19 | self.iter = range(n - 1) # iter object
20 | if weight:
21 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
22 |
23 | def forward(self, x):
24 | y = x[0] # no weight
25 | if self.weight:
26 | w = torch.sigmoid(self.w) * 2
27 | for i in self.iter:
28 | y = y + x[i + 1] * w[i]
29 | else:
30 | for i in self.iter:
31 | y = y + x[i + 1]
32 | return y
33 |
34 |
35 | class MixConv2d(nn.Module):
36 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
37 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
38 | super().__init__()
39 | n = len(k) # number of convolutions
40 | if equal_ch: # equal c_ per group
41 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
42 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels
43 | else: # equal weight.numel() per group
44 | b = [c2] + [0] * n
45 | a = np.eye(n + 1, n, k=-1)
46 | a -= np.roll(a, 1, axis=1)
47 | a *= np.array(k) ** 2
48 | a[0] = 1
49 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
50 |
51 | self.m = nn.ModuleList([
52 | nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
53 | self.bn = nn.BatchNorm2d(c2)
54 | self.act = nn.SiLU()
55 |
56 | def forward(self, x):
57 | return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
58 |
59 |
60 | class Ensemble(nn.ModuleList):
61 | # Ensemble of models
62 | def __init__(self):
63 | super().__init__()
64 |
65 | def forward(self, x, augment=False, profile=False, visualize=False):
66 | y = [module(x, augment, profile, visualize)[0] for module in self]
67 | # y = torch.stack(y).max(0)[0] # max ensemble
68 | # y = torch.stack(y).mean(0) # mean ensemble
69 | y = torch.cat(y, 1) # nms ensemble
70 | return y, None # inference, train output
71 |
72 |
73 | def attempt_load(weights, device=None, inplace=True, fuse=True):
74 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
75 | from models.yolo import Detect, Model
76 |
77 | model = Ensemble()
78 | for w in weights if isinstance(weights, list) else [weights]:
79 | ckpt = torch.load(attempt_download(w), map_location='cpu') # load
80 | ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
81 |
82 | # Model compatibility updates
83 | if not hasattr(ckpt, 'stride'):
84 | ckpt.stride = torch.tensor([32.])
85 | if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
86 | ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
87 |
88 | model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
89 |
90 | # Module updates
91 | for m in model.modules():
92 | t = type(m)
93 | if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
94 | m.inplace = inplace
95 | if t is Detect and not isinstance(m.anchor_grid, list):
96 | delattr(m, 'anchor_grid')
97 | setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
98 | elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
99 | m.recompute_scale_factor = None # torch 1.11.0 compatibility
100 |
101 | # Return model
102 | if len(model) == 1:
103 | return model[-1]
104 |
105 | # Return detection ensemble
106 | print(f'Ensemble created with {weights}\n')
107 | for k in 'names', 'nc', 'yaml':
108 | setattr(model, k, getattr(model[0], k))
109 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
110 | assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
111 | return model
112 |
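113 | # Minimal usage sketch (assumes a local checkpoint such as 'yolov5s.pt'):
114 | #   from models.experimental import attempt_load
115 | #   model = attempt_load('yolov5s.pt', device='cpu')      # single model
116 | #   ensemble = attempt_load(['best1.pt', 'best2.pt'])     # NMS ensemble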
--------------------------------------------------------------------------------
/models/hub/anchors.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 | # Default anchors for COCO data
3 |
4 |
5 | # P5 -------------------------------------------------------------------------------------------------------------------
6 | # P5-640:
7 | anchors_p5_640:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 |
13 | # P6 -------------------------------------------------------------------------------------------------------------------
14 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
15 | anchors_p6_640:
16 | - [9,11, 21,19, 17,41] # P3/8
17 | - [43,32, 39,70, 86,64] # P4/16
18 | - [65,131, 134,130, 120,265] # P5/32
19 | - [282,180, 247,354, 512,387] # P6/64
20 |
21 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
22 | anchors_p6_1280:
23 | - [19,27, 44,40, 38,94] # P3/8
24 | - [96,68, 86,152, 180,137] # P4/16
25 | - [140,301, 303,264, 238,542] # P5/32
26 | - [436,615, 739,380, 925,792] # P6/64
27 |
28 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
29 | anchors_p6_1920:
30 | - [28,41, 67,59, 57,141] # P3/8
31 | - [144,103, 129,227, 270,205] # P4/16
32 | - [209,452, 455,396, 358,812] # P5/32
33 | - [653,922, 1109,570, 1387,1187] # P6/64
34 |
35 |
36 | # P7 -------------------------------------------------------------------------------------------------------------------
37 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
38 | anchors_p7_640:
39 | - [11,11, 13,30, 29,20] # P3/8
40 | - [30,46, 61,38, 39,92] # P4/16
41 | - [78,80, 146,66, 79,163] # P5/32
42 | - [149,150, 321,143, 157,303] # P6/64
43 | - [257,402, 359,290, 524,372] # P7/128
44 |
45 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
46 | anchors_p7_1280:
47 | - [19,22, 54,36, 32,77] # P3/8
48 | - [70,83, 138,71, 75,173] # P4/16
49 | - [165,159, 148,334, 375,151] # P5/32
50 | - [334,317, 251,626, 499,474] # P6/64
51 | - [750,326, 534,814, 1079,818] # P7/128
52 |
53 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
54 | anchors_p7_1920:
55 | - [29,34, 81,55, 47,115] # P3/8
56 | - [105,124, 207,107, 113,259] # P4/16
57 | - [247,238, 222,500, 563,227] # P5/32
58 | - [501,476, 376,939, 749,711] # P6/64
59 | - [1126,489, 801,1222, 1618,1227] # P7/128
60 |
--------------------------------------------------------------------------------
/models/hub/yolov3-spp.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3-SPP head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, SPP, [512, [5, 9, 13]]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/models/hub/yolov3-tiny.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,14, 23,27, 37,58] # P4/16
9 | - [81,82, 135,169, 344,319] # P5/32
10 |
11 | # YOLOv3-tiny backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [[-1, 1, Conv, [16, 3, 1]], # 0
15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
16 | [-1, 1, Conv, [32, 3, 1]],
17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
18 | [-1, 1, Conv, [64, 3, 1]],
19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
20 | [-1, 1, Conv, [128, 3, 1]],
21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
22 | [-1, 1, Conv, [256, 3, 1]],
23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
24 | [-1, 1, Conv, [512, 3, 1]],
25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
27 | ]
28 |
29 | # YOLOv3-tiny head
30 | head:
31 | [[-1, 1, Conv, [1024, 3, 1]],
32 | [-1, 1, Conv, [256, 1, 1]],
33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
34 |
35 | [-2, 1, Conv, [128, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
39 |
40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
41 | ]
42 |
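Lines 25-26 pair an asymmetric ZeroPad2d with a 2x2 stride-1 MaxPool2d. A quick shape check in plain PyTorch (illustrative only) shows the pair preserves the P5 grid instead of halving it again, matching the Darknet tiny reference:

import torch
import torch.nn as nn

x = torch.zeros(1, 512, 20, 20)  # P5/32 feature map for a 640x640 input
y = nn.MaxPool2d(2, 1, 0)(nn.ZeroPad2d((0, 1, 0, 1))(x))  # pad right/bottom only
print(y.shape)  # torch.Size([1, 512, 20, 20]) -- spatial size preserved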
--------------------------------------------------------------------------------
/models/hub/yolov3.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3 head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, Conv, [512, 1, 1]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/models/hub/yolov5-bifpn.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 BiFPN head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
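The only structural change relative to the PANet head is the three-input Concat on line 40, which adds a direct skip from backbone P4. BiFPN as published (EfficientDet) additionally learns per-input fusion weights; for comparison, a hedged sketch of that fast-normalized fusion, which this yaml does not implement:

import torch
import torch.nn as nn

class WeightedFusion(nn.Module):
    # Illustrative module, not part of this repo: learnable normalized weights
    def __init__(self, n_inputs, eps=1e-4):
        super().__init__()
        self.w = nn.Parameter(torch.ones(n_inputs))
        self.eps = eps

    def forward(self, xs):  # xs: list of same-shape feature maps
        w = torch.relu(self.w)
        w = w / (w.sum() + self.eps)
        return sum(wi * xi for wi, xi in zip(w, xs))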
--------------------------------------------------------------------------------
/models/hub/yolov5-fpn.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 FPN head
28 | head:
29 | [[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
30 |
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 3, C3, [512, False]], # 14 (P4/16-medium)
35 |
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 1, Conv, [256, 1, 1]],
39 | [-1, 3, C3, [256, False]], # 18 (P3/8-small)
40 |
41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
42 | ]
43 |
--------------------------------------------------------------------------------
/models/hub/yolov5-p2.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8 |
9 | # YOLOv5 v6.0 backbone
10 | backbone:
11 | # [from, number, module, args]
12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 | [-1, 3, C3, [128]],
15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 | [-1, 6, C3, [256]],
17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 | [-1, 9, C3, [512]],
19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
20 | [-1, 3, C3, [1024]],
21 | [-1, 1, SPPF, [1024, 5]], # 9
22 | ]
23 |
24 | # YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
25 | head:
26 | [[-1, 1, Conv, [512, 1, 1]],
27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
29 | [-1, 3, C3, [512, False]], # 13
30 |
31 | [-1, 1, Conv, [256, 1, 1]],
32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
35 |
36 | [-1, 1, Conv, [128, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 2], 1, Concat, [1]], # cat backbone P2
39 | [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
40 |
41 | [-1, 1, Conv, [128, 3, 2]],
42 | [[-1, 18], 1, Concat, [1]], # cat head P3
43 | [-1, 3, C3, [256, False]], # 24 (P3/8-small)
44 |
45 | [-1, 1, Conv, [256, 3, 2]],
46 | [[-1, 14], 1, Concat, [1]], # cat head P4
47 | [-1, 3, C3, [512, False]], # 27 (P4/16-medium)
48 |
49 | [-1, 1, Conv, [512, 3, 2]],
50 | [[-1, 10], 1, Concat, [1]], # cat head P5
51 | [-1, 3, C3, [1024, False]], # 30 (P5/32-large)
52 |
53 | [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
54 | ]
55 |
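The extra P2/4 output targets very small objects at the cost of far more predictions. Simple stride arithmetic (not project code) for a 640x640 input:

imgsz = 640
for level, stride in [("P2", 4), ("P3", 8), ("P4", 16), ("P5", 32)]:
    n = imgsz // stride
    print(f"{level}/{stride}: {n}x{n} grid = {n * n} cells")
# P2/4 alone contributes 160x160 = 25600 cells, 4x the P3 count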
--------------------------------------------------------------------------------
/models/hub/yolov5-p34.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8 |
9 | # YOLOv5 v6.0 backbone
10 | backbone:
11 | # [from, number, module, args]
12 | [ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
13 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
14 | [ -1, 3, C3, [ 128 ] ],
15 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
16 | [ -1, 6, C3, [ 256 ] ],
17 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
18 | [ -1, 9, C3, [ 512 ] ],
19 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
20 | [ -1, 3, C3, [ 1024 ] ],
21 | [ -1, 1, SPPF, [ 1024, 5 ] ], # 9
22 | ]
23 |
24 | # YOLOv5 v6.0 head with (P3, P4) outputs
25 | head:
26 | [ [ -1, 1, Conv, [ 512, 1, 1 ] ],
27 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
28 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
29 | [ -1, 3, C3, [ 512, False ] ], # 13
30 |
31 | [ -1, 1, Conv, [ 256, 1, 1 ] ],
32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
33 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
34 | [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
35 |
36 | [ -1, 1, Conv, [ 256, 3, 2 ] ],
37 | [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
38 | [ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
39 |
40 | [ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
41 | ]
42 |
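This is the first file above with non-unit multiples (0.33/0.50). How one row is scaled, assuming the standard parse_model() rules (the same depth/width logic appears in models/tf.py later in this dump):

import math

def make_divisible(x, divisor=8):
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50                        # depth_multiple, width_multiple
n, c2 = 6, 256                             # e.g. the backbone row [-1, 6, C3, [256]]
n = max(round(n * gd), 1) if n > 1 else n  # repeat count: 6 -> 2
c2 = make_divisible(c2 * gw, 8)            # output channels: 256 -> 128
print(n, c2)  # 2 128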
--------------------------------------------------------------------------------
/models/hub/yolov5-p6.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8 |
9 | # YOLOv5 v6.0 backbone
10 | backbone:
11 | # [from, number, module, args]
12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 | [-1, 3, C3, [128]],
15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 | [-1, 6, C3, [256]],
17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 | [-1, 9, C3, [512]],
19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
20 | [-1, 3, C3, [768]],
21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
22 | [-1, 3, C3, [1024]],
23 | [-1, 1, SPPF, [1024, 5]], # 11
24 | ]
25 |
26 | # YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
27 | head:
28 | [[-1, 1, Conv, [768, 1, 1]],
29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
31 | [-1, 3, C3, [768, False]], # 15
32 |
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
36 | [-1, 3, C3, [512, False]], # 19
37 |
38 | [-1, 1, Conv, [256, 1, 1]],
39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
41 | [-1, 3, C3, [256, False]], # 23 (P3/8-small)
42 |
43 | [-1, 1, Conv, [256, 3, 2]],
44 | [[-1, 20], 1, Concat, [1]], # cat head P4
45 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
46 |
47 | [-1, 1, Conv, [512, 3, 2]],
48 | [[-1, 16], 1, Concat, [1]], # cat head P5
49 | [-1, 3, C3, [768, False]], # 29 (P5/32-large)
50 |
51 | [-1, 1, Conv, [768, 3, 2]],
52 | [[-1, 12], 1, Concat, [1]], # cat head P6
53 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
54 |
55 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
56 | ]
57 |
--------------------------------------------------------------------------------
/models/hub/yolov5-p7.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8 |
9 | # YOLOv5 v6.0 backbone
10 | backbone:
11 | # [from, number, module, args]
12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 | [-1, 3, C3, [128]],
15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 | [-1, 6, C3, [256]],
17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 | [-1, 9, C3, [512]],
19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
20 | [-1, 3, C3, [768]],
21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
22 | [-1, 3, C3, [1024]],
23 | [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
24 | [-1, 3, C3, [1280]],
25 | [-1, 1, SPPF, [1280, 5]], # 13
26 | ]
27 |
28 | # YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
29 | head:
30 | [[-1, 1, Conv, [1024, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 10], 1, Concat, [1]], # cat backbone P6
33 | [-1, 3, C3, [1024, False]], # 17
34 |
35 | [-1, 1, Conv, [768, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
38 | [-1, 3, C3, [768, False]], # 21
39 |
40 | [-1, 1, Conv, [512, 1, 1]],
41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
43 | [-1, 3, C3, [512, False]], # 25
44 |
45 | [-1, 1, Conv, [256, 1, 1]],
46 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
47 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
48 | [-1, 3, C3, [256, False]], # 29 (P3/8-small)
49 |
50 | [-1, 1, Conv, [256, 3, 2]],
51 | [[-1, 26], 1, Concat, [1]], # cat head P4
52 | [-1, 3, C3, [512, False]], # 32 (P4/16-medium)
53 |
54 | [-1, 1, Conv, [512, 3, 2]],
55 | [[-1, 22], 1, Concat, [1]], # cat head P5
56 | [-1, 3, C3, [768, False]], # 35 (P5/32-large)
57 |
58 | [-1, 1, Conv, [768, 3, 2]],
59 | [[-1, 18], 1, Concat, [1]], # cat head P6
60 | [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
61 |
62 | [-1, 1, Conv, [1024, 3, 2]],
63 | [[-1, 14], 1, Concat, [1]], # cat head P7
64 | [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
65 |
66 | [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
67 | ]
68 |
--------------------------------------------------------------------------------
/models/hub/yolov5-panet.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 PANet head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/hub/yolov5l6.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [19,27, 44,40, 38,94] # P3/8
9 | - [96,68, 86,152, 180,137] # P4/16
10 | - [140,301, 303,264, 238,542] # P5/32
11 | - [436,615, 739,380, 925,792] # P6/64
12 |
13 | # YOLOv5 v6.0 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 | [-1, 3, C3, [1024]],
27 | [-1, 1, SPPF, [1024, 5]], # 11
28 | ]
29 |
30 | # YOLOv5 v6.0 head
31 | head:
32 | [[-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 15
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 19
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 20], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 16], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 12], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58 |
59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/models/hub/yolov5m6.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # model depth multiple
6 | width_multiple: 0.75 # layer channel multiple
7 | anchors:
8 | - [19,27, 44,40, 38,94] # P3/8
9 | - [96,68, 86,152, 180,137] # P4/16
10 | - [140,301, 303,264, 238,542] # P5/32
11 | - [436,615, 739,380, 925,792] # P6/64
12 |
13 | # YOLOv5 v6.0 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 | [-1, 3, C3, [1024]],
27 | [-1, 1, SPPF, [1024, 5]], # 11
28 | ]
29 |
30 | # YOLOv5 v6.0 head
31 | head:
32 | [[-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 15
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 19
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 20], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 16], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 12], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58 |
59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/models/hub/yolov5n6.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.25 # layer channel multiple
7 | anchors:
8 | - [19,27, 44,40, 38,94] # P3/8
9 | - [96,68, 86,152, 180,137] # P4/16
10 | - [140,301, 303,264, 238,542] # P5/32
11 | - [436,615, 739,380, 925,792] # P6/64
12 |
13 | # YOLOv5 v6.0 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 | [-1, 3, C3, [1024]],
27 | [-1, 1, SPPF, [1024, 5]], # 11
28 | ]
29 |
30 | # YOLOv5 v6.0 head
31 | head:
32 | [[-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 15
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 19
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 20], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 16], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 12], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58 |
59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/models/hub/yolov5s-LeakyReLU.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
6 | depth_multiple: 0.33 # model depth multiple
7 | width_multiple: 0.50 # layer channel multiple
8 | anchors:
9 | - [10,13, 16,30, 33,23] # P3/8
10 | - [30,61, 62,45, 59,119] # P4/16
11 | - [116,90, 156,198, 373,326] # P5/32
12 |
13 | # YOLOv5 v6.0 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [1024]],
25 | [-1, 1, SPPF, [1024, 5]], # 9
26 | ]
27 |
28 | # YOLOv5 v6.0 head
29 | head:
30 | [[-1, 1, Conv, [512, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 3, C3, [512, False]], # 13
34 |
35 | [-1, 1, Conv, [256, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39 |
40 | [-1, 1, Conv, [256, 3, 2]],
41 | [[-1, 14], 1, Concat, [1]], # cat head P4
42 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43 |
44 | [-1, 1, Conv, [512, 3, 2]],
45 | [[-1, 10], 1, Concat, [1]], # cat head P5
46 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47 |
48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49 | ]
50 |
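The activation: key on line 5 swaps the default SiLU for every Conv in the network. A hedged sketch of the mechanism, assuming the upstream pattern where parse_model() reassigns a class-level default (this Conv is a simplified stand-in for the one in models/common.py):

import torch.nn as nn

class Conv(nn.Module):
    default_act = nn.SiLU()  # class-level default, swapped once at parse time

    def __init__(self, c1, c2, k=1, s=1):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, k // 2, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

Conv.default_act = nn.LeakyReLU(0.1)  # what the yaml's activation: line triggers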
--------------------------------------------------------------------------------
/models/hub/yolov5s-ghost.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3Ghost, [128]],
18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3Ghost, [256]],
20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3Ghost, [512]],
22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3Ghost, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, GhostConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3Ghost, [512, False]], # 13
33 |
34 | [-1, 1, GhostConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, GhostConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, GhostConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
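GhostConv/C3Ghost trade accuracy for FLOPs by generating half of the output channels with a regular conv and deriving the other half from it with a cheap depthwise 5x5 (GhostNet, Han et al.). A hedged sketch mirroring the upstream models/common.py definition, BN and activation omitted:

import torch
import torch.nn as nn

class GhostConvSketch(nn.Module):
    def __init__(self, c1, c2, k=1, s=1):
        super().__init__()
        c_ = c2 // 2  # primary channels
        self.cv1 = nn.Conv2d(c1, c_, k, s, k // 2, bias=False)        # primary conv
        self.cv2 = nn.Conv2d(c_, c_, 5, 1, 2, groups=c_, bias=False)  # cheap op

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)  # cheap half stacked on primary half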
--------------------------------------------------------------------------------
/models/hub/yolov5s-transformer.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
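C3TR on line 23 swaps the C3 bottlenecks for transformer layers on the low-resolution P5 map, where attention is affordable. A hedged sketch of one such layer, mirroring the upstream TransformerLayer in models/common.py (which likewise omits LayerNorm):

import torch.nn as nn

class TransformerLayerSketch(nn.Module):
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):  # x: (sequence, batch, c) flattened feature map
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x  # attention + residual
        return self.fc2(self.fc1(x)) + x                     # MLP + residual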
--------------------------------------------------------------------------------
/models/hub/yolov5s6.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [19,27, 44,40, 38,94] # P3/8
9 | - [96,68, 86,152, 180,137] # P4/16
10 | - [140,301, 303,264, 238,542] # P5/32
11 | - [436,615, 739,380, 925,792] # P6/64
12 |
13 | # YOLOv5 v6.0 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 | [-1, 3, C3, [1024]],
27 | [-1, 1, SPPF, [1024, 5]], # 11
28 | ]
29 |
30 | # YOLOv5 v6.0 head
31 | head:
32 | [[-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 15
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 19
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 20], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 16], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 12], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58 |
59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/models/hub/yolov5x6.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.33 # model depth multiple
6 | width_multiple: 1.25 # layer channel multiple
7 | anchors:
8 | - [19,27, 44,40, 38,94] # P3/8
9 | - [96,68, 86,152, 180,137] # P4/16
10 | - [140,301, 303,264, 238,542] # P5/32
11 | - [436,615, 739,380, 925,792] # P6/64
12 |
13 | # YOLOv5 v6.0 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
26 | [-1, 3, C3, [1024]],
27 | [-1, 1, SPPF, [1024, 5]], # 11
28 | ]
29 |
30 | # YOLOv5 v6.0 head
31 | head:
32 | [[-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 15
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 19
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 20], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 16], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 12], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
58 |
59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/models/readme.md:
--------------------------------------------------------------------------------
1 | The files in this folder are mainly YAML configuration files for the improved YOLOv5 model variants.
2 |
--------------------------------------------------------------------------------
/models/segment/yolov5l-seg.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 | ]
49 |
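In the Segment row on line 47, the extra args [nc, anchors, 32, 256] set 32 mask coefficients per detection and a 256-channel proto net. A toy sketch (shapes only, not the repo's post-processing) of how instance masks are assembled from the shared prototypes:

import torch

n_det, nm = 5, 32                   # detections kept after NMS, mask coefficients
protos = torch.randn(nm, 160, 160)  # prototype masks at 1/4 input resolution
coeffs = torch.randn(n_det, nm)     # per-detection coefficients from the head
masks = (coeffs @ protos.view(nm, -1)).sigmoid().view(n_det, 160, 160)
print(masks.shape)  # torch.Size([5, 160, 160]) -- one soft mask per detection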
--------------------------------------------------------------------------------
/models/segment/yolov5m-seg.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # model depth multiple
6 | width_multiple: 0.75 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/segment/yolov5n-seg.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.25 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/segment/yolov5s-seg.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.5 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/segment/yolov5x-seg.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.33 # model depth multiple
6 | width_multiple: 1.25 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/tf.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 | """
3 | TensorFlow, Keras and TFLite versions of YOLOv5
4 | Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
5 |
6 | Usage:
7 | $ python models/tf.py --weights yolov5s.pt
8 |
9 | Export:
10 | $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
11 | """
12 |
13 | import argparse
14 | import sys
15 | from copy import deepcopy
16 | from pathlib import Path
17 |
18 | FILE = Path(__file__).resolve()
19 | ROOT = FILE.parents[1] # YOLOv5 root directory
20 | if str(ROOT) not in sys.path:
21 | sys.path.append(str(ROOT)) # add ROOT to PATH
22 | # ROOT = ROOT.relative_to(Path.cwd()) # relative
23 |
24 | import numpy as np
25 | import tensorflow as tf
26 | import torch
27 | import torch.nn as nn
28 | from tensorflow import keras
29 |
30 | from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
31 | DWConvTranspose2d, Focus, autopad)
32 | from models.experimental import MixConv2d, attempt_load
33 | from models.yolo import Detect, Segment
34 | from utils.activations import SiLU
35 | from utils.general import LOGGER, make_divisible, print_args
36 |
37 |
38 | class TFBN(keras.layers.Layer):
39 | # TensorFlow BatchNormalization wrapper
40 | def __init__(self, w=None):
41 | super().__init__()
42 | self.bn = keras.layers.BatchNormalization(
43 | beta_initializer=keras.initializers.Constant(w.bias.numpy()),
44 | gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
45 | moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
46 | moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
47 | epsilon=w.eps)
48 |
49 | def call(self, inputs):
50 | return self.bn(inputs)
51 |
52 |
53 | class TFPad(keras.layers.Layer):
54 | # Pad inputs in spatial dimensions 1 and 2
55 | def __init__(self, pad):
56 | super().__init__()
57 | if isinstance(pad, int):
58 | self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
59 | else: # tuple/list
60 | self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
61 |
62 | def call(self, inputs):
63 | return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
64 |
65 |
66 | class TFConv(keras.layers.Layer):
67 | # Standard convolution
68 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
69 | # ch_in, ch_out, weights, kernel, stride, padding, groups
70 | super().__init__()
71 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
72 | # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
73 | # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
74 | conv = keras.layers.Conv2D(
75 | filters=c2,
76 | kernel_size=k,
77 | strides=s,
78 | padding='SAME' if s == 1 else 'VALID',
79 | use_bias=not hasattr(w, 'bn'),
80 | kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
81 | bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
82 | self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
83 | self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
84 | self.act = activations(w.act) if act else tf.identity
85 |
86 | def call(self, inputs):
87 | return self.act(self.bn(self.conv(inputs)))
88 |
89 |
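# Note on why TFPad is used when s != 1 (hedged): Keras 'SAME' pads only
# bottom/right on even inputs, while PyTorch's symmetric padding pads both
# sides, so strided convs would shift by one pixel. Padding explicitly with
# TFPad and then using 'VALID' reproduces PyTorch exactly; e.g. k=3, s=2 on a
# 640 input: pad both sides -> 642, then (642 - 3) // 2 + 1 = 320, as in PyTorch.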
90 | class TFDWConv(keras.layers.Layer):
91 | # Depthwise convolution
92 | def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
93 | # ch_in, ch_out, weights, kernel, stride, padding, groups
94 | super().__init__()
95 | assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
96 | conv = keras.layers.DepthwiseConv2D(
97 | kernel_size=k,
98 | depth_multiplier=c2 // c1,
99 | strides=s,
100 | padding='SAME' if s == 1 else 'VALID',
101 | use_bias=not hasattr(w, 'bn'),
102 | depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
103 | bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
104 | self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
105 | self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
106 | self.act = activations(w.act) if act else tf.identity
107 |
108 | def call(self, inputs):
109 | return self.act(self.bn(self.conv(inputs)))
110 |
111 |
112 | class TFDWConvTranspose2d(keras.layers.Layer):
113 | # Depthwise ConvTranspose2d
114 | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
115 | # ch_in, ch_out, weights, kernel, stride, padding, groups
116 | super().__init__()
117 | assert c1 == c2, f'TFDWConv() output={c2} must be equal to input={c1} channels'
118 | assert k == 4 and p1 == 1, 'TFDWConv() only valid for k=4 and p1=1'
119 | weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
120 | self.c1 = c1
121 | self.conv = [
122 | keras.layers.Conv2DTranspose(filters=1,
123 | kernel_size=k,
124 | strides=s,
125 | padding='VALID',
126 | output_padding=p2,
127 | use_bias=True,
128 | kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
129 | bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]
130 |
131 | def call(self, inputs):
132 | return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
133 |
134 |
135 | class TFFocus(keras.layers.Layer):
136 | # Focus wh information into c-space
137 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
138 | # ch_in, ch_out, kernel, stride, padding, groups
139 | super().__init__()
140 | self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
141 |
142 | def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
143 | # inputs = inputs / 255 # normalize 0-255 to 0-1
144 | inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
145 | return self.conv(tf.concat(inputs, 3))
146 |
147 |
148 | class TFBottleneck(keras.layers.Layer):
149 | # Standard bottleneck
150 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
151 | super().__init__()
152 | c_ = int(c2 * e) # hidden channels
153 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
154 | self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
155 | self.add = shortcut and c1 == c2
156 |
157 | def call(self, inputs):
158 | return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
159 |
160 |
161 | class TFCrossConv(keras.layers.Layer):
162 | # Cross Convolution
163 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
164 | super().__init__()
165 | c_ = int(c2 * e) # hidden channels
166 | self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
167 | self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
168 | self.add = shortcut and c1 == c2
169 |
170 | def call(self, inputs):
171 | return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
172 |
173 |
174 | class TFConv2d(keras.layers.Layer):
175 | # Substitution for PyTorch nn.Conv2D
176 | def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
177 | super().__init__()
178 | assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
179 | self.conv = keras.layers.Conv2D(filters=c2,
180 | kernel_size=k,
181 | strides=s,
182 | padding='VALID',
183 | use_bias=bias,
184 | kernel_initializer=keras.initializers.Constant(
185 | w.weight.permute(2, 3, 1, 0).numpy()),
186 | bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)
187 |
188 | def call(self, inputs):
189 | return self.conv(inputs)
190 |
191 |
192 | class TFBottleneckCSP(keras.layers.Layer):
193 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
194 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
195 | # ch_in, ch_out, number, shortcut, groups, expansion
196 | super().__init__()
197 | c_ = int(c2 * e) # hidden channels
198 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
199 | self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
200 | self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
201 | self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
202 | self.bn = TFBN(w.bn)
203 | self.act = lambda x: keras.activations.swish(x)
204 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
205 |
206 | def call(self, inputs):
207 | y1 = self.cv3(self.m(self.cv1(inputs)))
208 | y2 = self.cv2(inputs)
209 | return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
210 |
211 |
212 | class TFC3(keras.layers.Layer):
213 | # CSP Bottleneck with 3 convolutions
214 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
215 | # ch_in, ch_out, number, shortcut, groups, expansion
216 | super().__init__()
217 | c_ = int(c2 * e) # hidden channels
218 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
219 | self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
220 | self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
221 | self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
222 |
223 | def call(self, inputs):
224 | return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
225 |
226 |
227 | class TFC3x(keras.layers.Layer):
228 | # 3 module with cross-convolutions
229 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
230 | # ch_in, ch_out, number, shortcut, groups, expansion
231 | super().__init__()
232 | c_ = int(c2 * e) # hidden channels
233 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
234 | self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
235 | self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
236 | self.m = keras.Sequential([
237 | TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
238 |
239 | def call(self, inputs):
240 | return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
241 |
242 |
243 | class TFSPP(keras.layers.Layer):
244 | # Spatial pyramid pooling layer used in YOLOv3-SPP
245 | def __init__(self, c1, c2, k=(5, 9, 13), w=None):
246 | super().__init__()
247 | c_ = c1 // 2 # hidden channels
248 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
249 | self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
250 | self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
251 |
252 | def call(self, inputs):
253 | x = self.cv1(inputs)
254 | return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
255 |
256 |
257 | class TFSPPF(keras.layers.Layer):
258 | # Spatial pyramid pooling-Fast layer
259 | def __init__(self, c1, c2, k=5, w=None):
260 | super().__init__()
261 | c_ = c1 // 2 # hidden channels
262 | self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
263 | self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
264 | self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
265 |
266 | def call(self, inputs):
267 | x = self.cv1(inputs)
268 | y1 = self.m(x)
269 | y2 = self.m(y1)
270 | return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
271 |
272 |
273 | class TFDetect(keras.layers.Layer):
274 | # TF YOLOv5 Detect layer
275 | def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
276 | super().__init__()
277 | self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
278 | self.nc = nc # number of classes
279 | self.no = nc + 5 # number of outputs per anchor
280 | self.nl = len(anchors) # number of detection layers
281 | self.na = len(anchors[0]) // 2 # number of anchors
282 | self.grid = [tf.zeros(1)] * self.nl # init grid
283 | self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
284 | self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
285 | self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
286 | self.training = False # set to False after building model
287 | self.imgsz = imgsz
288 | for i in range(self.nl):
289 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
290 | self.grid[i] = self._make_grid(nx, ny)
291 |
292 | def call(self, inputs):
293 | z = [] # inference output
294 | x = []
295 | for i in range(self.nl):
296 | x.append(self.m[i](inputs[i]))
297 | # x(bs,20,20,255) to x(bs,3,20,20,85)
298 | ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
299 | x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
300 |
301 | if not self.training: # inference
302 | y = x[i]
303 | grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
304 | anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
305 | xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
306 | wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
307 | # Normalize xywh to 0-1 to reduce calibration error
308 | xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
309 | wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
310 | y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
311 | z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
312 |
313 | return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), )
314 |
315 | @staticmethod
316 | def _make_grid(nx=20, ny=20):
317 | # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
318 | # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
319 | xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
320 | return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
321 |
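# Worked example of the inference decode in TFDetect.call above (illustrative
# values): stride 8, grid cell (10, 10) with the -0.5 offset already folded
# into `grid`, anchor width 29 px, raw sigmoid outputs 0.6 (xy) and 0.55 (wh):
#   xy = (0.6 * 2 + (10 - 0.5)) * 8 = 85.6 px
#   wh = (0.55 * 2) ** 2 * 29 = 35.09 px  (the * 4 is folded into anchor_grid)
# Both are then divided by imgsz so TFLite calibration sees values in [0, 1].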
322 |
323 | class TFSegment(TFDetect):
324 | # YOLOv5 Segment head for segmentation models
325 | def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
326 | super().__init__(nc, anchors, ch, imgsz, w)
327 | self.nm = nm # number of masks
328 | self.npr = npr # number of protos
329 | self.no = 5 + nc + self.nm # number of outputs per anchor
330 | self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
331 | self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
332 | self.detect = TFDetect.call
333 |
334 | def call(self, x):
335 | p = self.proto(x[0])
336 | # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
337 | p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
338 | x = self.detect(self, x)
339 | return (x, p) if self.training else (x[0], p)
340 |
341 |
342 | class TFProto(keras.layers.Layer):
343 |
344 | def __init__(self, c1, c_=256, c2=32, w=None):
345 | super().__init__()
346 | self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
347 | self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
348 | self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
349 | self.cv3 = TFConv(c_, c2, w=w.cv3)
350 |
351 | def call(self, inputs):
352 | return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
353 |
354 |
355 | class TFUpsample(keras.layers.Layer):
356 | # TF version of torch.nn.Upsample()
357 | def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
358 | super().__init__()
359 | assert scale_factor % 2 == 0, 'scale_factor must be multiple of 2'
360 | self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
361 | # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
362 | # with default arguments: align_corners=False, half_pixel_centers=False
363 | # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
364 | # size=(x.shape[1] * 2, x.shape[2] * 2))
365 |
366 | def call(self, inputs):
367 | return self.upsample(inputs)
368 |
369 |
370 | class TFConcat(keras.layers.Layer):
371 | # TF version of torch.concat()
372 | def __init__(self, dimension=1, w=None):
373 | super().__init__()
374 | assert dimension == 1, 'convert only NCHW to NHWC concat'
375 | self.d = 3
376 |
377 | def call(self, inputs):
378 | return tf.concat(inputs, self.d)
379 |
380 |
381 | def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
382 | LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
383 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
384 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
385 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
386 |
387 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
388 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
389 | m_str = m
390 | m = eval(m) if isinstance(m, str) else m # eval strings
391 | for j, a in enumerate(args):
392 | try:
393 | args[j] = eval(a) if isinstance(a, str) else a # eval strings
394 | except NameError:
395 | pass
396 |
397 | n = max(round(n * gd), 1) if n > 1 else n # depth gain
398 | if m in [
399 | nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
400 | BottleneckCSP, C3, C3x]:
401 | c1, c2 = ch[f], args[0]
402 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
403 |
404 | args = [c1, c2, *args[1:]]
405 | if m in [BottleneckCSP, C3, C3x]:
406 | args.insert(2, n)
407 | n = 1
408 | elif m is nn.BatchNorm2d:
409 | args = [ch[f]]
410 | elif m is Concat:
411 | c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
412 | elif m in [Detect, Segment]:
413 | args.append([ch[x + 1] for x in f])
414 | if isinstance(args[1], int): # number of anchors
415 | args[1] = [list(range(args[1] * 2))] * len(f)
416 | if m is Segment:
417 | args[3] = make_divisible(args[3] * gw, 8)
418 | args.append(imgsz)
419 | else:
420 | c2 = ch[f]
421 |
422 | tf_m = eval('TF' + m_str.replace('nn.', ''))
423 | m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
424 | else tf_m(*args, w=model.model[i]) # module
425 |
426 | torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
427 | t = str(m)[8:-2].replace('__main__.', '') # module type
428 | np = sum(x.numel() for x in torch_m_.parameters()) # number params
429 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
430 | LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
431 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
432 | layers.append(m_)
433 | ch.append(c2)
434 | return keras.Sequential(layers), sorted(save)
435 |
436 |
437 | class TFModel:
438 | # TF YOLOv5 model
439 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
440 | super().__init__()
441 | if isinstance(cfg, dict):
442 | self.yaml = cfg # model dict
443 | else: # is *.yaml
444 | import yaml # for torch hub
445 | self.yaml_file = Path(cfg).name
446 | with open(cfg) as f:
447 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
448 |
449 | # Define model
450 | if nc and nc != self.yaml['nc']:
451 | LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
452 | self.yaml['nc'] = nc # override yaml value
453 | self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
454 |
455 | def predict(self,
456 | inputs,
457 | tf_nms=False,
458 | agnostic_nms=False,
459 | topk_per_class=100,
460 | topk_all=100,
461 | iou_thres=0.45,
462 | conf_thres=0.25):
463 | y = [] # outputs
464 | x = inputs
465 | for m in self.model.layers:
466 | if m.f != -1: # if not from previous layer
467 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
468 |
469 | x = m(x) # run
470 | y.append(x if m.i in self.savelist else None) # save output
471 |
472 | # Add TensorFlow NMS
473 | if tf_nms:
474 | boxes = self._xywh2xyxy(x[0][..., :4])
475 | probs = x[0][:, :, 4:5]
476 | classes = x[0][:, :, 5:]
477 | scores = probs * classes
478 | if agnostic_nms:
479 | nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
480 | else:
481 | boxes = tf.expand_dims(boxes, 2)
482 | nms = tf.image.combined_non_max_suppression(boxes,
483 | scores,
484 | topk_per_class,
485 | topk_all,
486 | iou_thres,
487 | conf_thres,
488 | clip_boxes=False)
489 | return (nms, )
490 | return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
491 | # x = x[0] # [x(1,6300,85), ...] to x(6300,85)
492 | # xywh = x[..., :4] # x(6300,4) boxes
493 | # conf = x[..., 4:5] # x(6300,1) confidences
494 | # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
495 | # return tf.concat([conf, cls, xywh], 1)
496 |
497 | @staticmethod
498 | def _xywh2xyxy(xywh):
499 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
500 | x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
501 | return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
502 |
503 |
504 | class AgnosticNMS(keras.layers.Layer):
505 | # TF Agnostic NMS
506 | def call(self, input, topk_all, iou_thres, conf_thres):
507 | # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
508 | return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
509 | input,
510 | fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
511 | name='agnostic_nms')
512 |
513 | @staticmethod
514 | def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
515 | boxes, classes, scores = x
516 | class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
517 | scores_inp = tf.reduce_max(scores, -1)
518 | selected_inds = tf.image.non_max_suppression(boxes,
519 | scores_inp,
520 | max_output_size=topk_all,
521 | iou_threshold=iou_thres,
522 | score_threshold=conf_thres)
523 | selected_boxes = tf.gather(boxes, selected_inds)
524 | padded_boxes = tf.pad(selected_boxes,
525 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
526 | mode='CONSTANT',
527 | constant_values=0.0)
528 | selected_scores = tf.gather(scores_inp, selected_inds)
529 | padded_scores = tf.pad(selected_scores,
530 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
531 | mode='CONSTANT',
532 | constant_values=-1.0)
533 | selected_classes = tf.gather(class_inds, selected_inds)
534 | padded_classes = tf.pad(selected_classes,
535 | paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
536 | mode='CONSTANT',
537 | constant_values=-1.0)
538 | valid_detections = tf.shape(selected_inds)[0]
539 | return padded_boxes, padded_scores, padded_classes, valid_detections
540 |
541 |
542 | def activations(act=nn.SiLU):
543 | # Returns TF activation from input PyTorch activation
544 | if isinstance(act, nn.LeakyReLU):
545 | return lambda x: keras.activations.relu(x, alpha=0.1)
546 | elif isinstance(act, nn.Hardswish):
547 | return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
548 | elif isinstance(act, (nn.SiLU, SiLU)):
549 | return lambda x: keras.activations.swish(x)
550 | else:
551 | raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')
552 |
553 |
554 | def representative_dataset_gen(dataset, ncalib=100):
555 | # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
556 | for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
557 | im = np.transpose(img, [1, 2, 0])
558 | im = np.expand_dims(im, axis=0).astype(np.float32)
559 | im /= 255
560 | yield [im]
561 | if n >= ncalib:
562 | break
563 |
564 |
565 | def run(
566 | weights=ROOT / 'yolov5s.pt', # weights path
567 | imgsz=(640, 640), # inference size h,w
568 | batch_size=1, # batch size
569 | dynamic=False, # dynamic batch size
570 | ):
571 | # PyTorch model
572 | im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
573 | model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
574 | _ = model(im) # inference
575 | model.info()
576 |
577 | # TensorFlow model
578 | im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
579 | tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
580 | _ = tf_model.predict(im) # inference
581 |
582 | # Keras model
583 | im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
584 | keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
585 | keras_model.summary()
586 |
587 | LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
588 |
589 |
590 | def parse_opt():
591 | parser = argparse.ArgumentParser()
592 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
593 | parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
594 | parser.add_argument('--batch-size', type=int, default=1, help='batch size')
595 | parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
596 | opt = parser.parse_args()
597 | opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
598 | print_args(vars(opt))
599 | return opt
600 |
601 |
602 | def main(opt):
603 | run(**vars(opt))
604 |
605 |
606 | if __name__ == '__main__':
607 | opt = parse_opt()
608 | main(opt)
609 |
--------------------------------------------------------------------------------
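`representative_dataset_gen` above only yields calibration batches; the actual int8 wiring lives in export.py. A minimal sketch of that wiring, assuming `keras_model` and a YOLOv5 `LoadImages`-style `dataset` have already been built as in `run()`:

```python
import tensorflow as tf

# Int8 quantization sketch (assumptions: keras_model from run(), dataset yields
# (path, img, im0s, vid_cap, string) tuples like YOLOv5's LoadImages).
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8  # quantized input/output types
converter.inference_output_type = tf.uint8
open('yolov5s-int8.tflite', 'wb').write(converter.convert())
```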
/models/yolo.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 | """
3 | YOLO-specific modules
4 |
5 | Usage:
6 | $ python models/yolo.py --cfg yolov5s.yaml
7 | """
8 |
9 | import argparse
10 | import contextlib
11 | import os
12 | import platform
13 | import sys
14 | from copy import deepcopy
15 | from pathlib import Path
16 |
17 | FILE = Path(__file__).resolve()
18 | ROOT = FILE.parents[1] # YOLOv5 root directory
19 | if str(ROOT) not in sys.path:
20 | sys.path.append(str(ROOT)) # add ROOT to PATH
21 | if platform.system() != 'Windows':
22 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
23 |
24 | from models.common import * # noqa
25 | from models.experimental import * # noqa
26 | from utils.autoanchor import check_anchor_order
27 | from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
28 | from utils.plots import feature_visualization
29 | from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
30 | time_sync)
31 |
32 | try:
33 | import thop # for FLOPs computation
34 | except ImportError:
35 | thop = None
36 |
37 |
38 | class Detect(nn.Module):
39 | # YOLOv5 Detect head for detection models
40 | stride = None # strides computed during build
41 | dynamic = False # force grid reconstruction
42 | export = False # export mode
43 |
44 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
45 | super().__init__()
46 | self.nc = nc # number of classes
47 | self.no = nc + 5 # number of outputs per anchor
48 | self.nl = len(anchors) # number of detection layers
49 | self.na = len(anchors[0]) // 2 # number of anchors
50 | self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
51 | self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
52 | self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
53 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
54 | self.inplace = inplace # use inplace ops (e.g. slice assignment)
55 |
56 | def forward(self, x):
57 | z = [] # inference output
58 | for i in range(self.nl):
59 | x[i] = self.m[i](x[i]) # conv
60 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
61 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
62 |
63 | if not self.training: # inference
64 | if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
65 | self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
66 |
67 | if isinstance(self, Segment): # (boxes + masks)
68 | xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
69 | xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
70 | wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
71 | y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
72 | else: # Detect (boxes only)
73 | xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
74 | xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
75 | wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
76 | y = torch.cat((xy, wh, conf), 4)
77 | z.append(y.view(bs, self.na * nx * ny, self.no))
78 |
79 | return x if self.training else (torch.cat(z, 1), ) if self.export else (torch.cat(z, 1), x)
80 |
81 | def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
82 | d = self.anchors[i].device
83 | t = self.anchors[i].dtype
84 | shape = 1, self.na, ny, nx, 2 # grid shape
85 | y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
86 | yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # indexing='ij' needs torch>=1.10; fall back on older torch
87 | grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
88 | anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
89 | return grid, anchor_grid
90 |
91 |
92 | class Segment(Detect):
93 | # YOLOv5 Segment head for segmentation models
94 | def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
95 | super().__init__(nc, anchors, ch, inplace)
96 | self.nm = nm # number of masks
97 | self.npr = npr # number of protos
98 | self.no = 5 + nc + self.nm # number of outputs per anchor
99 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
100 | self.proto = Proto(ch[0], self.npr, self.nm) # protos
101 | self.detect = Detect.forward
102 |
103 | def forward(self, x):
104 | p = self.proto(x[0])
105 | x = self.detect(self, x)
106 | return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
107 |
108 |
109 | class BaseModel(nn.Module):
110 | # YOLOv5 base model
111 | def forward(self, x, profile=False, visualize=False):
112 | return self._forward_once(x, profile, visualize) # single-scale inference, train
113 |
114 | def _forward_once(self, x, profile=False, visualize=False):
115 | y, dt = [], [] # outputs
116 | for m in self.model:
117 | if m.f != -1: # if not from previous layer
118 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
119 | if profile:
120 | self._profile_one_layer(m, x, dt)
121 | x = m(x) # run
122 | y.append(x if m.i in self.save else None) # save output
123 | if visualize:
124 | feature_visualization(x, m.type, m.i, save_dir=visualize)
125 | return x
126 |
127 | def _profile_one_layer(self, m, x, dt):
128 | c = m == self.model[-1] # is final layer, copy input as inplace fix
129 | o = thop.profile(m, inputs=(x.copy() if c else x, ), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
130 | t = time_sync()
131 | for _ in range(10):
132 | m(x.copy() if c else x)
133 | dt.append((time_sync() - t) * 100)
134 | if m == self.model[0]:
135 | LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
136 | LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
137 | if c:
138 | LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
139 |
140 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
141 | LOGGER.info('Fusing layers... ')
142 | for m in self.model.modules():
143 | if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
144 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
145 | delattr(m, 'bn') # remove batchnorm
146 | m.forward = m.forward_fuse # update forward
147 | self.info()
148 | return self
149 |
150 | def info(self, verbose=False, img_size=640): # print model information
151 | model_info(self, verbose, img_size)
152 |
153 | def _apply(self, fn):
154 | # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
155 | self = super()._apply(fn)
156 | m = self.model[-1] # Detect()
157 | if isinstance(m, (Detect, Segment)):
158 | m.stride = fn(m.stride)
159 | m.grid = list(map(fn, m.grid))
160 | if isinstance(m.anchor_grid, list):
161 | m.anchor_grid = list(map(fn, m.anchor_grid))
162 | return self
163 |
164 |
165 | class DetectionModel(BaseModel):
166 | # YOLOv5 detection model
167 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
168 | super().__init__()
169 | if isinstance(cfg, dict):
170 | self.yaml = cfg # model dict
171 | else: # is *.yaml
172 | import yaml # for torch hub
173 | self.yaml_file = Path(cfg).name
174 | with open(cfg, encoding='ascii', errors='ignore') as f:
175 | self.yaml = yaml.safe_load(f) # model dict
176 |
177 | # Define model
178 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
179 | if nc and nc != self.yaml['nc']:
180 | LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
181 | self.yaml['nc'] = nc # override yaml value
182 | if anchors:
183 | LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
184 | self.yaml['anchors'] = round(anchors) # override yaml value
185 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
186 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names
187 | self.inplace = self.yaml.get('inplace', True)
188 |
189 | # Build strides, anchors
190 | m = self.model[-1] # Detect()
191 | if isinstance(m, (Detect, Segment)):
192 | s = 256 # 2x min stride
193 | m.inplace = self.inplace
194 | forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
195 | m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
196 | check_anchor_order(m)
197 | m.anchors /= m.stride.view(-1, 1, 1)
198 | self.stride = m.stride
199 | self._initialize_biases() # only run once
200 |
201 | # Init weights, biases
202 | initialize_weights(self)
203 | self.info()
204 | LOGGER.info('')
205 |
206 | def forward(self, x, augment=False, profile=False, visualize=False):
207 | if augment:
208 | return self._forward_augment(x) # augmented inference, None
209 | return self._forward_once(x, profile, visualize) # single-scale inference, train
210 |
211 | def _forward_augment(self, x):
212 | img_size = x.shape[-2:] # height, width
213 | s = [1, 0.83, 0.67] # scales
214 | f = [None, 3, None] # flips (2-ud, 3-lr)
215 | y = [] # outputs
216 | for si, fi in zip(s, f):
217 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
218 | yi = self._forward_once(xi)[0] # forward
219 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
220 | yi = self._descale_pred(yi, fi, si, img_size)
221 | y.append(yi)
222 | y = self._clip_augmented(y) # clip augmented tails
223 | return torch.cat(y, 1), None # augmented inference, train
224 |
225 | def _descale_pred(self, p, flips, scale, img_size):
226 | # de-scale predictions following augmented inference (inverse operation)
227 | if self.inplace:
228 | p[..., :4] /= scale # de-scale
229 | if flips == 2:
230 | p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
231 | elif flips == 3:
232 | p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
233 | else:
234 | x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
235 | if flips == 2:
236 | y = img_size[0] - y # de-flip ud
237 | elif flips == 3:
238 | x = img_size[1] - x # de-flip lr
239 | p = torch.cat((x, y, wh, p[..., 4:]), -1)
240 | return p
241 |
242 | def _clip_augmented(self, y):
243 | # Clip YOLOv5 augmented inference tails
244 | nl = self.model[-1].nl # number of detection layers (P3-P5)
245 | g = sum(4 ** x for x in range(nl)) # grid points
246 | e = 1 # exclude layer count
247 | i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
248 | y[0] = y[0][:, :-i] # large
249 | i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
250 | y[-1] = y[-1][:, i:] # small
251 | return y
252 |
253 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
254 | # https://arxiv.org/abs/1708.02002 section 3.3
255 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
256 | m = self.model[-1] # Detect() module
257 | for mi, s in zip(m.m, m.stride): # from
258 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
259 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
260 | b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls
261 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
262 |
263 |
264 | Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
265 |
266 |
267 | class SegmentationModel(DetectionModel):
268 | # YOLOv5 segmentation model
269 | def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
270 | super().__init__(cfg, ch, nc, anchors)
271 |
272 |
273 | class ClassificationModel(BaseModel):
274 | # YOLOv5 classification model
275 | def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
276 | super().__init__()
277 | self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
278 |
279 | def _from_detection_model(self, model, nc=1000, cutoff=10):
280 | # Create a YOLOv5 classification model from a YOLOv5 detection model
281 | if isinstance(model, DetectMultiBackend):
282 | model = model.model # unwrap DetectMultiBackend
283 | model.model = model.model[:cutoff] # backbone
284 | m = model.model[-1] # last layer
285 | ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module
286 | c = Classify(ch, nc) # Classify()
287 | c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type
288 | model.model[-1] = c # replace
289 | self.model = model.model
290 | self.stride = model.stride
291 | self.save = []
292 | self.nc = nc
293 |
294 | def _from_yaml(self, cfg):
295 | # Create a YOLOv5 classification model from a *.yaml file
296 | self.model = None
297 |
298 |
299 | def parse_model(d, ch): # model_dict, input_channels(3)
300 | # Parse a YOLOv5 model.yaml dictionary
301 | LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
302 | anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
303 | if act:
304 | Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
305 | LOGGER.info(f"{colorstr('activation:')} {act}") # print
306 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
307 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
308 |
309 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
310 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
311 | m = eval(m) if isinstance(m, str) else m # eval strings
312 | for j, a in enumerate(args):
313 | with contextlib.suppress(NameError):
314 | args[j] = eval(a) if isinstance(a, str) else a # eval strings
315 |
316 | n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
317 | if m in {
318 | Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
319 | BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, SElayer}:
320 | c1, c2 = ch[f], args[0]
321 | if c2 != no: # if not output
322 | c2 = make_divisible(c2 * gw, 8)
323 |
324 | args = [c1, c2, *args[1:]]
325 | if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
326 | args.insert(2, n) # number of repeats
327 | n = 1
328 | elif m is nn.BatchNorm2d:
329 | args = [ch[f]]
330 | elif m is Concat:
331 | c2 = sum(ch[x] for x in f)
332 | # TODO: channel, gw, gd
333 | elif m in {Detect, Segment}:
334 | args.append([ch[x] for x in f])
335 | if isinstance(args[1], int): # number of anchors
336 | args[1] = [list(range(args[1] * 2))] * len(f)
337 | if m is Segment:
338 | args[3] = make_divisible(args[3] * gw, 8)
339 | elif m is Contract:
340 | c2 = ch[f] * args[0] ** 2
341 | elif m is Expand:
342 | c2 = ch[f] // args[0] ** 2
343 | else:
344 | c2 = ch[f]
345 |
346 | m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
347 | t = str(m)[8:-2].replace('__main__.', '') # module type
348 | np = sum(x.numel() for x in m_.parameters()) # number params
349 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
350 | LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
351 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
352 | layers.append(m_)
353 | if i == 0:
354 | ch = []
355 | ch.append(c2)
356 | return nn.Sequential(*layers), sorted(save)
357 |
358 |
359 | if __name__ == '__main__':
360 | parser = argparse.ArgumentParser()
361 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
362 | parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
363 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
364 | parser.add_argument('--profile', action='store_true', help='profile model speed')
365 | parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
366 | parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
367 | opt = parser.parse_args()
368 | opt.cfg = check_yaml(opt.cfg) # check YAML
369 | print_args(vars(opt))
370 | device = select_device(opt.device)
371 |
372 | # Create model
373 | im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
374 | model = Model(opt.cfg).to(device)
375 |
376 | # Options
377 | if opt.line_profile: # profile layer by layer
378 | model(im, profile=True)
379 |
380 | elif opt.profile: # profile forward-backward
381 | results = profile(input=im, ops=[model], n=3)
382 |
383 | elif opt.test: # test all models
384 | for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
385 | try:
386 | _ = Model(cfg)
387 | except Exception as e:
388 | print(f'Error in {cfg}: {e}')
389 |
390 | else: # report fused model summary
391 | model.fuse()
392 |
--------------------------------------------------------------------------------
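For orientation, Detect returns the raw per-scale feature maps in training mode and, in eval mode, also the decoded predictions (xy = (2·σ + grid)·stride with the −0.5 offset baked into the grid, wh = (2·σ)²·anchor). A quick smoke test, assuming the repo root is on the path and this project's yolov5s.yaml builds cleanly:

```python
import torch
from models.yolo import DetectionModel

model = DetectionModel('models/yolov5s.yaml')  # nc=80 template from this repo
im = torch.zeros(1, 3, 640, 640)

model.train()
feats = model(im)        # 3 raw maps: (1,3,80,80,85), (1,3,40,40,85), (1,3,20,20,85)

model.eval()
pred, feats = model(im)  # decoded (1, 25200, 85) plus the same raw maps
print(pred.shape, [f.shape for f in feats])
```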
/models/yolov5_ghost_attention.yaml:
--------------------------------------------------------------------------------
1 | nc: 3 # number of classes
2 | depth_multiple: 0.33 # model depth multiple
3 | width_multiple: 0.50 # layer channel multiple
4 | anchors:
5 | - [10,13, 16,30, 33,23] # P3/8
6 | - [30,61, 62,45, 59,119] # P4/16
7 | - [116,90, 156,198, 373,326] # P5/32
8 |
9 | # YOLOv5 with GhostNet backbone
10 | backbone:
11 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
12 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
13 | [-1, 3, C3Ghost, [128]],
14 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
15 | [-1, 6, C3Ghost, [256]],
16 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
17 | [-1, 9, C3Ghost, [512]],
18 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
19 | [-1, 3, C3Ghost, [1024]],
20 | [-1, 1, SPPF, [1024, 5]], # 9, an SPP layer can replace SPPF here to gain robustness across input resolutions
21 | ]
22 |
23 | head:
24 | [[-1, 1, DWConv, [512, 1, 1]],
25 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
26 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
27 | [-1, 3, C3_seblock, [512, False]], # 13
28 |
29 | [-1, 1, DWConv, [256, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
32 | [-1, 3, C3_seblock, [256, False]], # 17 (P3/8-small)
33 |
34 | [-1, 1, DWConv, [256, 3, 2]],
35 | [[-1, 14], 1, Concat, [1]], # cat head P4
36 | [-1, 3, C3_seblock, [512, False]], # 20 (P4/16-medium)
37 |
38 | [-1, 1, GhostConv, [512, 3, 2]],
39 | [[-1, 10], 1, Concat, [1]], # cat head P5
40 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
41 |
42 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43 | ]
--------------------------------------------------------------------------------
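All of the yaml variants below share one template and differ mainly in modules and in the two scale factors: parse_model turns each repeat count into n = max(round(n·gd), 1) and each channel count into make_divisible(c·gw, 8). A worked check for the s-scale values used above (gd=0.33, gw=0.50):

```python
import math

def make_divisible(x, divisor=8):
    # same helper parse_model relies on (utils.general.make_divisible)
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50                  # depth_multiple, width_multiple
for n in (3, 6, 9):                  # yaml repeat counts
    print(n, '->', max(round(n * gd), 1))        # 1, 2, 3 actual repeats
for c in (128, 256, 512, 1024):      # yaml channel counts
    print(c, '->', make_divisible(c * gw, 8))    # 64, 128, 256, 512 channels
```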
/models/yolov5l.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5l_2.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, DWConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, DWConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, DWConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, DWConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5m.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # model depth multiple
6 | width_multiple: 0.75 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5m_2.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # model depth multiple
6 | width_multiple: 0.75 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, DWConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, DWConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, DWConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, DWConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5n.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.25 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5s-ghost_dw.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3Ghost, [128]],
18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3Ghost, [256]],
20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3Ghost, [512]],
22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3Ghost, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, GhostConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3Ghost, [512, False]], # 13
33 |
34 | [-1, 1, GhostConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, GhostConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, GhostConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5s-transformer_dw.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, DWConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, DWConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, DWConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, DWConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
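Relative to the plain s-model, the variant above swaps the last C3 for C3TR, which replaces the bottleneck stack with a TransformerBlock while keeping the feature-map shape unchanged. A hypothetical shape check (assumes the upstream C3TR from models/common.py is importable):

```python
import torch
from models.common import C3TR

m = C3TR(512, 512, n=1)             # c1, c2, repeats
y = m(torch.zeros(1, 512, 20, 20))  # P5-sized input
print(y.shape)                      # torch.Size([1, 512, 20, 20]) -- shape preserved
```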
/models/yolov5s.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, DWConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, DWConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, DWConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, DWConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5s_dw_se.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 3 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SElayer, [1024]],
25 | [-1, 1, SPPF, [1024, 5]], # 10
26 | ]
27 |
28 | # YOLOv5 v6.0 head
29 | head:
30 | [[-1, 1, DWConv, [512, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 3, C3, [512, False]], # 14
34 |
35 | [-1, 1, DWConv, [256, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 3, C3, [256, False]], # 18 (P3/8-small)
39 |
40 | [-1, 1, DWConv, [256, 3, 2]],
41 | [[-1, 15], 1, Concat, [1]], # cat head P4
42 | [-1, 3, C3, [512, False]], # 21 (P4/16-medium)
43 |
44 | [-1, 1, DWConv, [512, 3, 2]],
45 | [[-1, 11], 1, Concat, [1]], # cat head P5
46 | [-1, 3, C3, [1024, False]], # 24 (P5/32-large)
47 |
48 | [[18, 21, 24], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49 | ]
50 |
--------------------------------------------------------------------------------
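Because yolov5s_dw_se.yaml above (and yolov5s_dw_se_c3spp_c3ghost.yaml below) inserts an extra backbone layer, every hard-coded head index of 10 or more shifts by one; the Concat/Detect references above reflect that shift. A short audit script (a sketch, run from the repo root) makes such off-by-one errors easy to spot:

```python
import yaml

# Print each layer's running index next to its 'from' field so hard-coded
# Concat/Detect references can be checked by eye after editing a yaml.
with open('models/yolov5s_dw_se.yaml') as f:
    cfg = yaml.safe_load(f)
for i, (frm, n, mod, args) in enumerate(cfg['backbone'] + cfg['head']):
    print(f'{i:>2}  {mod:<12} from={frm}')
```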
/models/yolov5s_dw_se_c3ghost.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 3 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3Ghost, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3Ghost, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3Ghost, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | # [-1, 3, C3x, [1024]],
24 | [-1, 1, SElayer, [1024]],
25 | [-1, 1, SPPF, [1024, 5]], # 9
26 | ]
27 |
28 | # YOLOv5 v6.0 head
29 | head:
30 | [[-1, 1, DWConv, [512, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 3, C3Ghost, [512, False]], # 13
34 |
35 | [-1, 1, DWConv, [256, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
39 |
40 | [-1, 1, DWConv, [256, 3, 2]],
41 | [[-1, 14], 1, Concat, [1]], # cat head P4
42 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
43 |
44 | [-1, 1, DWConv, [512, 3, 2]],
45 | [[-1, 10], 1, Concat, [1]], # cat head P5
46 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
47 |
48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49 | ]
50 |
--------------------------------------------------------------------------------
/models/yolov5s_dw_se_c3spp_c3ghost.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 3 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3Ghost, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3Ghost, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3Ghost, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3SPP, [1024]],
24 | [-1, 1, SElayer, [1024]],
25 | [-1, 1, SPPF, [1024, 5]], # 10
26 | ]
27 |
28 | # YOLOv5 v6.0 head
29 | head:
30 | [[-1, 1, DWConv, [512, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 3, C3Ghost, [512, False]], # 14
34 |
35 | [-1, 1, DWConv, [256, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 3, C3Ghost, [256, False]], # 18 (P3/8-small)
39 |
40 | [-1, 1, DWConv, [256, 3, 2]],
41 | [[-1, 15], 1, Concat, [1]], # cat head P4
42 | [-1, 3, C3Ghost, [512, False]], # 21 (P4/16-medium)
43 |
44 | [-1, 1, DWConv, [512, 3, 2]],
45 | [[-1, 11], 1, Concat, [1]], # cat head P5
46 | [-1, 3, C3Ghost, [1024, False]], # 24 (P5/32-large)
47 |
48 | [[18, 21, 24], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49 | ]
50 |
--------------------------------------------------------------------------------
/models/yolov5s_dw_spp.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 3 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3SPP, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, DWConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, DWConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, DWConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, DWConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5s_dw_x.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 3 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3x, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, DWConv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, DWConv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, DWConv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, DWConv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5s_raw.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5x.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.33 # model depth multiple
6 | width_multiple: 1.25 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | # first_layer = DepthwiseSeparableConv(in_channels=3, out_channels=64, kernel_size=6, stride=2, padding=2)
16 | [[-1, 1, DWConv, [64, 6, 2, 2]], # 0-P1/2
17 | [-1, 1, DWConv, [128, 3, 2]], # 1-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, DWConv, [256, 3, 2]], # 3-P3/8
20 | [-1, 6, C3, [256]],
21 | [-1, 1, DWConv, [512, 3, 2]], # 5-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, DWConv, [1024, 3, 2]], # 7-P5/32
24 | [-1, 3, C3, [1024]],
25 | [-1, 1, SPPF, [1024, 5]], # 9
26 | ]
27 |
28 | # YOLOv5 v6.0 head
29 | head:
30 | [[-1, 1, DWConv, [512, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 3, C3, [512, False]], # 13
34 |
35 | [-1, 1, DWConv, [256, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39 |
40 | [-1, 1, DWConv, [256, 3, 2]],
41 | [[-1, 14], 1, Concat, [1]], # cat head P4
42 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43 |
44 | [-1, 1, DWConv, [512, 3, 2]],
45 | [[-1, 10], 1, Concat, [1]], # cat head P5
46 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47 |
48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49 | ]
50 |
--------------------------------------------------------------------------------
/models/yolov5x_raw.yaml:
--------------------------------------------------------------------------------
1 | # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.33 # model depth multiple
6 | width_multiple: 1.25 # layer channel multiple
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 v6.0 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 6, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 3, C3, [1024]],
24 | [-1, 1, SPPF, [1024, 5]], # 9
25 | ]
26 |
27 | # YOLOv5 v6.0 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
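The final script below applies Grad-CAM-family methods to the detector. Per kept detection it combines gradient-derived weights with the hooked layer's activations, applies a ReLU, and min-max normalizes; the script delegates the weighting to pytorch_grad_cam, but the plain Grad-CAM case reduces to this NumPy sketch (shapes are assumptions):

```python
import numpy as np

activations = np.random.rand(1, 128, 80, 80).astype(np.float32)  # hooked layer output
gradients = np.random.rand(1, 128, 80, 80).astype(np.float32)    # d(score)/d(activations)
weights = gradients.mean(axis=(2, 3), keepdims=True)             # GAP over space -> (1,128,1,1)
cam = np.maximum((weights * activations).sum(axis=1)[0], 0)      # weighted sum + ReLU
cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)         # normalize to [0, 1]
```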
/yolov5_gradcam.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings('ignore')
3 | warnings.simplefilter('ignore')
4 | import torch, yaml, cv2, os, shutil
5 | import numpy as np
6 | np.random.seed(0)
7 | import matplotlib.pyplot as plt
8 | from tqdm import trange
9 | from PIL import Image
10 | from models.yolo import Model
11 | from utils.general import intersect_dicts
12 | from utils.augmentations import letterbox
13 | from utils.general import xywh2xyxy
14 | from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
15 | from pytorch_grad_cam.utils.image import show_cam_on_image
16 | from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
17 |
18 | class yolov5_heatmap:
19 | def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio):
20 | device = torch.device(device)
21 | ckpt = torch.load(weight)
22 | model_names = ckpt['model'].names
23 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
24 | model = Model(cfg, ch=3, nc=len(model_names)).to(device)
25 | csd = intersect_dicts(csd, model.state_dict(), exclude=['anchor']) # intersect
26 | model.load_state_dict(csd, strict=False) # load
27 |         # fuse Conv + BN layers and switch to inference mode
28 |         model.fuse().eval()
29 | print(f'Transferred {len(csd)}/{len(model.state_dict())} items')
30 |
31 |         target_layers = [eval(layer)]  # e.g. 'model.model[-2]' -> module object
32 |         method = eval(method)  # CAM class by name: GradCAMPlusPlus, GradCAM or XGradCAM
33 |
34 |         colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.float16)  # one random color per class
35 |         self.__dict__.update(locals())  # expose all locals (model, device, colors, ...) as attributes
36 |
37 | def post_process(self, result):
38 |         logits_ = result[..., 4:]  # objectness + per-class scores
39 |         boxes_ = result[..., :4]  # boxes in xywh format
40 |         _, indices = torch.sort(logits_[..., 0], descending=True)  # rank predictions by objectness
41 |         return logits_[0][indices[0]], xywh2xyxy(boxes_[0][indices[0]]).cpu().detach().numpy()
42 |
43 | def draw_detections(self, box, color, name, img):
44 | xmin, ymin, xmax, ymax = list(map(int, list(box)))
45 | cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2)
46 | cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2, lineType=cv2.LINE_AA)
47 | return img
48 |
49 | def __call__(self, img_path, save_path):
50 |         # remove the output dir if it already exists
51 |         if os.path.exists(save_path):
52 |             shutil.rmtree(save_path)
53 |         # then recreate it empty
54 |         os.makedirs(save_path, exist_ok=True)
55 |
56 |         # image preprocessing: letterbox, BGR -> RGB, scale to [0, 1]
57 | img = cv2.imread(img_path)
58 | img = letterbox(img)[0]
59 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
60 | img = np.float32(img) / 255.0
61 | tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)
62 |
63 | # init ActivationsAndGradients
64 | grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None)
65 |
66 |         # forward pass: capture model output and target-layer activations
67 | result = grads(tensor)
68 | activations = grads.activations[0].cpu().detach().numpy()
69 |
70 |         # post-process the raw output into confidence-ranked scores and boxes
71 | post_result, post_boxes = self.post_process(result[0])
72 | for i in trange(int(post_result.size(0) * self.ratio)):
73 | if post_result[i][0] < self.conf_threshold:
74 | break
75 |
76 | self.model.zero_grad()
77 | if self.backward_type == 'conf':
78 | post_result[i, 0].backward(retain_graph=True)
79 | else:
80 | # get max probability for this prediction
81 | score = post_result[i, 1:].max()
82 | score.backward(retain_graph=True)
83 |
84 | # process heatmap
85 | gradients = grads.gradients[0]
86 | b, k, u, v = gradients.size()
87 |             weights = self.method.get_cam_weights(self.method, None, None, None, activations, gradients.cpu().detach().numpy())  # per-channel CAM weights
88 | weights = weights.reshape((b, k, 1, 1))
89 | saliency_map = np.sum(weights * activations, axis=1)
90 | saliency_map = np.squeeze(np.maximum(saliency_map, 0))
91 | saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2)))
92 | saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
93 | if (saliency_map_max - saliency_map_min) == 0:
94 | continue
95 | saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min)
96 |
97 | # add heatmap and box to image
98 | cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
99 | cam_image = self.draw_detections(post_boxes[i], self.colors[int(post_result[i, 1:].argmax())], f'{self.model_names[int(post_result[i, 1:].argmax())]} {post_result[i][0]:.2f}', cam_image)
100 | cam_image = Image.fromarray(cam_image)
101 | cam_image.save(f'{save_path}/{i}.png')
102 |
103 | def get_params():
104 | params = {
105 | 'weight': '/root/yolov5/runs/train/s_w16_b32_e200/weights/best.pt',
106 | 'cfg': 'models/yolov5s.yaml',
107 | 'device': 'cuda:0',
108 | 'method': 'XGradCAM', # GradCAMPlusPlus, GradCAM, XGradCAM
109 | 'layer': 'model.model[-2]',
110 | 'backward_type': 'class', # class or conf
111 |         'conf_threshold': 0.6, # skip predictions below this objectness score
112 |         'ratio': 0.02 # fraction of top-ranked predictions to visualize (0.02-0.1)
113 | }
114 | return params
115 |
116 | if __name__ == '__main__':
117 | model = yolov5_heatmap(**get_params())
118 | model("/root/autodl-tmp/datasets_new/images/val/000_br0.7_th0.05.jpg", 'result')
119 |
--------------------------------------------------------------------------------
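yolov5_gradcam.py is driven entirely by the dict returned from get_params(), so a different CAM variant or target layer can be tried by overriding the dict before constructing the class rather than editing it in place. A minimal sketch; the layer index and paths here are illustrative placeholders, not values from the repo:

```python
from yolov5_gradcam import yolov5_heatmap, get_params

params = get_params()
# hypothetical override: a different CAM method and a shallower target layer
params.update({'method': 'GradCAMPlusPlus', 'layer': 'model.model[-4]'})

heatmap = yolov5_heatmap(**params)
heatmap('path/to/image.jpg', 'result_gradcampp')  # writes one heatmap PNG per kept detection
```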