├── .gitignore
├── .idea
│   └── .gitignore
├── GUI
│   ├── GUI.py
│   ├── V_GUI.py
│   └── main_GUI.py
├── Net
│   ├── LPRNet.py
│   ├── colorNet.py
│   └── plateNet.py
├── README.md
├── Test
│   ├── plate_color_test.py
│   └── plate_ocr_test.py
├── UI
│   ├── background.jpg
│   └── car.png
├── alphabets.py
├── cvtorchvision
│   ├── __init__.py
│   └── cvtransforms
│       ├── __init__.py
│       ├── cvfunctional.py
│       └── cvtransforms.py
├── data
│   ├── coco.yaml
│   ├── hyp.finetune.yaml
│   ├── hyp.scratch.yaml
│   ├── plate.yaml
│   └── scripts
│       └── get_.sh
├── detect_plate.py
├── hubconf.py
├── imgs_test
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   ├── 4.jpg
│   ├── 5.jpg
│   └── 6.jpg
├── lib
│   ├── config
│   │   ├── plate_color.yaml
│   │   └── plate_ocr.yaml
│   ├── core
│   │   └── function.py
│   ├── dataset
│   │   ├── __init__.py
│   │   └── _plate.py
│   ├── models
│   │   └── crnn.py
│   └── utils
│       ├── imutils.py
│       └── utils.py
├── models
│   ├── __init__.py
│   ├── common.py
│   ├── experimental.py
│   ├── yolo.py
│   ├── yolov5l-0.5.yaml
│   ├── yolov5l.yaml
│   ├── yolov5l6.yaml
│   ├── yolov5m.yaml
│   ├── yolov5m6.yaml
│   ├── yolov5n.yaml
│   ├── yolov5n6.yaml
│   ├── yolov5s.yaml
│   └── yolov5s6.yaml
├── onnx
│   ├── export_pt.py
│   ├── export_pth.py
│   ├── onnx_detect.py
│   └── openvino_infer.py
├── plateLabel.py
├── readme_imgs
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── 4.png
│   ├── 5.png
│   ├── 6.png
│   ├── main.png
│   └── video.png
├── test.py
├── torch2trt
│   ├── speed.py
│   └── trt_model.py
├── train_color.py
├── train_ocr.py
├── train_yolo.py
├── utils
│   ├── __init__.py
│   ├── activations.py
│   ├── autoanchor.py
│   ├── crpd_process.py
│   ├── cv_puttext.py
│   ├── datasets.py
│   ├── double_plate_split_merge.py
│   ├── general.py
│   ├── google_utils.py
│   ├── log_dataset.py
│   ├── loss.py
│   ├── metrics.py
│   ├── plate_rec.py
│   ├── plots.py
│   ├── solve_datasets.py
│   ├── torch_utils.py
│   └── wandb_utils.py
└── weights
    ├── plate_rec.pt
    ├── plate_rec_color.pth
    └── plate_rec_ocr.pth
/.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore 2 | # 首先忽略所有的文件 3 | * 4 | # 但是不忽略目录 5 | !*/ 6 | # 忽略一些指定的目录名 7 | ut/ 8 | runs/ 9 | .vscode/ 10 | build/ 11 | result1/ 12 | result/ 13 | mytest/ 14 | mytest_double/ 15 | pretrained_model/ 16 | gangao/ 17 | extra/ 18 | ccpd/ 19 | *.pyc 20 | # 不忽略下面指定的文件类型 21 | !*.cpp 22 | !*.h 23 | !*.hpp 24 | !*.c 25 | !.gitignore 26 | !*.py 27 | !*.sh 28 | !*.npy 29 | !*.jpg 30 | !*.pt 31 | !*.npy 32 | !*.pth 33 | !*.png 34 | !*.yaml 35 | !*.md -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # 默认忽略的文件 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /GUI/GUI.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tkinter as tk 3 | from tkinter import * 4 | from tkinter.filedialog import askopenfilename 5 | import cv2 6 | from PIL import Image, ImageTk 7 | from PIL.Image import Resampling 8 | from detect_plate import detect_Recognition_plate, draw_result, init_model, load_model, get_second 9 | import torch 10 | 11 | 12 | class LicensePlateDetectorGUI: 13 | def __init__(self, root, width, height): 14 | self.result_img_tk = None 15 | self.original_img_tk = None 16 | self.root = root 17 | self.width = width 18 | self.height = height 19 | self.root.geometry("%dx%d+%d+%d" % (width, height, 200, 50)) 20 | self.root.title("车牌识别") 21 | icon_path = "../UI/car.ico" 22 | self.root.iconbitmap(default=icon_path) 23 | 24 | # 初始化模型 25 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 26 | self.detect_model = 
load_model('../weights/plate_rec.pt', self.device) 27 | self.plate_rec_model = init_model(self.device, '../weights/plate_rec_color.pth', is_color=True) 28 | 29 | # 初始化其他变量 30 | self.image_path = '' 31 | 32 | # 创建标签 33 | Label(self.root, text='原图:', font=('微软雅黑', 18)).place(x=48, y=10) 34 | Label(self.root, text='识别结果:', font=('微软雅黑', 18)).place(x=640, y=10) 35 | 36 | # 创建用于显示原图的 Canvas 37 | self.original_canvas = Canvas(self.root, width=512, height=512, bg='white', relief='solid', borderwidth=1) 38 | self.original_canvas.place(x=48, y=50) 39 | 40 | # 创建用于显示检测结果的 Canvas 41 | self.result_canvas = Canvas(self.root, width=512, height=512, bg='white', relief='solid', borderwidth=1) 42 | self.result_canvas.place(x=640, y=50) 43 | 44 | # 创建选择图片的按钮 45 | self.select_image_button = tk.Button(self.root, text='选择图片', command=self.select_image, font=('微软雅黑', 12)) 46 | self.select_image_button.place(x=300, y=600, width=100, height=50) 47 | 48 | # 创建开始检测的按钮 49 | self.detect_button = tk.Button(self.root, text='开始识别', command=self.detect_license_plate, font=('微软雅黑', 12)) 50 | self.detect_button.place(x=550, y=600, width=100, height=50) 51 | 52 | # 创建清空按钮 53 | self.clear_button = tk.Button(self.root, text='清空图片', command=self.clear, font=('微软雅黑', 12)) 54 | self.clear_button.place(x=800, y=600, width=100, height=50) 55 | print("已启动!开始识别!") 56 | 57 | def select_image(self): 58 | sv = StringVar() 59 | sv.set(askopenfilename(title="选择图片文件", 60 | filetypes=[("Images", "*.png;*.xpm;*.jpg;*.bmp"), 61 | ("All Files", "*.*")])) 62 | self.image_path = Entry(self.root, state='readonly', textvariable=sv).get() 63 | print(self.image_path) 64 | self.original_canvas.delete('all') 65 | self.result_canvas.delete('all') 66 | self.show_selected_image() 67 | 68 | def detect_license_plate(self): 69 | if not self.image_path: 70 | print("请先选择图片文件") 71 | return 72 | 73 | # 加载图像并进行检测 74 | img = cv2.imread(self.image_path) 75 | dict_list = detect_Recognition_plate(self.detect_model, img, self.device, self.plate_rec_model, 640, 76 | is_color=True) 77 | result_img_save = draw_result(img, dict_list) 78 | result_img_rgb = cv2.cvtColor(result_img_save, cv2.COLOR_BGR2RGB) 79 | result_img = Image.fromarray(result_img_rgb) 80 | # 显示原图和检测结果图像 81 | self.show_result_images(result_img) 82 | save_path = '../imgs_test/img_test_result' 83 | if not os.path.exists(save_path): 84 | os.mkdir(save_path) 85 | img_name = os.path.basename(self.image_path) 86 | save_img_path = os.path.join(save_path, img_name) 87 | cv2.imwrite(save_img_path, result_img_save) 88 | 89 | def show_selected_image(self): 90 | if self.image_path: 91 | img_open = Image.open(self.image_path) 92 | img_open = img_open.resize((512, 512), Resampling.LANCZOS) 93 | 94 | self.original_img_tk = ImageTk.PhotoImage(img_open) 95 | self.original_canvas.create_image(258, 258, image=self.original_img_tk, anchor='center') 96 | 97 | def show_result_images(self, result_img): 98 | result_img = result_img.resize((512, 512), Resampling.LANCZOS) 99 | self.result_img_tk = ImageTk.PhotoImage(result_img) 100 | 101 | self.result_canvas.create_image(258, 258, image=self.result_img_tk, anchor='center') 102 | 103 | def clear(self): 104 | self.original_canvas.delete('all') 105 | self.result_canvas.delete('all') 106 | self.image_path = None 107 | 108 | 109 | if __name__ == "__main__": 110 | print("启动中...请稍后...") 111 | root = tk.Tk() 112 | width = 1200 113 | height = 700 114 | gui = LicensePlateDetectorGUI(root, width, height) 115 | root.mainloop() 116 | 
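For reference, the same detection-plus-recognition pipeline can be driven without the Tk window; a minimal headless sketch, assuming detect_plate exposes the functions imported above (the image and output paths here are only illustrative):

import cv2
import torch
from detect_plate import detect_Recognition_plate, draw_result, init_model, load_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
detect_model = load_model('../weights/plate_rec.pt', device)                            # plate detector weights, as in GUI.py
plate_rec_model = init_model(device, '../weights/plate_rec_color.pth', is_color=True)   # OCR + colour recogniser

img = cv2.imread('../imgs_test/1.jpg')                                                  # illustrative input image
dict_list = detect_Recognition_plate(detect_model, img, device, plate_rec_model, 640, is_color=True)
cv2.imwrite('result.jpg', draw_result(img, dict_list))                                  # annotated copy of the input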
-------------------------------------------------------------------------------- /GUI/main_GUI.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import Text, Frame, Toplevel 3 | from PIL import Image, ImageTk 4 | from GUI import LicensePlateDetectorGUI 5 | from V_GUI import * 6 | 7 | 8 | def video_detection(): 9 | mapp = QApplication(sys.argv) 10 | window = VideoBox() 11 | window.show() 12 | mapp.exec_() 13 | 14 | 15 | class Detect_main: 16 | def __init__(self, root, width, height): 17 | self.image_tk = None 18 | self.root = root 19 | self.root.title("车牌识别系统") 20 | self.root.geometry("%dx%d+%d+%d" % (width, height, 200, 50)) 21 | icon_path = "../UI/car.ico" 22 | self.root.iconbitmap(default=icon_path) 23 | 24 | # 左侧图片区域 25 | self.image_label = tk.Label(root, text='pic') 26 | self.image_label.place(x=0, y=0, width=625, height=600) 27 | # 设置图片路径 28 | image_path = "../UI/background.jpg" # 请替换为实际的图片路径 29 | self.set_image(image_path, width=625, height=600) # 设置 label 的大小为 600x600 30 | 31 | # 图片检测按钮 32 | image_button = tk.Button(self.root, text="图片识别", command=self.image_detection, font=('微软雅黑', 12)) 33 | image_button.place(x=662.5, y=125, width=100, height=75) 34 | 35 | # 视频检测按钮 36 | video_button = tk.Button(self.root, text="视频识别", command=video_detection, font=('微软雅黑', 12)) 37 | video_button.place(x=662.5, y=275, width=100, height=75) 38 | 39 | # 开发信息按钮 40 | info_button = tk.Button(self.root, text="开发信息", command=self.staff_info, font=('微软雅黑', 12)) 41 | info_button.place(x=662.5, y=425, width=100, height=75) 42 | 43 | def image_detection(self): 44 | image_detection_window = Toplevel(self.root) 45 | LicensePlateDetectorGUI(image_detection_window, 1200, 700) 46 | 47 | def staff_info(self): 48 | info_window = Toplevel(self.root) 49 | info_window.title("开发信息") 50 | info_window.geometry("%dx%d+%d+%d" % (500, 300, 300, 200)) 51 | 52 | frame = Frame(info_window, padx=10, pady=10) 53 | frame.pack(expand=True, fill='both') 54 | 55 | info_text = ( 56 | "\n\n开发人员:周豪捷、刘伯钰——北京理工大学2021级\n\n" 57 | "联系邮箱:midkingggg@gmail.com\n\n" 58 | "详细使用说明请看README.md" 59 | ) 60 | # 使用 Text 组件 61 | info_text_widget = Text(frame, wrap='word', font=('微软雅黑', 14), height=10, width=40) 62 | info_text_widget.insert(tk.END, info_text) 63 | info_text_widget.pack(expand=True, fill='both') 64 | # 禁止编辑 65 | info_text_widget.config(state=tk.DISABLED) 66 | 67 | def set_image(self, image_path, width, height): 68 | original_image = Image.open(image_path) 69 | # 将图片调整为 label 的大小 70 | resized_image = original_image.resize((width, height), Image.LANCZOS) 71 | # 将图片转换为 Tkinter PhotoImage 对象 72 | self.image_tk = ImageTk.PhotoImage(resized_image) 73 | # 在 Label 中显示图片 74 | self.image_label.config(image=self.image_tk, width=width, height=height) 75 | self.image_label.image = self.image_tk 76 | 77 | 78 | if __name__ == "__main__": 79 | root = tk.Tk() 80 | app = Detect_main(root, 800, 600) 81 | root.mainloop() 82 | -------------------------------------------------------------------------------- /Net/LPRNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class small_basic_block(nn.Module): 7 | def __init__(self, ch_in, ch_out): 8 | super(small_basic_block, self).__init__() 9 | self.block = nn.Sequential( 10 | nn.Conv2d(ch_in, ch_out // 4, kernel_size=1), 11 | nn.ReLU(), 12 | nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)), 
13 | nn.ReLU(), 14 | nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)), 15 | nn.ReLU(), 16 | nn.Conv2d(ch_out // 4, ch_out, kernel_size=1), 17 | ) 18 | 19 | def forward(self, x): 20 | return self.block(x) 21 | 22 | 23 | class LPRNet(nn.Module): 24 | def __init__(self, lpr_max_len, num_classes, dropout_rate, export=False): 25 | super(LPRNet, self).__init__() 26 | self.lpr_max_len = lpr_max_len 27 | self.num_classes = num_classes 28 | self.export = export 29 | self.backbone = nn.Sequential( 30 | nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1), 31 | nn.BatchNorm2d(num_features=64), 32 | nn.ReLU(), 33 | nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)), 34 | small_basic_block(ch_in=64, ch_out=128), 35 | nn.BatchNorm2d(num_features=128), 36 | nn.ReLU(), 37 | nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)), 38 | small_basic_block(ch_in=64, ch_out=256), 39 | nn.BatchNorm2d(num_features=256), 40 | nn.ReLU(), 41 | small_basic_block(ch_in=256, ch_out=256), 42 | nn.BatchNorm2d(num_features=256), 43 | nn.ReLU(), 44 | nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)), 45 | nn.Dropout(dropout_rate), 46 | nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1), 47 | nn.BatchNorm2d(num_features=256), 48 | nn.ReLU(), 49 | nn.Dropout(dropout_rate), 50 | nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=(13, 1), stride=1), 51 | nn.BatchNorm2d(num_features=num_classes), 52 | nn.ReLU(), 53 | ) 54 | self.container = nn.Sequential( 55 | nn.Conv2d(in_channels=448 + self.num_classes, out_channels=self.num_classes, kernel_size=(1, 1), 56 | stride=(1, 1)), 57 | ) 58 | 59 | def forward(self, x): 60 | keep_features = list() 61 | for i, layer in enumerate(self.backbone.children()): 62 | x = layer(x) 63 | if i in [2, 6, 13, 22]: 64 | keep_features.append(x) 65 | 66 | global_context = list() 67 | for i, f in enumerate(keep_features): 68 | if i in [0, 1]: 69 | f = nn.AvgPool2d(kernel_size=5, stride=5)(f) 70 | if i in [2]: 71 | f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f) 72 | f_pow = torch.pow(f, 2) 73 | f_mean = torch.mean(f_pow) 74 | f = torch.div(f, f_mean) 75 | global_context.append(f) 76 | 77 | x = torch.cat(global_context, 1) 78 | x = self.container(x) 79 | logits = torch.mean(x, dim=2) 80 | if self.export: 81 | logits = logits.transpose(2, 1) 82 | logits = logits.argmax(dim=2) 83 | else: 84 | logits = logits.permute(2, 0, 1) 85 | logits = F.log_softmax(logits, dim=2) 86 | return logits 87 | 88 | 89 | def build_lprnet(lpr_max_len=8, num_classes=78, dropout_rate=0.5, export=False): 90 | Net = LPRNet(lpr_max_len, num_classes, dropout_rate, export) 91 | return Net 92 | 93 | 94 | # if __name__ == "__main__": 95 | # model = build_lprnet(export=True) 96 | # x = torch.randn(1, 3, 24, 94) 97 | # out = model(x) 98 | # print(out.shape) 99 | -------------------------------------------------------------------------------- /Net/colorNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class myNet_ocr(nn.Module): 7 | def __init__(self, cfg=None, num_classes=78, export=False, color_num=None): 8 | super(myNet_ocr, self).__init__() 9 | if cfg is None: 10 | cfg = [32, 32, 64, 64, 'M', 128, 128, 'M', 196, 196, 'M', 256, 256] 11 | self.feature = self.make_layers(cfg, True) 12 | self.export = export 13 | self.color_num = color_num 14 | if self.color_num: 15 | self.gap = nn.AdaptiveAvgPool2d(output_size=1) 16 | 
self.color_classifier = nn.Conv2d(cfg[-1], 5, kernel_size=1, stride=1) 17 | self.color_bn = nn.BatchNorm2d(5) 18 | self.flatten = nn.Flatten() 19 | self.loc = nn.MaxPool2d((5, 2), (1, 1), (0, 1), ceil_mode=False) 20 | self.newCnn = nn.Conv2d(cfg[-1], num_classes, 1, 1) 21 | 22 | def make_layers(self, cfg, batch_norm=False): 23 | layers = [] 24 | in_channels = 3 25 | for i in range(len(cfg)): 26 | if i == 0: 27 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=5, stride=1) 28 | if batch_norm: 29 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 30 | else: 31 | layers += [conv2d, nn.ReLU(inplace=True)] 32 | in_channels = cfg[i] 33 | else: 34 | if cfg[i] == 'M': 35 | layers += [nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)] 36 | else: 37 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=3, padding=(1, 1), stride=1) 38 | if batch_norm: 39 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 40 | else: 41 | layers += [conv2d, nn.ReLU(inplace=True)] 42 | in_channels = cfg[i] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, x): 46 | x = self.feature(x) 47 | if self.color_num: 48 | x_color = self.color_classifier(x) 49 | x_color = self.color_bn(x_color) 50 | x_color = self.gap(x_color) 51 | x_color = self.flatten(x_color) 52 | x = self.loc(x) 53 | x = self.newCnn(x) 54 | 55 | if self.export: 56 | conv = x.squeeze(2) # b *512 * width 57 | conv = conv.transpose(2, 1) # [w, b, c] 58 | if self.color_num: 59 | return conv, x_color 60 | return conv 61 | else: 62 | b, c, h, w = x.size() 63 | assert h == 1, "the height of conv must be 1" 64 | conv = x.squeeze(2) # b *512 * width 65 | conv = conv.permute(2, 0, 1) # [w, b, c] 66 | output = F.log_softmax(conv, dim=2) 67 | if self.color_num: 68 | return output, x_color 69 | return output 70 | 71 | 72 | class myNet_ocr_color(nn.Module): 73 | def __init__(self, cfg=None, num_classes=78, export=False, color_num=None): 74 | super(myNet_ocr_color, self).__init__() 75 | if cfg is None: 76 | cfg = [32, 32, 64, 64, 'M', 128, 128, 'M', 196, 196, 'M', 256, 256] 77 | self.feature = self.make_layers(cfg, True) 78 | self.export = export 79 | self.color_num = color_num 80 | self.conv_out_num = 12 81 | if self.color_num: 82 | self.conv1 = nn.Conv2d(cfg[-1], self.conv_out_num, kernel_size=3, stride=2) 83 | self.bn1 = nn.BatchNorm2d(self.conv_out_num) 84 | self.relu1 = nn.ReLU(inplace=True) 85 | self.conv2 = nn.Conv2d(self.conv_out_num, 12, kernel_size=3, stride=2) 86 | self.bn2 = nn.BatchNorm2d(12) 87 | self.relu2 = nn.ReLU(inplace=True) 88 | self.gap = nn.AdaptiveAvgPool2d(output_size=1) 89 | self.color_classifier = nn.Conv2d(12, self.color_num, kernel_size=1, stride=1) 90 | self.color_bn = nn.BatchNorm2d(self.color_num) 91 | self.flatten = nn.Flatten() 92 | self.loc = nn.MaxPool2d((5, 2), (1, 1), (0, 1), ceil_mode=False) 93 | self.newCnn = nn.Conv2d(cfg[-1], num_classes, 1, 1) 94 | 95 | def make_layers(self, cfg, batch_norm=False): 96 | layers = [] 97 | in_channels = 3 98 | for i in range(len(cfg)): 99 | if i == 0: 100 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=5, stride=1) 101 | if batch_norm: 102 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 103 | else: 104 | layers += [conv2d, nn.ReLU(inplace=True)] 105 | in_channels = cfg[i] 106 | else: 107 | if cfg[i] == 'M': 108 | layers += [nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)] 109 | else: 110 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=3, padding=(1, 1), stride=1) 111 | if batch_norm: 112 | layers += [conv2d, 
nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 113 | else: 114 | layers += [conv2d, nn.ReLU(inplace=True)] 115 | in_channels = cfg[i] 116 | return nn.Sequential(*layers) 117 | 118 | def forward(self, x): 119 | x = self.feature(x) 120 | if self.color_num: 121 | x_color = self.conv1(x) 122 | x_color = self.bn1(x_color) 123 | x_color = self.relu1(x_color) 124 | x_color = self.color_classifier(x_color) 125 | x_color = self.color_bn(x_color) 126 | x_color = self.gap(x_color) 127 | x_color = self.flatten(x_color) 128 | x = self.loc(x) 129 | x = self.newCnn(x) 130 | 131 | if self.export: 132 | conv = x.squeeze(2) # b *512 * width 133 | conv = conv.transpose(2, 1) # [w, b, c] 134 | if self.color_num: 135 | return conv, x_color 136 | return conv 137 | else: 138 | b, c, h, w = x.size() 139 | assert h == 1, "the height of conv must be 1" 140 | conv = x.squeeze(2) # b *512 * width 141 | conv = conv.permute(2, 0, 1) # [w, b, c] 142 | output = F.log_softmax(conv, dim=2) 143 | if self.color_num: 144 | return output, x_color 145 | return output 146 | 147 | 148 | # if __name__ == '__main__': 149 | # x = torch.randn(1, 3, 48, 168) 150 | # cfg = [32, 'M', 64, 'M', 128, 'M', 256] 151 | # model = myNet_ocr_color(num_classes=78, export=True, cfg=cfg, color_num=5) 152 | # out, color = model(x) 153 | # print(out.shape) 154 | -------------------------------------------------------------------------------- /Net/plateNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class myNet_ocr(nn.Module): 7 | def __init__(self, cfg=None, num_classes=78, export=False, trt=False): 8 | super(myNet_ocr, self).__init__() 9 | if cfg is None: 10 | cfg = [32, 32, 64, 64, 'M', 128, 128, 'M', 196, 196, 'M', 256, 256] 11 | self.feature = self.make_layers(cfg, True) 12 | self.export = export 13 | self.trt = trt 14 | self.loc = nn.MaxPool2d((5, 2), (1, 1), (0, 1), ceil_mode=False) 15 | self.newCnn = nn.Conv2d(cfg[-1], num_classes, 1, 1) 16 | 17 | def make_layers(self, cfg, batch_norm=False): 18 | layers = [] 19 | in_channels = 3 20 | for i in range(len(cfg)): 21 | if i == 0: 22 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=5, stride=1) 23 | if batch_norm: 24 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 25 | else: 26 | layers += [conv2d, nn.ReLU(inplace=True)] 27 | in_channels = cfg[i] 28 | else: 29 | if cfg[i] == 'M': 30 | layers += [nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)] 31 | else: 32 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=3, padding=(1, 1), stride=1) 33 | if batch_norm: 34 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 35 | else: 36 | layers += [conv2d, nn.ReLU(inplace=True)] 37 | in_channels = cfg[i] 38 | return nn.Sequential(*layers) 39 | 40 | def forward(self, x): 41 | x = self.feature(x) 42 | x = self.loc(x) 43 | x = self.newCnn(x) 44 | if self.export: 45 | conv = x.squeeze(2) # b *512 * width 46 | conv = conv.transpose(2, 1) # [w, b, c] 47 | if self.trt: 48 | conv = conv.argmax(dim=2) 49 | conv = conv.float() 50 | return conv 51 | else: 52 | b, c, h, w = x.size() 53 | assert h == 1, "the height of conv must be 1" 54 | conv = x.squeeze(2) # b *512 * width 55 | conv = conv.permute(2, 0, 1) # [w, b, c] 56 | output = F.log_softmax(conv, dim=2) 57 | 58 | return output 59 | 60 | 61 | if __name__ == '__main__': 62 | x = torch.randn(1, 3, 48, 168) 63 | cfg = [32, 'M', 64, 'M', 128, 'M', 256] 64 | model = myNet_ocr(num_classes=78, 
export=True, cfg=cfg) 65 | out = model(x) 66 | print(out.shape) 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project Description 2 | **This project is the final assignment of a third-year undergraduate computer vision course.** 3 | The license plate recognition system consists of three parts: **plate detection, text recognition, and color recognition**. 4 | 5 | ## Results 6 | 7 | - **Main interface** 8 | 9 | ![Main interface](readme_imgs/main.png) 10 | 11 | - **Plate detection** 12 | - Single-row blue plate 13 | ![Plate detection](readme_imgs/1.png) 14 | - Single-row yellow plate 15 | ![Plate detection](readme_imgs/2.png) 16 | - Police plate 17 | ![Plate detection](readme_imgs/3.png) 18 | - Single-row green plate 19 | ![Plate detection](readme_imgs/4.png) 20 | - Guangdong-Hong Kong-Macao black plate 21 | ![Plate detection](readme_imgs/5.png) 22 | - Double-row yellow plate 23 | ![Plate detection](readme_imgs/6.png) 24 | 25 | - **Video detection** 26 | 27 | ![Video detection](readme_imgs/video.png) -------------------------------------------------------------------------------- /Test/plate_color_test.py: -------------------------------------------------------------------------------- 1 | from Net.colorNet import myNet_ocr_color 2 | import torch 3 | import cv2 4 | import numpy as np 5 | import os 6 | import time 7 | import argparse 8 | from alphabets import plate_chr 9 | 10 | 11 | def cv_imread(path): 12 | img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) 13 | return img 14 | 15 | 16 | def allFilePath(rootPath, allFIleList): 17 | fileList = os.listdir(rootPath) 18 | for temp in fileList: 19 | if os.path.isfile(os.path.join(rootPath, temp)): 20 | allFIleList.append(os.path.join(rootPath, temp)) 21 | else: 22 | allFilePath(os.path.join(rootPath, temp), allFIleList) 23 | 24 | 25 | color = ['黑色', '蓝色', '绿色', '白色', '黄色'] 26 | mean_value, std_value = (0.588, 0.193) 27 | 28 | 29 | def decodePlate(preds): 30 | pre = 0 31 | newPreds = [] 32 | for i in range(len(preds)): 33 | if preds[i] != 0 and preds[i] != pre: 34 | newPreds.append(preds[i]) 35 | pre = preds[i] 36 | return newPreds 37 | 38 | 39 | def image_processing(img, device, img_size): 40 | img_h, img_w = img_size 41 | img = cv2.resize(img, (img_w, img_h)) 42 | img = img.astype(np.float32) 43 | img = (img / 255.
- mean_value) / std_value 44 | img = img.transpose([2, 0, 1]) 45 | img = torch.from_numpy(img) 46 | img = img.to(device) 47 | img = img.view(1, *img.size()) 48 | return img 49 | 50 | 51 | def get_plate_result(img, device, model, img_size): 52 | input = image_processing(img, device, img_size) 53 | preds, preds_color = model(input) 54 | preds = preds.argmax(dim=2) 55 | preds_color = preds_color.argmax() 56 | preds_color = preds_color.item() 57 | preds = preds.view(-1).detach().cpu().numpy() 58 | newPreds = decodePlate(preds) 59 | plate = "" 60 | for i in newPreds: 61 | plate += plate_chr[int(i)] 62 | return plate, color[preds_color] 63 | 64 | 65 | def init_model(device, model_path): 66 | check_point = torch.load(model_path, map_location=device) 67 | model_state = check_point['state_dict'] 68 | cfg = check_point['cfg'] 69 | model = myNet_ocr_color(num_classes=len(plate_chr), export=True, cfg=cfg, color_num=5) 70 | model.load_state_dict(model_state, strict=False) 71 | model.to(device) 72 | model.eval() 73 | return model 74 | 75 | 76 | if __name__ == '__main__': 77 | parser = argparse.ArgumentParser() 78 | parser.add_argument('--model_path', type=str, default='../weights/plate_rec_color.pth', 79 | help='model.pt path(s)') 80 | parser.add_argument('--image_path', type=str, default='../license_imgs', 81 | help='source') 82 | parser.add_argument('--img_h', type=int, default=48, help='height') 83 | parser.add_argument('--img_w', type=int, default=168, help='width') 84 | parser.add_argument('--LPRNet', action='store_true', help='use LPRNet') 85 | parser.add_argument('--acc', type=bool, default=True, help='get accuracy') 86 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 87 | opt = parser.parse_args() 88 | img_size = (opt.img_h, opt.img_w) 89 | model = init_model(device, opt.model_path) 90 | if os.path.isfile(opt.image_path): 91 | right = 0 92 | begin = time.time() 93 | img = cv_imread(opt.image_path) 94 | if img.shape[-1] != 3: 95 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 96 | plate, plate_color = get_plate_result(img, device, model, img_size) 97 | print(plate, plate_color) 98 | elif opt.acc: 99 | file_list = [] 100 | right = 0 101 | allFilePath(opt.image_path, file_list) 102 | for pic_ in file_list: 103 | 104 | try: 105 | pic_name = os.path.basename(pic_) 106 | img = cv_imread(pic_) 107 | if img.shape[-1] != 3: 108 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 109 | plate, plate_color = get_plate_result(img, device, model, img_size) 110 | plate_ori = pic_.split(os.sep)[-1].split('_')[0] 111 | if plate == plate_ori: 112 | right += 1 113 | else: 114 | print(plate_ori, "rec as ---> ", plate, pic_, plate_color) 115 | except: 116 | print("error") 117 | print("sum:%d ,right:%d , accuracy: %f" % (len(file_list), right, right / len(file_list))) 118 | else: 119 | file_list = [] 120 | allFilePath(opt.image_path, file_list) 121 | for pic_ in file_list: 122 | try: 123 | pic_name = os.path.basename(pic_) 124 | img = cv_imread(pic_) 125 | if img.shape[-1] != 3: 126 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 127 | plate, plate_color = get_plate_result(img, device, model, img_size) 128 | print(plate, plate_color, pic_) 129 | except: 130 | print("error") 131 | -------------------------------------------------------------------------------- /Test/plate_ocr_test.py: -------------------------------------------------------------------------------- 1 | from Net.plateNet import myNet_ocr 2 | import torch 3 | import cv2 4 | import numpy as np 5 | import os 6 | import time 7 | import argparse 8 | 
from alphabets import plate_chr 9 | 10 | 11 | def cv_imread(path): 12 | img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) 13 | return img 14 | 15 | 16 | def allFilePath(rootPath, allFIleList): 17 | fileList = os.listdir(rootPath) 18 | for temp in fileList: 19 | if os.path.isfile(os.path.join(rootPath, temp)): 20 | allFIleList.append(os.path.join(rootPath, temp)) 21 | else: 22 | allFilePath(os.path.join(rootPath, temp), allFIleList) 23 | 24 | 25 | mean_value, std_value = (0.588, 0.193) 26 | 27 | 28 | def decodePlate(preds): 29 | pre = 0 30 | newPreds = [] 31 | for i in range(len(preds)): 32 | if preds[i] != 0 and preds[i] != pre: 33 | newPreds.append(preds[i]) 34 | pre = preds[i] 35 | return newPreds 36 | 37 | 38 | def image_processing(img, device, img_size): 39 | img_h, img_w = img_size 40 | img = cv2.resize(img, (img_w, img_h)) 41 | img = img.astype(np.float32) 42 | img = (img / 255. - mean_value) / std_value 43 | img = img.transpose([2, 0, 1]) 44 | img = torch.from_numpy(img) 45 | img = img.to(device) 46 | img = img.view(1, *img.size()) 47 | return img 48 | 49 | 50 | def get_plate_result(img, device, model, img_size): 51 | input = image_processing(img, device, img_size) 52 | preds = model(input) 53 | preds = preds.argmax(dim=2) 54 | preds = preds.view(-1).detach().cpu().numpy() 55 | newPreds = decodePlate(preds) 56 | plate = "" 57 | for i in newPreds: 58 | plate += plate_chr[int(i)] 59 | return plate 60 | 61 | 62 | def init_model(device, model_path): 63 | check_point = torch.load(model_path, map_location=device) 64 | model_state = check_point['state_dict'] 65 | cfg = check_point['cfg'] 66 | model = myNet_ocr(num_classes=len(plate_chr), export=True, cfg=cfg) 67 | model.load_state_dict(model_state) 68 | model.to(device) 69 | model.eval() 70 | return model 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('--model_path', type=str, default='../weights/plate_rec_ocr.pth', help='model.pt path(s)') 76 | parser.add_argument('--image_path', type=str, default='../license_imgs', 77 | help='source') 78 | parser.add_argument('--img_h', type=int, default=48, help='height') 79 | parser.add_argument('--img_w', type=int, default=168, help='width') 80 | parser.add_argument('--LPRNet', action='store_true', help='use LPRNet') 81 | parser.add_argument('--acc', type=bool, default='True', help=' get accuracy') 82 | device = torch.device("cpu") 83 | opt = parser.parse_args() 84 | img_size = (opt.img_h, opt.img_w) 85 | model = init_model(device, opt.model_path) 86 | if os.path.isfile(opt.image_path): 87 | right = 0 88 | begin = time.time() 89 | img = cv_imread(opt.image_path) 90 | if img.shape[-1] != 3: 91 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 92 | plate = get_plate_result(img, device, model, img_size) 93 | print(plate) 94 | elif opt.acc: 95 | file_list = [] 96 | right = 0 97 | allFilePath(opt.image_path, file_list) 98 | for pic_ in file_list: 99 | pic_name = os.path.basename(pic_) 100 | img = cv_imread(pic_) 101 | if img.shape[-1] != 3: 102 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 103 | plate = get_plate_result(img, device, model, img_size) 104 | plate_ori = pic_.split('/')[-1].split('_')[0] 105 | if plate == plate_ori: 106 | right += 1 107 | else: 108 | print(plate_ori, "rec as ---> ", plate, pic_) 109 | print("sum:%d ,right:%d , accuracy: %f" % (len(file_list), right, right / len(file_list))) 110 | else: 111 | file_list = [] 112 | allFilePath(opt.image_path, file_list) 113 | for pic_ in file_list: 114 | try: 115 | pic_name = 
os.path.basename(pic_) 116 | img = cv_imread(pic_) 117 | if img.shape[-1] != 3: 118 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 119 | plate = get_plate_result(img, device, model) 120 | print(plate, pic_name) 121 | except: 122 | print("error") 123 | -------------------------------------------------------------------------------- /UI/background.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/UI/background.jpg -------------------------------------------------------------------------------- /UI/car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/UI/car.png -------------------------------------------------------------------------------- /alphabets.py: -------------------------------------------------------------------------------- 1 | plateName = "#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航0123456789ABCDEFGHJKLMNPQRSTUVWXYZ危险品" 2 | plate_chr = "#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品" 3 | print(len(plateName)) 4 | -------------------------------------------------------------------------------- /cvtorchvision/__init__.py: -------------------------------------------------------------------------------- 1 | from cvtorchvision import cvtransforms -------------------------------------------------------------------------------- /cvtorchvision/cvtransforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .cvtransforms import * 2 | -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org 2 | # Train command: python train_yolo.py --data coco.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: bash data/scripts/get_coco.sh 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco/train2017.txt # 118287 images 14 | val: ../coco/val2017.txt # 5000 images 15 | test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 16 | 17 | # number of classes 18 | nc: 80 19 | 20 | # class names 21 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 22 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 23 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 24 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 25 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 26 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 27 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 28 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 29 | 'hair drier', 'toothbrush' ] 30 | 31 | # Print classes 
32 | # with open('data/coco.yaml') as f: 33 | # d = yaml.load(f, Loader=yaml.FullLoader) # dict 34 | # for i, x in enumerate(d['names']): 35 | # print(i, x) 36 | -------------------------------------------------------------------------------- /data/hyp.finetune.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for VOC finetuning 2 | # python train_yolo.py --batch 64 --model yolov5m.pt --data voc.yaml --img 512 --epochs 50 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | # Hyperparameter Evolution Results 7 | # Generations: 306 8 | # P R mAP.5 mAP.5:.95 box obj cls 9 | # Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146 10 | 11 | lr0: 0.0032 12 | lrf: 0.12 13 | momentum: 0.843 14 | weight_decay: 0.00036 15 | warmup_epochs: 2.0 16 | warmup_momentum: 0.5 17 | warmup_bias_lr: 0.05 18 | box: 0.0296 19 | cls: 0.243 20 | cls_pw: 0.631 21 | obj: 0.301 22 | obj_pw: 0.911 23 | iou_t: 0.2 24 | anchor_t: 2.91 25 | fl_gamma: 0.0 26 | hsv_h: 0.0138 27 | hsv_s: 0.664 28 | hsv_v: 0.464 29 | degrees: 0.373 30 | translate: 0.245 31 | scale: 0.898 32 | shear: 0.602 33 | perspective: 0.0 34 | flipud: 0.00856 35 | fliplr: 0.5 36 | mosaic: 1.0 37 | mixup: 0.243 38 | -------------------------------------------------------------------------------- /data/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train_yolo.py --batch 40 --cfg yolov5m.yaml --model '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | landmark: 0.005 # landmark loss gain 16 | cls_pw: 1.0 # cls BCELoss positive_weight 17 | obj: 1.0 # obj loss gain (scale with pixels) 18 | obj_pw: 1.0 # obj BCELoss positive_weight 19 | iou_t: 0.20 # IoU training threshold 20 | anchor_t: 4.0 # anchor-multiple threshold 21 | # anchors: 3 # anchors per output layer (0 to ignore) 22 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 23 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 24 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 25 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 26 | degrees: 0.0 # image rotation (+/- deg) 27 | translate: 0.1 # image translation (+/- fraction) 28 | scale: 0.5 # image scale (+/- gain) 29 | shear: 0.5 # image shear (+/- deg) 30 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 31 | flipud: 0.0 # image flip up-down (probability) 32 | fliplr: 0.5 # image flip left-right (probability) 33 | mosaic: 0.5 # image mosaic (probability) 34 | mixup: 0.0 # image mixup (probability) 35 | -------------------------------------------------------------------------------- /data/plate.yaml: -------------------------------------------------------------------------------- 1 | # download command/URL (optional) 2 | #download: bash data/scripts/get_.sh 3 | 4 | train: data/datasets/images/train 5 | val: data/datasets/images/val 6 | 7 
| # number of classes 8 | nc: 2 9 | 10 | # class names 11 | names: [ 'single','double' ] 12 | -------------------------------------------------------------------------------- /data/scripts/get_.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ 3 | # Download command: bash data/scripts/get_.sh 4 | # Train command: python train_yolo.py --data voc.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /VOC 8 | # /yolov5 9 | 10 | start=$(date +%s) 11 | mkdir -p ../tmp 12 | cd ../tmp/ 13 | 14 | # Download/unzip images and labels 15 | d='.' # unzip directory 16 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 17 | f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images 18 | f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images 19 | f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images 20 | for f in $f3 $f2 $f1; do 21 | echo 'Downloading' $url$f '...' 22 | curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background 23 | done 24 | wait # finish background tasks 25 | 26 | end=$(date +%s) 27 | runtime=$((end - start)) 28 | echo "Completed in" $runtime "seconds" 29 | 30 | echo "Splitting dataset..." 31 | python3 - "$@" <train.txt 91 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt 92 | 93 | python3 - "$@" < %-20s, gt: %-20s' % (raw_pred, pred, gt)) 120 | 121 | print(n_correct) 122 | print(config.TEST.NUM_TEST * config.TEST.BATCH_SIZE_PER_GPU) 123 | accuracy = n_correct / sum 124 | print('Test loss: {:.4f}, accuray: {:.4f}'.format(losses.avg, accuracy)) 125 | 126 | if writer_dict: 127 | writer = writer_dict['writer'] 128 | global_steps = writer_dict['valid_global_steps'] 129 | writer.add_scalar('valid_acc', accuracy, global_steps) 130 | writer_dict['valid_global_steps'] = global_steps + 1 131 | 132 | return accuracy 133 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from ._plate import _plate 2 | 3 | 4 | def get_dataset(config): 5 | if config.DATASET.DATASET == "plate": 6 | return _plate 7 | else: 8 | raise NotImplemented() 9 | -------------------------------------------------------------------------------- /lib/dataset/_plate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import torch.utils.data as data 3 | import os 4 | import numpy as np 5 | import cv2 6 | from alphabets import plateName 7 | 8 | 9 | def cv_imread(path): 10 | img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) 11 | return img 12 | 13 | 14 | class _plate(data.Dataset): 15 | def __init__(self, config, input_w=168, input_h=48, is_train=True): 16 | 17 | self.root = config.DATASET.ROOT 18 | self.is_train = is_train 19 | self.inp_h = config.MODEL.IMAGE_SIZE.H 20 | self.inp_w = config.MODEL.IMAGE_SIZE.W 21 | self.input_w = input_w 22 | self.input_h = input_h 23 | self.dataset_name = config.DATASET.DATASET 24 | 25 | self.mean = np.array(config.DATASET.MEAN, dtype=np.float32) 26 | self.std = np.array(config.DATASET.STD, dtype=np.float32) 27 | 28 | char_file = config.DATASET.CHAR_FILE 29 | char_dict = {num: char.strip() for num, char in enumerate(plateName)} 30 | char_dict[0] = "blank" 31 | txt_file = config.DATASET.JSON_FILE['train'] if is_train else 
config.DATASET.JSON_FILE['val'] 32 | 33 | self.labels = [] 34 | with open(txt_file, 'r', encoding='utf-8') as file: 35 | contents = file.readlines() 36 | for c in contents: 37 | c = c.strip(" \n") 38 | imgname = c.split(' ')[0] 39 | indices = c.split(' ')[1:] 40 | string = ''.join([char_dict[int(idx)] for idx in indices]) 41 | self.labels.append({imgname: string}) 42 | 43 | print("load {} images!".format(self.__len__())) 44 | 45 | def __len__(self): 46 | return len(self.labels) 47 | 48 | def __getitem__(self, idx): 49 | 50 | img_name = list(self.labels[idx].keys())[0] 51 | img = cv_imread(os.path.join(self.root, img_name)) 52 | if img.shape[-1] == 4: 53 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 54 | img = cv2.resize(img, (self.input_w, self.input_h)) 55 | img = img.astype(np.float32) 56 | img = (img / 255. - self.mean) / self.std 57 | img = img.transpose([2, 0, 1]) 58 | 59 | return img, idx 60 | -------------------------------------------------------------------------------- /lib/models/crnn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torchvision import models 4 | 5 | 6 | class BidirectionalLSTM(nn.Module): 7 | def __init__(self, nIn, nHidden, nOut): 8 | super(BidirectionalLSTM, self).__init__() 9 | 10 | self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) 11 | self.embedding = nn.Linear(nHidden * 2, nOut) 12 | 13 | def forward(self, input): 14 | recurrent, _ = self.rnn(input) 15 | T, b, h = recurrent.size() 16 | t_rec = recurrent.view(T * b, h) 17 | 18 | output = self.embedding(t_rec) # [T * b, nOut] 19 | output = output.view(T, b, -1) 20 | 21 | return output 22 | 23 | 24 | class CRNN(nn.Module): 25 | def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False): 26 | super(CRNN, self).__init__() 27 | assert imgH % 16 == 0, 'imgH has to be a multiple of 16' 28 | 29 | ks = [3, 3, 3, 3, 3, 3, 2] 30 | ps = [1, 1, 1, 1, 1, 1, 0] 31 | ss = [1, 1, 1, 1, 1, 1, 1] 32 | nm = [64, 128, 256, 256, 512, 512, 512] 33 | 34 | cnn = nn.Sequential() 35 | 36 | def convRelu(i, batchNormalization=False): 37 | nIn = nc if i == 0 else nm[i - 1] 38 | nOut = nm[i] 39 | cnn.add_module('conv{0}'.format(i), 40 | nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i])) 41 | if batchNormalization: 42 | cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut)) 43 | if leakyRelu: 44 | cnn.add_module('relu{0}'.format(i), 45 | nn.LeakyReLU(0.2, inplace=True)) 46 | else: 47 | cnn.add_module('relu{0}'.format(i), nn.ReLU(True)) 48 | 49 | convRelu(0) 50 | cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64 51 | convRelu(1) 52 | cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32 53 | convRelu(2, True) 54 | convRelu(3) 55 | cnn.add_module('pooling{0}'.format(2), 56 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 256x4x16 57 | convRelu(4, True) 58 | convRelu(5) 59 | cnn.add_module('pooling{0}'.format(3), 60 | nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 512x2x16 61 | convRelu(6, True) # 512x1x16 62 | 63 | self.cnn = cnn 64 | self.rnn = nn.Sequential( 65 | BidirectionalLSTM(512, nh, nh), 66 | BidirectionalLSTM(nh, nh, nclass)) 67 | self.newCnn = nn.Conv2d(512, nclass, 1, 1) 68 | 69 | def forward(self, input): 70 | 71 | # conv features 72 | conv = self.cnn(input) 73 | conv = self.newCnn(conv) 74 | b, c, h, w = conv.size() 75 | assert h == 1, "the height of conv must be 1" 76 | conv = conv.squeeze(2) # b *512 * width 77 | conv = conv.permute(2, 0, 1) # [w, b, c] 78 | # output = 
F.log_softmax(self.rnn(conv), dim=2) 79 | output = F.log_softmax(conv, dim=2) 80 | 81 | return output 82 | 83 | 84 | def weights_init(m): 85 | classname = m.__class__.__name__ 86 | if classname.find('Conv') != -1: 87 | m.weight.data.normal_(0.0, 0.02) 88 | elif classname.find('BatchNorm') != -1: 89 | m.weight.data.normal_(1.0, 0.02) 90 | m.bias.data.fill_(0) 91 | 92 | 93 | def get_crnn(config, export=False, cfg=None, num_class=78): 94 | model = myNet(num_classes=num_class, export=export, cfg=cfg) 95 | 96 | return model 97 | 98 | 99 | myCfg = [32, 'M', 64, 'M', 128, 'M', 256] 100 | 101 | 102 | class myNet(nn.Module): 103 | def __init__(self, cfg=None, num_classes=78, export=False): 104 | super(myNet, self).__init__() 105 | if cfg is None: 106 | cfg = [32, 32, 64, 64, 'M', 128, 128, 'M', 196, 196, 'M', 256, 256] 107 | self.feature = self.make_layers(cfg, True) 108 | self.export = export 109 | self.loc = nn.MaxPool2d((5, 2), (1, 1), (0, 1), ceil_mode=False) 110 | self.newCnn = nn.Conv2d(256, num_classes, 1, 1) 111 | 112 | def make_layers(self, cfg, batch_norm=False): 113 | layers = [] 114 | in_channels = 3 115 | for i in range(len(cfg)): 116 | if i == 0: 117 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=5, stride=1) 118 | if batch_norm: 119 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 120 | else: 121 | layers += [conv2d, nn.ReLU(inplace=True)] 122 | in_channels = cfg[i] 123 | else: 124 | if cfg[i] == 'M': 125 | layers += [nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)] 126 | else: 127 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=3, padding=(1, 1), stride=1) 128 | if batch_norm: 129 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 130 | else: 131 | layers += [conv2d, nn.ReLU(inplace=True)] 132 | in_channels = cfg[i] 133 | return nn.Sequential(*layers) 134 | 135 | def forward(self, x): 136 | x = self.feature(x) 137 | x = self.loc(x) 138 | x = self.newCnn(x) 139 | if self.export: # 推理模式 140 | conv = x.squeeze(2) # b *512 * width 141 | conv = conv.transpose(2, 1) # [w, b, c] 142 | conv = conv.argmax(dim=2) 143 | return conv 144 | else: # 训练模式 145 | b, c, h, w = x.size() 146 | assert h == 1, "the height of conv must be 1" 147 | conv = x.squeeze(2) # b *512 * width 148 | conv = conv.permute(2, 0, 1) # [w, b, c] 149 | # output = F.log_softmax(self.rnn(conv), dim=2) 150 | output = F.log_softmax(conv, dim=2) 151 | return output 152 | 153 | 154 | myCfg = [32, 32, 64, 64, 'M', 128, 128, 'M', 256, 256] 155 | 156 | 157 | class myNet1(nn.Module): 158 | def __init__(self, cfg=None, num_classes=78, export=False): 159 | super(myNet1, self).__init__() 160 | if cfg is None: 161 | cfg = myCfg 162 | self.feature = self.make_layers(cfg, True) 163 | self.export = export 164 | # self.classifier = nn.Linear(cfg[-1], num_classes) 165 | # self.loc = nn.MaxPool2d((2, 2), (5, 1), (0, 1),ceil_mode=True) 166 | self.loc = nn.MaxPool2d((5, 2), (1, 1), ceil_mode=False) 167 | self.newCnn = nn.Conv2d(256, num_classes, 1, 1) 168 | 169 | def make_layers(self, cfg, batch_norm=False): 170 | layers = [] 171 | in_channels = 3 172 | for i in range(len(cfg)): 173 | if i == 0: 174 | conv2d = nn.Conv2d(in_channels, cfg[i], kernel_size=5, stride=1) 175 | if batch_norm: 176 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 177 | else: 178 | layers += [conv2d, nn.ReLU(inplace=True)] 179 | in_channels = cfg[i] 180 | else: 181 | if cfg[i] == 'M': 182 | layers += [nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)] 183 | else: 184 | conv2d = 
nn.Conv2d(in_channels, cfg[i], kernel_size=3, padding=(0, 1), stride=1) 185 | if batch_norm: 186 | layers += [conv2d, nn.BatchNorm2d(cfg[i]), nn.ReLU(inplace=True)] 187 | else: 188 | layers += [conv2d, nn.ReLU(inplace=True)] 189 | in_channels = cfg[i] 190 | return nn.Sequential(*layers) 191 | 192 | def forward(self, x): 193 | x = self.feature(x) 194 | x = self.loc(x) 195 | x = self.newCnn(x) 196 | if self.export: 197 | conv = x.squeeze(2) # b *512 * width 198 | conv = conv.transpose(2, 1) # [w, b, c] 199 | conv = conv.argmax(dim=2) 200 | return conv 201 | else: 202 | b, c, h, w = x.size() 203 | assert h == 1, "the height of conv must be 1" 204 | conv = x.squeeze(2) # b *512 * width 205 | conv = conv.permute(2, 0, 1) # [w, b, c] 206 | output = F.log_softmax(conv, dim=2) 207 | return output 208 | 209 | 210 | class caffeNetOcr(nn.Module): 211 | def __init__(self, num_classes=2, export=False): 212 | super(caffeNetOcr, self).__init__() 213 | self.conv1 = nn.Sequential( 214 | nn.Conv2d(3, 32, 3, 1, 1), 215 | nn.BatchNorm2d(32), 216 | nn.ReLU(), 217 | nn.MaxPool2d(3, 2, ceil_mode=False), 218 | ) 219 | 220 | self.conv2 = nn.Sequential( 221 | nn.Conv2d(32, 64, 3, 1, 1), 222 | nn.BatchNorm2d(64), 223 | nn.ReLU(), 224 | nn.MaxPool2d(3, 2, ceil_mode=False), 225 | ) 226 | 227 | self.conv3 = nn.Sequential( 228 | nn.Conv2d(64, 128, 3, 1, 1), 229 | nn.BatchNorm2d(128), 230 | nn.ReLU(), 231 | nn.MaxPool2d(3, 2, ceil_mode=False), 232 | ) 233 | 234 | self.conv4 = nn.Sequential( 235 | nn.Conv2d(128, 256, 3, 1, 1), 236 | nn.BatchNorm2d(256), 237 | nn.ReLU(), 238 | nn.MaxPool2d((2, 2), (2, 1), (0, 1), ceil_mode=False), 239 | ) 240 | 241 | self.conv5 = nn.Sequential( 242 | nn.Conv2d(256, 512, 3, 1, 1), 243 | nn.BatchNorm2d(512), 244 | nn.ReLU(), 245 | nn.MaxPool2d((2, 2), (2, 1), (0, 1), ceil_mode=False), 246 | ) 247 | 248 | self.conv6 = nn.Conv2d(512, num_classes, 1, 1) 249 | self.export = export 250 | 251 | # self.Linear1=nn.Linear(192*7*7,num_classes) 252 | 253 | def forward(self, x): 254 | x = self.conv1(x) 255 | # x=self.maxPool1(x) 256 | x = self.conv2(x) 257 | x = self.conv3(x) 258 | x = self.conv4(x) 259 | x = self.conv5(x) 260 | x = self.conv6(x) 261 | if self.export: 262 | conv = x.squeeze(2) # b *512 * width 263 | conv = conv.transpose(2, 1) # [w, b, c] 264 | conv = conv.argmax(dim=2) 265 | return conv 266 | else: 267 | b, c, h, w = x.size() 268 | assert h == 1, "the height of conv must be 1" 269 | conv = x.squeeze(2) # b *512 * width 270 | conv = conv.permute(2, 0, 1) # [w, b, c] 271 | output = F.log_softmax(conv, dim=2) 272 | return output 273 | 274 | return x 275 | 276 | 277 | class myNetRes50(nn.Module): 278 | def __init__(self, num_classes=78, export=False): 279 | super(myNetRes50, self).__init__() 280 | self.model = models.resnet50(pretrained=True) 281 | self.avgPool = nn.AvgPool2d((6, 1), (1, 1)) 282 | self.conv = nn.Conv2d(512, num_classes, 1, 1) 283 | self.export = export 284 | 285 | def forward(self, x): 286 | x = self.model.conv1(x) 287 | x = self.model.bn1(x) 288 | x = self.model.relu(x) 289 | x = self.model.maxpool(x) 290 | x = self.model.layer1(x) 291 | x = self.model.layer2(x) 292 | x = self.avgPool(x) 293 | x = self.conv(x) 294 | if self.export: 295 | conv = x.squeeze(2) # b *512 * width 296 | conv = conv.transpose(2, 1) # [w, b, c] 297 | conv = conv.argmax(dim=2) 298 | return conv 299 | else: 300 | b, c, h, w = x.size() 301 | assert h == 1, "the height of conv must be 1" 302 | conv = x.squeeze(2) # b *512 * width 303 | conv = conv.permute(2, 0, 1) # [w, b, c] 304 | output = 
F.log_softmax(conv, dim=2) 305 | return output 306 | 307 | 308 | # if __name__ == '__main__': 309 | # cfg = [32, 'M', 64, 'M', 128, 'M', 256] 310 | # model = myNet(num_classes=78, export=True, cfg=cfg) 311 | # input = Variable(torch.FloatTensor(1, 3, 48, 168)) 312 | # out = model(input) 313 | # print(out.size()) 314 | -------------------------------------------------------------------------------- /lib/utils/imutils.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | 3 | matplotlib.use('tkagg') 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | import numpy as np 7 | 8 | pairs = [[i, i + 1] for i in range(16)] + \ 9 | [[i, i + 1] for i in range(17, 21)] + \ 10 | [[i, i + 1] for i in range(22, 26)] + \ 11 | [[i, i + 1] for i in range(36, 41)] + [[41, 36]] + \ 12 | [[i, i + 1] for i in range(42, 47)] + [[47, 42]] + \ 13 | [[i, i + 1] for i in range(27, 30)] + \ 14 | [[i, i + 1] for i in range(31, 35)] + \ 15 | [[i, i + 1] for i in range(48, 59)] + [[59, 48]] + \ 16 | [[i, i + 1] for i in range(60, 67)] + [[67, 60]] 17 | 18 | 19 | def show_joints(img, pts, show_idx=False, pairs=None): 20 | fig, ax = plt.subplots() 21 | ax.imshow(img) 22 | 23 | for i in range(pts.shape[0]): 24 | if pts[i, 2] > 0: 25 | ax.scatter(pts[i, 0], pts[i, 1], s=10, c='c', edgecolors='b', linewidth=0.3) 26 | if show_idx: 27 | plt.text(pts[i, 0], pts[i, 1], str(i)) 28 | if pairs is not None: 29 | for p in pairs: 30 | ax.plot(pts[p, 0], pts[p, 1], c='b', linewidth=0.3) 31 | 32 | plt.axis('off') 33 | plt.show() 34 | plt.close() 35 | 36 | 37 | def show_joints_heatmap(img, target): 38 | img = cv2.resize(img, target.shape[1:]) 39 | for i in range(target.shape[0]): 40 | t = target[i, :, :] 41 | plt.imshow(img, alpha=0.5) 42 | plt.imshow(t, alpha=0.5) 43 | plt.axis('off') 44 | plt.show() 45 | plt.close() 46 | 47 | 48 | def show_joints_boundary(img, target): 49 | img = cv2.resize(img, target.shape[1:]) 50 | for i in range(target.shape[0]): 51 | t = target[i, :, :] 52 | plt.imshow(img, alpha=0.5) 53 | plt.imshow(t, alpha=0.5) 54 | plt.axis('off') 55 | plt.show() 56 | plt.close() 57 | 58 | 59 | def show_joints_3d(predPts, pairs=None): 60 | ax = plt.subplot(111, projection='3d') 61 | 62 | view_angle = (-160, 30) 63 | if predPts.shape[1] > 2: 64 | ax.scatter(predPts[:, 2], predPts[:, 0], predPts[:, 1], s=5, c='c', marker='o', edgecolors='b', linewidths=0.5) 65 | if pairs is not None: 66 | for p in pairs: 67 | ax.plot(predPts[p, 2], predPts[p, 0], predPts[p, 1], c='b', linewidth=0.5) 68 | else: 69 | ax.scatter([0] * predPts.shape[0], predPts[:, 0], predPts[:, 1], s=10, marker='*') 70 | ax.set_xlabel('z', fontsize=10) 71 | ax.set_ylabel('x', fontsize=10) 72 | ax.set_zlabel('y', fontsize=10) 73 | ax.view_init(*view_angle) 74 | plt.show() 75 | plt.close() 76 | 77 | 78 | def save_plots(config, imgs, ppts_2d, ppts_3d, tpts_2d, tpts_3d, filename, nrows=4, ncols=4): 79 | mean = np.array(config.DATASET.MEAN, dtype=np.float32) 80 | std = np.array(config.DATASET.STD, dtype=np.float32) 81 | imgs = imgs.transpose(0, 2, 3, 1) 82 | imgs = (imgs * std + mean) * 255. 
83 | imgs = imgs.astype(np.uint8) 84 | 85 | # plot 2d 86 | fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 15)) 87 | 88 | cnt = 0 89 | for i in range(nrows): 90 | for j in range(ncols): 91 | # Output a grid of images 92 | axes[i, j].imshow(imgs[cnt]) 93 | axes[i, j].scatter(ppts_2d[cnt, :, 0] * 4, ppts_2d[cnt, :, 1] * 4, s=10, c='c', edgecolors='k', linewidth=1) 94 | axes[i, j].scatter(tpts_2d[cnt, :, 0] * 4, tpts_2d[cnt, :, 1] * 4, s=10, c='r', edgecolors='k', linewidth=1) 95 | axes[i, j].axis('off') 96 | if pairs is not None: 97 | for p in pairs: 98 | axes[i, j].plot(ppts_2d[cnt, p, 0] * 4, ppts_2d[cnt, p, 1] * 4, c='b', linewidth=0.5) 99 | axes[i, j].plot(tpts_2d[cnt, p, 0] * 4, tpts_2d[cnt, p, 1] * 4, c='r', linewidth=0.5) 100 | cnt += 1 101 | plt.savefig(filename + '_2d.png') 102 | plt.close() 103 | 104 | # plot 3d 105 | fig = plt.figure(figsize=(15, 15)) 106 | for i in range(nrows * ncols): 107 | ax = fig.add_subplot(nrows, ncols, i + 1, projection='3d') 108 | ax.scatter(ppts_3d[i, :, 2], ppts_3d[i, :, 0], ppts_3d[i, :, 1], s=10, color='b', edgecolor='k', alpha=0.6) 109 | ax.scatter(tpts_3d[i, :, 2], tpts_3d[i, :, 0], tpts_3d[i, :, 1], s=10, color='r', edgecolor='k', alpha=0.6) 110 | ax.view_init(elev=205, azim=110) 111 | if pairs is not None: 112 | for p in pairs: 113 | ax.plot(ppts_3d[i, p, 2], ppts_3d[i, p, 0], ppts_3d[i, p, 1], c='b', linewidth=1) 114 | ax.plot(tpts_3d[i, p, 2], tpts_3d[i, p, 0], tpts_3d[i, p, 1], c='r', linewidth=1) 115 | plt.savefig(filename + '_3d.png') 116 | plt.close() 117 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | import time 3 | from pathlib import Path 4 | import torch 5 | 6 | 7 | def get_optimizer(config, model): 8 | optimizer = None 9 | 10 | if config.TRAIN.OPTIMIZER == "sgd": 11 | optimizer = optim.SGD( 12 | filter(lambda p: p.requires_grad, model.parameters()), 13 | lr=config.TRAIN.LR, 14 | momentum=config.TRAIN.MOMENTUM, 15 | weight_decay=config.TRAIN.WD, 16 | nesterov=config.TRAIN.NESTEROV 17 | ) 18 | elif config.TRAIN.OPTIMIZER == "adam": 19 | optimizer = optim.Adam( 20 | filter(lambda p: p.requires_grad, model.parameters()), 21 | lr=config.TRAIN.LR, 22 | ) 23 | elif config.TRAIN.OPTIMIZER == "rmsprop": 24 | optimizer = optim.RMSprop( 25 | filter(lambda p: p.requires_grad, model.parameters()), 26 | lr=config.TRAIN.LR, 27 | momentum=config.TRAIN.MOMENTUM, 28 | weight_decay=config.TRAIN.WD, 29 | ) 30 | 31 | return optimizer 32 | 33 | 34 | def create_log_folder(cfg, phase='train'): 35 | root_output_dir = Path(cfg.OUTPUT_DIR) 36 | # set up logger 37 | if not root_output_dir.exists(): 38 | print('=> creating {}'.format(root_output_dir)) 39 | root_output_dir.mkdir() 40 | 41 | dataset = cfg.DATASET.DATASET 42 | model = cfg.MODEL.NAME 43 | 44 | time_str = time.strftime('%Y-%m-%d-%H-%M') 45 | checkpoints_output_dir = root_output_dir / dataset / model / time_str / 'checkpoints' 46 | 47 | print('=> creating {}'.format(checkpoints_output_dir)) 48 | checkpoints_output_dir.mkdir(parents=True, exist_ok=True) 49 | 50 | tensorboard_log_dir = root_output_dir / dataset / model / time_str / 'log' 51 | print('=> creating {}'.format(tensorboard_log_dir)) 52 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 53 | 54 | return {'chs_dir': str(checkpoints_output_dir), 'tb_dir': str(tensorboard_log_dir)} 55 | 56 | 57 | def get_batch_label(d, i): 58 | label = [] 59 | for idx in 
i: 60 | label.append(list(d.labels[idx].values())[0]) 61 | return label 62 | 63 | 64 | class strLabelConverter(object): 65 | """Convert between str and label. 66 | 67 | NOTE: 68 | Insert `blank` to the alphabet for CTC. 69 | 70 | Args: 71 | alphabet (str): set of the possible characters. 72 | ignore_case (bool, default=True): whether or not to ignore all of the case. 73 | """ 74 | 75 | def __init__(self, alphabet, ignore_case=False): 76 | self._ignore_case = ignore_case 77 | if self._ignore_case: 78 | alphabet = alphabet.lower() 79 | self.alphabet = alphabet + '-' # for `-1` index 80 | 81 | self.dict = {} 82 | for i, char in enumerate(alphabet): 83 | # NOTE: 0 is reserved for 'blank' required by wrap_ctc 84 | self.dict[char] = i + 1 85 | 86 | def encode(self, text): 87 | """Support batch or single str. 88 | 89 | Args: 90 | text (str or list of str): texts to convert. 91 | 92 | Returns: 93 | torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. 94 | torch.IntTensor [n]: length of each text. 95 | """ 96 | 97 | length = [] 98 | result = [] 99 | decode_flag = True if type(text[0]) == bytes else False 100 | 101 | for item in text: 102 | 103 | if decode_flag: 104 | item = item.decode('utf-8', 'strict') 105 | length.append(len(item)) 106 | for char in item: 107 | index = self.dict[char] 108 | result.append(index) 109 | text = result 110 | return (torch.IntTensor(text), torch.IntTensor(length)) 111 | 112 | def decode(self, t, length, raw=False): 113 | """Decode encoded texts back into strs. 114 | 115 | Args: 116 | torch.IntTensor [length_0 + length_1 + ... length_{n - 1}]: encoded texts. 117 | torch.IntTensor [n]: length of each text. 118 | 119 | Raises: 120 | AssertionError: when the texts and its length does not match. 121 | 122 | Returns: 123 | text (str or list of str): texts to convert. 
124 | """ 125 | if length.numel() == 1: 126 | length = length[0] 127 | assert t.numel() == length, "text with length: {} does not match declared length: {}".format(t.numel(), 128 | length) 129 | if raw: 130 | return ''.join([self.alphabet[i - 1] for i in t]) 131 | else: 132 | char_list = [] 133 | for i in range(length): 134 | if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): 135 | char_list.append(self.alphabet[t[i] - 1]) 136 | return ''.join(char_list) 137 | else: 138 | # batch mode 139 | assert t.numel() == length.sum(), "texts with length: {} does not match declared length: {}".format( 140 | t.numel(), length.sum()) 141 | texts = [] 142 | index = 0 143 | for i in range(length.numel()): 144 | l = length[i] 145 | texts.append( 146 | self.decode( 147 | t[index:index + l], torch.IntTensor([l]), raw=raw)) 148 | index += l 149 | return texts 150 | 151 | 152 | def get_char_dict(path): 153 | with open(path, 'rb') as file: 154 | char_dict = {num: char.strip().decode('gbk', 'ignore') for num, char in enumerate(file.readlines())} 155 | 156 | 157 | def model_info(model): # Plots a line-by-line description of a PyTorch model 158 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 159 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 160 | print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 161 | for i, (name, p) in enumerate(model.named_parameters()): 162 | name = name.replace('module_list.', '') 163 | print('%5g %50s %9s %12g %20s %12.3g %12.3g' % ( 164 | i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 165 | print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g)) 166 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | curPath = os.path.abspath(os.path.dirname(__file__)) 5 | sys.path.append(curPath) 6 | -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply model boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer model 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + 
x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.stack(y).mean(0) # mean ensemble 109 | y = torch.cat(y, 1) # nms ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models model=[a,b,c] or a single model model=[a] or model=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return 
ensemble 134 | -------------------------------------------------------------------------------- /models/yolov5l-0.5.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 0.5 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 16 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 17 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 18 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16 19 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 20 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32 21 | [-1, 3, ShuffleV2Block, [512, 1]], # 6 22 | ] 23 | 24 | # YOLOv5 head 25 | head: 26 | [[-1, 1, Conv, [128, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 29 | [-1, 1, C3, [128, False]], # 10 30 | 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 34 | [-1, 1, C3, [128, False]], # 14 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 3, 2]], 37 | [[-1, 11], 1, Concat, [1]], # cat head P4 38 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium) 39 | 40 | [-1, 1, Conv, [128, 3, 2]], 41 | [[-1, 7], 1, Concat, [1]], # cat head P5 42 | [-1, 1, C3, [128, False]], # 20 (P5/32-large) 43 | 44 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 45 | ] 46 | 47 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 22 | [-1, 1, SPP, [1024, [3,5,7]]], 23 | [-1, 3, C3, [1024, False]], # 8 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 31 | [-1, 3, C3, [512, False]], # 12 32 | 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 36 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 37 | 38 | [-1, 1, Conv, [256, 3, 2]], 39 | [[-1, 13], 1, Concat, [1]], # cat head P4 40 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 41 | 42 | [-1, 1, Conv, [512, 3, 2]], 43 | [[-1, 9], 1, Concat, [1]], # cat head P5 44 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 45 | 46 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /models/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model 
depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ [ -1, 1, StemBlock, [ 64, 3, 2 ] ], # 0-P1/2 17 | [ -1, 3, C3, [ 128 ] ], 18 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 2-P3/8 19 | [ -1, 9, C3, [ 256 ] ], 20 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 4-P4/16 21 | [ -1, 9, C3, [ 512 ] ], 22 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 6-P5/32 23 | [ -1, 3, C3, [ 768 ] ], 24 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 8-P6/64 25 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 26 | [ -1, 3, C3, [ 1024, False ] ], # 10 27 | ] 28 | 29 | # YOLOv5 head 30 | head: 31 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 7 ], 1, Concat, [ 1 ] ], # cat backbone P5 34 | [ -1, 3, C3, [ 768, False ] ], # 14 35 | 36 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 37 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 5 ], 1, Concat, [ 1 ] ], # cat backbone P4 39 | [ -1, 3, C3, [ 512, False ] ], # 18 40 | 41 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 42 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 43 | [ [ -1, 3 ], 1, Concat, [ 1 ] ], # cat backbone P3 44 | [ -1, 3, C3, [ 256, False ] ], # 22 (P3/8-small) 45 | 46 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 47 | [ [ -1, 19 ], 1, Concat, [ 1 ] ], # cat head P4 48 | [ -1, 3, C3, [ 512, False ] ], # 25 (P4/16-medium) 49 | 50 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 51 | [ [ -1, 15 ], 1, Concat, [ 1 ] ], # cat head P5 52 | [ -1, 3, C3, [ 768, False ] ], # 28 (P5/32-large) 53 | 54 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 55 | [ [ -1, 11 ], 1, Concat, [ 1 ] ], # cat head P6 56 | [ -1, 3, C3, [ 1024, False ] ], # 31 (P6/64-xlarge) 57 | 58 | [ [ 22, 25, 28, 31 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 22 | [-1, 1, SPP, [1024, [3,5,7]]], 23 | [-1, 3, C3, [1024, False]], # 8 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 31 | [-1, 3, C3, [512, False]], # 12 32 | 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 36 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 37 | 38 | [-1, 1, Conv, [256, 3, 2]], 39 | [[-1, 13], 1, Concat, [1]], # cat head P4 40 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 41 | 42 | [-1, 1, Conv, [512, 3, 2]], 43 | [[-1, 9], 1, Concat, [1]], # cat head P5 44 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 45 | 46 | [[16, 19, 22], 1, Detect, [nc, anchors]], # 
Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /models/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ [ -1, 1, StemBlock, [ 64, 3, 2 ] ], # 0-P1/2 17 | [ -1, 3, C3, [ 128 ] ], 18 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 2-P3/8 19 | [ -1, 9, C3, [ 256 ] ], 20 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 4-P4/16 21 | [ -1, 9, C3, [ 512 ] ], 22 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 6-P5/32 23 | [ -1, 3, C3, [ 768 ] ], 24 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 8-P6/64 25 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 26 | [ -1, 3, C3, [ 1024, False ] ], # 10 27 | ] 28 | 29 | # YOLOv5 head 30 | head: 31 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 7 ], 1, Concat, [ 1 ] ], # cat backbone P5 34 | [ -1, 3, C3, [ 768, False ] ], # 14 35 | 36 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 37 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 5 ], 1, Concat, [ 1 ] ], # cat backbone P4 39 | [ -1, 3, C3, [ 512, False ] ], # 18 40 | 41 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 42 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 43 | [ [ -1, 3 ], 1, Concat, [ 1 ] ], # cat backbone P3 44 | [ -1, 3, C3, [ 256, False ] ], # 22 (P3/8-small) 45 | 46 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 47 | [ [ -1, 19 ], 1, Concat, [ 1 ] ], # cat head P4 48 | [ -1, 3, C3, [ 512, False ] ], # 25 (P4/16-medium) 49 | 50 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 51 | [ [ -1, 15 ], 1, Concat, [ 1 ] ], # cat head P5 52 | [ -1, 3, C3, [ 768, False ] ], # 28 (P5/32-large) 53 | 54 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 55 | [ [ -1, 11 ], 1, Concat, [ 1 ] ], # cat head P6 56 | [ -1, 3, C3, [ 1024, False ] ], # 31 (P6/64-xlarge) 57 | 58 | [ [ 22, 25, 28, 31 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.25 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 6, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 3, C3, [1024]], 22 | [-1, 1, SPPF, [1024, 5]], # 9 23 | ] 24 | 25 | # YOLOv5 v6.0 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 
1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # Parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.25 # layer channel multiple 5 | anchors: 6 | - [10,13, 16,30, 33,23] # P3/8 7 | - [30,61, 62,45, 59,119] # P4/16 8 | - [116,90, 156,198, 373,326] # P5/32 9 | 10 | # YOLOv5 backbone 11 | backbone: 12 | # [from, number, module, args] 13 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 14 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 15 | [-1, 3, C3, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 17 | [-1, 6, C3, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 19 | [-1, 9, C3, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 21 | [-1, 3, C3, [1024]], 22 | [-1, 1, SPPF, [1024, 5]], # 9 23 | ] 24 | 25 | # YOLOv5 v6.0 head 26 | head: 27 | [[-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 29 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 30 | [-1, 3, C3, [512, False]], # 13 31 | 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 35 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 36 | 37 | [-1, 1, Conv, [256, 3, 2]], 38 | [[-1, 14], 1, Concat, [1]], # cat head P4 39 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [512, 3, 2]], 42 | [[-1, 10], 1, Concat, [1]], # cat head P5 43 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 44 | 45 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.5 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 22 | [-1, 1, SPP, [1024, [3,5,7]]], 23 | [-1, 3, C3, [1024, False]], # 8 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 31 | [-1, 3, C3, [512, False]], # 12 32 | 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 36 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 37 | 38 | [-1, 1, Conv, [256, 3, 2]], 39 | [[-1, 13], 1, Concat, [1]], # cat head P4 40 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 41 | 42 | [-1, 1, Conv, [512, 3, 2]], 43 | [[-1, 9], 1, Concat, [1]], # cat head P5 44 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 45 | 46 | 
[[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /models/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ [ -1, 1, StemBlock, [ 64, 3, 2 ] ], # 0-P1/2 17 | [ -1, 3, C3, [ 128 ] ], 18 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 2-P3/8 19 | [ -1, 9, C3, [ 256 ] ], 20 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 4-P4/16 21 | [ -1, 9, C3, [ 512 ] ], 22 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 6-P5/32 23 | [ -1, 3, C3, [ 768 ] ], 24 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 8-P6/64 25 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 26 | [ -1, 3, C3, [ 1024, False ] ], # 10 27 | ] 28 | 29 | # YOLOv5 head 30 | head: 31 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 7 ], 1, Concat, [ 1 ] ], # cat backbone P5 34 | [ -1, 3, C3, [ 768, False ] ], # 14 35 | 36 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 37 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 5 ], 1, Concat, [ 1 ] ], # cat backbone P4 39 | [ -1, 3, C3, [ 512, False ] ], # 18 40 | 41 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 42 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 43 | [ [ -1, 3 ], 1, Concat, [ 1 ] ], # cat backbone P3 44 | [ -1, 3, C3, [ 256, False ] ], # 22 (P3/8-small) 45 | 46 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 47 | [ [ -1, 19 ], 1, Concat, [ 1 ] ], # cat head P4 48 | [ -1, 3, C3, [ 512, False ] ], # 25 (P4/16-medium) 49 | 50 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 51 | [ [ -1, 15 ], 1, Concat, [ 1 ] ], # cat head P5 52 | [ -1, 3, C3, [ 768, False ] ], # 28 (P5/32-large) 53 | 54 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 55 | [ [ -1, 11 ], 1, Concat, [ 1 ] ], # cat head P6 56 | [ -1, 3, C3, [ 1024, False ] ], # 31 (P6/64-xlarge) 57 | 58 | [ [ 22, 25, 28, 31 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /onnx/export_pt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | import torch 5 | import torch.nn as nn 6 | import models 7 | from models.experimental import attempt_load 8 | from utils.activations import Hardswish, SiLU 9 | from utils.general import set_logging, check_img_size 10 | import onnx 11 | 12 | sys.path.append('../') 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--weights', type=str, default='../weights/plate_rec.pt', help='weights path') 17 | parser.add_argument('--img_size', nargs='+', type=int, default=[640, 640], help='image size') 18 | parser.add_argument('--batch_size', type=int, default=1, help='batch size') 19 | parser.add_argument('--dynamic', action='store_true', default=False, help='enable dynamic axis in onnx model') 20 | parser.add_argument('--onnx2pb', action='store_true', default=False, help='export onnx to pb') 21 | parser.add_argument('--onnx_infer', action='store_true', default=True, help='onnx infer test') 22 | # =======================TensorRT================================= 23 | 
parser.add_argument('--onnx2trt', action='store_true', default=False, help='export onnx to tensorrt') 24 | parser.add_argument('--fp16_trt', action='store_true', default=False, help='fp16 infer') 25 | # ================================================================ 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) 34 | delattr(model.model[-1], 'anchor_grid') 35 | model.model[-1].anchor_grid = [torch.zeros(1)] * 3 36 | model.model[-1].export_cat = True 37 | model.eval() 38 | labels = model.names 39 | 40 | # Checks 41 | gs = int(max(model.stride)) 42 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] 43 | 44 | # Input 45 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) 46 | 47 | # Update model 48 | for k, m in model.named_modules(): 49 | m._non_persistent_buffers_set = set() 50 | if isinstance(m, models.common.Conv): 51 | if isinstance(m.act, nn.Hardswish): 52 | m.act = Hardswish() 53 | elif isinstance(m.act, nn.SiLU): 54 | m.act = SiLU() 55 | if isinstance(m, models.common.ShuffleV2Block): 56 | for i in range(len(m.branch1)): 57 | if isinstance(m.branch1[i], nn.SiLU): 58 | m.branch1[i] = SiLU() 59 | for i in range(len(m.branch2)): 60 | if isinstance(m.branch2[i], nn.SiLU): 61 | m.branch2[i] = SiLU() 62 | if isinstance(m, models.common.BlazeBlock): 63 | if isinstance(m.relu, nn.SiLU): 64 | m.relu = SiLU() 65 | if isinstance(m, models.common.DoubleBlazeBlock): 66 | if isinstance(m.relu, nn.SiLU): 67 | m.relu = SiLU() 68 | for i in range(len(m.branch1)): 69 | if isinstance(m.branch1[i], nn.SiLU): 70 | m.branch1[i] = SiLU() 71 | y = model(img) 72 | 73 | # ONNX export 74 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 75 | f = opt.weights.replace('.pt', '.onnx') # filename 76 | model.fuse() # only for ONNX 77 | input_names = ['input'] 78 | output_names = ['output'] 79 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, 80 | input_names=input_names, 81 | output_names=output_names, 82 | dynamic_axes={'input': {0: 'batch'}, 83 | 'output': {0: 'batch'} 84 | } if opt.dynamic else None) 85 | 86 | # Checks 87 | onnx_model = onnx.load(f) 88 | onnx.checker.check_model(onnx_model) 89 | print('ONNX export success, saved as %s' % f) 90 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 91 | 92 | # onnx infer 93 | if opt.onnx_infer: 94 | import onnxruntime 95 | import numpy as np 96 | 97 | providers = ['CPUExecutionProvider'] 98 | session = onnxruntime.InferenceSession(f, providers=providers) 99 | im = img.cpu().numpy().astype(np.float32) # torch to numpy 100 | y_onnx = session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: im})[0] 101 | print("pred's shape is ", y_onnx.shape) 102 | print("max(|torch_pred - onnx_pred|) =", abs(y.cpu().numpy() - y_onnx).max()) 103 | 104 | # TensorRT export 105 | if opt.onnx2trt: 106 | from torch2trt.trt_model import ONNX_to_TRT 107 | 108 | print('\nStarting TensorRT...') 109 | ONNX_to_TRT(onnx_model_path=f, trt_engine_path=f.replace('.onnx', '.trt'), fp16_mode=opt.fp16_trt) 110 | 111 | # PB export 112 | if opt.onnx2pb: 113 | print('download the newest onnx_tf by https://github.com/onnx/onnx-tensorflow/tree/master/onnx_tf') 114 | from onnx_tf.backend import prepare 115 | import tensorflow as tf 116 | 117 | outpb = f.replace('.onnx', '.pb') 118 | tf_rep = prepare(onnx_model, strict=False) 119 | tf_rep.export_graph(outpb) 120 | 121 | out_onnx = tf_rep.run(img) 122 | 123 | with tf.Graph().as_default(): 124 | graph_def = tf.GraphDef() 125 | with open(outpb, "rb") as f: 126 | graph_def.ParseFromString(f.read()) 127 | tf.import_graph_def(graph_def, name="") 128 | with tf.Session() as sess: 129 | init = tf.global_variables_initializer() 130 | input_x = sess.graph.get_tensor_by_name(input_names[0] + ':0') 131 | outputs = [] 132 | for i in output_names: 133 | outputs.append(sess.graph.get_tensor_by_name(i + ':0')) 134 | out_pb = sess.run(outputs, feed_dict={input_x: img}) 135 | 136 | print(f'out_pytorch {y}') 137 | print(f'out_onnx {out_onnx}') 138 | print(f'out_pb {out_pb}') 139 | -------------------------------------------------------------------------------- /onnx/export_pth.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from Net.colorNet import myNet_ocr_color 3 | from alphabets import plate_chr 4 | import torch 5 | import onnx 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--weights', type=str, default='../weights/plate_rec_ocr.pth', help='weights path') 10 | parser.add_argument('--save_path', type=str, default='../weights/plate_rec_ocr.onnx', help='onnx save path') 11 | parser.add_argument('--img_size', nargs='+', type=int, default=[48, 168], help='image size') 12 | parser.add_argument('--batch_size', type=int, default=1, help='batch size') 13 | parser.add_argument('--dynamic', action='store_true', default=False, help='enable dynamic axis in onnx model') 14 | parser.add_argument('--simplify', action='store_true', default=False, help='simplified onnx') 15 | 16 | opt = parser.parse_args() 17 | print(opt) 18 | checkpoint = torch.load(opt.weights) 19 | cfg = checkpoint['cfg'] 20 | model = myNet_ocr_color(num_classes=len(plate_chr), cfg=cfg, export=True, color_num=5) 21 | model.load_state_dict(checkpoint['state_dict'],False) 22 | model.eval() 23 | 24 | input = torch.randn(opt.batch_size, 3, 48, 168) 25 | onnx_file_name = opt.save_path 26 | 27 | torch.onnx.export(model, input, onnx_file_name, 28 | input_names=["images"], output_names=["output_1", "output_2"], 29 | verbose=False, 30 | opset_version=11, 31 | dynamic_axes={'images': {0: 'batch'}, 32 | 'output': {0: 'batch'} 33 | } if opt.dynamic else None) 34 | print(f"convert completed,save to {opt.save_path}") 35 | if opt.simplify: 36 | from 
onnxsim import simplify 37 | 38 | print(f"begin simplify ....") 39 | input_shapes = {"images": list(input.shape)} 40 | onnx_model = onnx.load(onnx_file_name) 41 | model_simp, check = simplify(onnx_model, test_input_shapes=input_shapes) 42 | onnx.save(model_simp, onnx_file_name) 43 | print(f"simplify completed,save to {opt.save_path}") 44 | -------------------------------------------------------------------------------- /onnx/onnx_detect.py: -------------------------------------------------------------------------------- 1 | import onnxruntime 2 | import numpy as np 3 | import cv2 4 | import copy 5 | import os 6 | import argparse 7 | from PIL import Image, ImageDraw, ImageFont 8 | import time 9 | 10 | plate_color_list = ['黑色', '蓝色', '绿色', '白色', '黄色'] 11 | plateName = r"#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品" 12 | mean_value, std_value = (0.588, 0.193) # 识别模型均值标准差 13 | 14 | def decodePlate(preds): # 识别后处理 15 | pre = 0 16 | newPreds = [] 17 | for i in range(len(preds)): 18 | if preds[i] != 0 and preds[i] != pre: 19 | newPreds.append(preds[i]) 20 | pre = preds[i] 21 | plate = "" 22 | for i in newPreds: 23 | plate += plateName[int(i)] 24 | return plate 25 | 26 | 27 | def rec_pre_precessing(img, size=(48, 168)): 28 | img = cv2.resize(img, (168, 48)) 29 | img = img.astype(np.float32) 30 | img = (img / 255 - mean_value) / std_value 31 | img = img.transpose(2, 0, 1) 32 | img = img.reshape(1, *img.shape) 33 | return img 34 | 35 | 36 | def get_plate_result(img, session_rec): 37 | img = rec_pre_precessing(img) 38 | y_onnx_plate, y_onnx_color = session_rec.run([session_rec.get_outputs()[0].name, session_rec.get_outputs()[1].name], 39 | {session_rec.get_inputs()[0].name: img}) 40 | index = np.argmax(y_onnx_plate, axis=-1) 41 | index_color = np.argmax(y_onnx_color) 42 | plate_color = plate_color_list[index_color] 43 | plate_no = decodePlate(index[0]) 44 | return plate_no, plate_color 45 | 46 | 47 | def allFilePath(rootPath, allFIleList): 48 | fileList = os.listdir(rootPath) 49 | for temp in fileList: 50 | if os.path.isfile(os.path.join(rootPath, temp)): 51 | allFIleList.append(os.path.join(rootPath, temp)) 52 | else: 53 | allFilePath(os.path.join(rootPath, temp), allFIleList) 54 | 55 | 56 | def get_split_merge(img): # 双层车牌 57 | h, w, c = img.shape 58 | img_upper = img[0:int(5 / 12 * h), :] 59 | img_lower = img[int(1 / 3 * h):, :] 60 | img_upper = cv2.resize(img_upper, (img_lower.shape[1], img_lower.shape[0])) 61 | new_img = np.hstack((img_upper, img_lower)) 62 | return new_img 63 | 64 | 65 | def order_points(pts): 66 | rect = np.zeros((4, 2), dtype="float32") 67 | s = pts.sum(axis=1) 68 | rect[0] = pts[np.argmin(s)] 69 | rect[2] = pts[np.argmax(s)] 70 | diff = np.diff(pts, axis=1) 71 | rect[1] = pts[np.argmin(diff)] 72 | rect[3] = pts[np.argmax(diff)] 73 | return rect 74 | 75 | 76 | def four_point_transform(image, pts): # 透视变换 77 | rect = order_points(pts) 78 | (tl, tr, br, bl) = rect 79 | widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) 80 | widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) 81 | maxWidth = max(int(widthA), int(widthB)) 82 | heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) 83 | heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) 84 | maxHeight = max(int(heightA), int(heightB)) 85 | dst = np.array([ 86 | [0, 0], 87 | [maxWidth - 1, 0], 88 | [maxWidth - 1, maxHeight - 1], 89 | [0, maxHeight - 1]], dtype="float32") 90 | M = cv2.getPerspectiveTransform(rect, dst) 91 | warped = 
cv2.warpPerspective(image, M, (maxWidth, maxHeight)) 92 | 93 | return warped 94 | 95 | 96 | def my_letter_box(img, size=(640, 640)): # 97 | h, w, c = img.shape 98 | r = min(size[0] / h, size[1] / w) 99 | new_h, new_w = int(h * r), int(w * r) 100 | top = int((size[0] - new_h) / 2) 101 | left = int((size[1] - new_w) / 2) 102 | 103 | bottom = size[0] - new_h - top 104 | right = size[1] - new_w - left 105 | img_resize = cv2.resize(img, (new_w, new_h)) 106 | img = cv2.copyMakeBorder(img_resize, top, bottom, left, right, borderType=cv2.BORDER_CONSTANT, 107 | value=(114, 114, 114)) 108 | return img, r, left, top 109 | 110 | 111 | def xywh2xyxy(boxes): 112 | xywh = copy.deepcopy(boxes) 113 | xywh[:, 0] = boxes[:, 0] - boxes[:, 2] / 2 114 | xywh[:, 1] = boxes[:, 1] - boxes[:, 3] / 2 115 | xywh[:, 2] = boxes[:, 0] + boxes[:, 2] / 2 116 | xywh[:, 3] = boxes[:, 1] + boxes[:, 3] / 2 117 | return xywh 118 | 119 | 120 | def my_nms(boxes, iou_thresh): 121 | index = np.argsort(boxes[:, 4])[::-1] 122 | keep = [] 123 | while index.size > 0: 124 | i = index[0] 125 | keep.append(i) 126 | x1 = np.maximum(boxes[i, 0], boxes[index[1:], 0]) 127 | y1 = np.maximum(boxes[i, 1], boxes[index[1:], 1]) 128 | x2 = np.minimum(boxes[i, 2], boxes[index[1:], 2]) 129 | y2 = np.minimum(boxes[i, 3], boxes[index[1:], 3]) 130 | 131 | w = np.maximum(0, x2 - x1) 132 | h = np.maximum(0, y2 - y1) 133 | 134 | inter_area = w * h 135 | union_area = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1]) + ( 136 | boxes[index[1:], 2] - boxes[index[1:], 0]) * (boxes[index[1:], 3] - boxes[index[1:], 1]) 137 | iou = inter_area / (union_area - inter_area) 138 | idx = np.where(iou <= iou_thresh)[0] 139 | index = index[idx + 1] 140 | return keep 141 | 142 | 143 | def restore_box(boxes, r, left, top): 144 | boxes[:, [0, 2, 5, 7, 9, 11]] -= left 145 | boxes[:, [1, 3, 6, 8, 10, 12]] -= top 146 | 147 | boxes[:, [0, 2, 5, 7, 9, 11]] /= r 148 | boxes[:, [1, 3, 6, 8, 10, 12]] /= r 149 | return boxes 150 | 151 | 152 | def detect_pre_precessing(img, img_size): 153 | img, r, left, top = my_letter_box(img, img_size) 154 | # cv2.imwrite("1.jpg",img) 155 | img = img[:, :, ::-1].transpose(2, 0, 1).copy().astype(np.float32) 156 | img = img / 255 157 | img = img.reshape(1, *img.shape) 158 | return img, r, left, top 159 | 160 | 161 | def post_precessing(dets, r, left, top, conf_thresh=0.3, iou_thresh=0.5): # 检测后处理 162 | choice = dets[:, :, 4] > conf_thresh 163 | dets = dets[choice] 164 | dets[:, 13:15] *= dets[:, 4:5] 165 | box = dets[:, :4] 166 | boxes = xywh2xyxy(box) 167 | score = np.max(dets[:, 13:15], axis=-1, keepdims=True) 168 | index = np.argmax(dets[:, 13:15], axis=-1).reshape(-1, 1) 169 | output = np.concatenate((boxes, score, dets[:, 5:13], index), axis=1) 170 | reserve_ = my_nms(output, iou_thresh) 171 | output = output[reserve_] 172 | output = restore_box(output, r, left, top) 173 | return output 174 | 175 | 176 | def rec_plate(outputs, img0, session_rec): # 识别车牌 177 | dict_list = [] 178 | for output in outputs: 179 | result_dict = {} 180 | rect = output[:4].tolist() 181 | land_marks = output[5:13].reshape(4, 2) 182 | roi_img = four_point_transform(img0, land_marks) 183 | label = int(output[-1]) 184 | score = output[4] 185 | if label == 1: # 代表是双层车牌 186 | roi_img = get_split_merge(roi_img) 187 | plate_no, plate_color = get_plate_result(roi_img, session_rec) 188 | result_dict['rect'] = rect 189 | result_dict['landmarks'] = land_marks.tolist() 190 | result_dict['plate_no'] = plate_no 191 | result_dict['roi_height'] = roi_img.shape[0] 192 | 
result_dict['plate_color'] = plate_color 193 | dict_list.append(result_dict) 194 | return dict_list 195 | 196 | 197 | def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=20): # 将识别结果画在图上 198 | if isinstance(img, np.ndarray): 199 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 200 | draw = ImageDraw.Draw(img) 201 | fontText = ImageFont.truetype( 202 | "../fonts/AlibabaPuHuiTi-3-65-Medium.ttf", textSize, encoding="utf-8") 203 | draw.text((left, top), text, textColor, font=fontText) 204 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 205 | 206 | 207 | def draw_result(orgimg, dict_list): 208 | result_str = "" 209 | for result in dict_list: 210 | rect_area = result['rect'] 211 | x, y, w, h = rect_area[0], rect_area[1], rect_area[2] - rect_area[0], rect_area[3] - rect_area[1] 212 | padding_w = 0.05 * w 213 | padding_h = 0.11 * h 214 | rect_area[0] = max(0, int(x - padding_w)) 215 | rect_area[1] = min(orgimg.shape[1], int(y - padding_h)) 216 | rect_area[2] = max(0, int(rect_area[2] + padding_w)) 217 | rect_area[3] = min(orgimg.shape[0], int(rect_area[3] + padding_h)) 218 | height_area = result['roi_height'] 219 | landmarks = result['landmarks'] 220 | result = result['plate_no'] 221 | result_str += result + " " 222 | for i in range(4): # 关键点 223 | cv2.circle(orgimg, (int(landmarks[i][0]), int(landmarks[i][1])), 5, clors[i], -1) 224 | cv2.rectangle(orgimg, (rect_area[0], rect_area[1]), (rect_area[2], rect_area[3]), (255, 255, 0), 2) # 画框 225 | if len(result) >= 1: 226 | orgimg = cv2ImgAddText(orgimg, result, rect_area[0] - height_area, rect_area[1] - height_area - 10, 227 | (0, 255, 0), height_area) 228 | print(result_str) 229 | return orgimg 230 | 231 | 232 | if __name__ == "__main__": 233 | begin = time.time() 234 | parser = argparse.ArgumentParser() 235 | parser.add_argument('--detect_model', type=str, default='../weights/plate_rec.onnx', 236 | help='model.pt path(s)') # 检测模型 237 | parser.add_argument('--rec_model', type=str, default='../weights/plate_rec_color.onnx', 238 | help='model.pt path(s)') # 识别模型 239 | parser.add_argument('--image_path', type=str, default='../imgs', help='source') 240 | parser.add_argument('--img_size', type=int, default=640, help='inference size (pixels)') 241 | parser.add_argument('--output', type=str, default='./result_onnx', help='source') 242 | opt = parser.parse_args() 243 | file_list = [] 244 | allFilePath(opt.image_path, file_list) 245 | providers = ['CPUExecutionProvider'] 246 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)] 247 | img_size = (opt.img_size, opt.img_size) 248 | session_detect = onnxruntime.InferenceSession(opt.detect_model, providers=providers) 249 | session_rec = onnxruntime.InferenceSession(opt.rec_model, providers=providers) 250 | if not os.path.exists(opt.output): 251 | os.mkdir(opt.output) 252 | save_path = opt.output 253 | count = 0 254 | for pic_ in file_list: 255 | count += 1 256 | print(count, pic_, end=" ") 257 | img = cv2.imread(pic_) 258 | img0 = copy.deepcopy(img) 259 | img, r, left, top = detect_pre_precessing(img, img_size) 260 | # print(img.shape) 261 | y_onnx = session_detect.run([session_detect.get_outputs()[0].name], {session_detect.get_inputs()[0].name: img})[ 262 | 0] 263 | outputs = post_precessing(y_onnx, r, left, top) 264 | result_list = rec_plate(outputs, img0, session_rec) 265 | ori_img = draw_result(img0, result_list) 266 | img_name = os.path.basename(pic_) 267 | save_img_path = os.path.join(save_path, img_name) 268 | cv2.imwrite(save_img_path, 
ori_img) 269 | print(f"总共耗时{time.time() - begin} s") 270 | -------------------------------------------------------------------------------- /onnx/openvino_infer.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from openvino.runtime import Core 4 | import os 5 | import time 6 | import copy 7 | from PIL import Image, ImageDraw, ImageFont 8 | import argparse 9 | 10 | 11 | def cv_imread(path): 12 | img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) 13 | return img 14 | 15 | 16 | def allFilePath(rootPath, allFIleList): 17 | fileList = os.listdir(rootPath) 18 | for temp in fileList: 19 | if os.path.isfile(os.path.join(rootPath, temp)): 20 | allFIleList.append(os.path.join(rootPath, temp)) 21 | else: 22 | allFilePath(os.path.join(rootPath, temp), allFIleList) 23 | 24 | 25 | mean_value, std_value = (0.588, 0.193) 26 | plateName = r"#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品" 27 | 28 | 29 | def rec_pre_precessing(img, size=(48, 168)): 30 | img = cv2.resize(img, (168, 48)) 31 | img = img.astype(np.float32) 32 | img = (img / 255 - mean_value) / std_value 33 | img = img.transpose(2, 0, 1) 34 | img = img.reshape(1, *img.shape) 35 | return img 36 | 37 | 38 | def decodePlate(preds): 39 | pre = 0 40 | newPreds = [] 41 | preds = preds.astype(np.int8)[0] 42 | for i in range(len(preds)): 43 | if preds[i] != 0 and preds[i] != pre: 44 | newPreds.append(preds[i]) 45 | pre = preds[i] 46 | plate = "" 47 | for i in newPreds: 48 | plate += plateName[int(i)] 49 | return plate 50 | 51 | 52 | def load_model(onnx_path): 53 | ie = Core() 54 | model_onnx = ie.read_model(model=onnx_path) 55 | compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU") 56 | output_layer_onnx = compiled_model_onnx.output(0) 57 | return compiled_model_onnx, output_layer_onnx 58 | 59 | 60 | def get_plate_result(img, rec_model, rec_output): 61 | img = rec_pre_precessing(img) 62 | res_onnx = rec_model([img])[rec_output] 63 | index = np.argmax(res_onnx, axis=-1) 64 | plate_no = decodePlate(index) 65 | return plate_no 66 | 67 | 68 | def get_split_merge(img): # 双层车牌进行分割后识别 69 | h, w, c = img.shape 70 | img_upper = img[0:int(5 / 12 * h), :] 71 | img_lower = img[int(1 / 3 * h):, :] 72 | img_upper = cv2.resize(img_upper, (img_lower.shape[1], img_lower.shape[0])) 73 | new_img = np.hstack((img_upper, img_lower)) 74 | return new_img 75 | 76 | 77 | def order_points(pts): 78 | rect = np.zeros((4, 2), dtype="float32") 79 | s = pts.sum(axis=1) 80 | rect[0] = pts[np.argmin(s)] 81 | rect[2] = pts[np.argmax(s)] 82 | diff = np.diff(pts, axis=1) 83 | rect[1] = pts[np.argmin(diff)] 84 | rect[3] = pts[np.argmax(diff)] 85 | return rect 86 | 87 | 88 | def four_point_transform(image, pts): 89 | rect = order_points(pts) 90 | (tl, tr, br, bl) = rect 91 | widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2)) 92 | widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2)) 93 | maxWidth = max(int(widthA), int(widthB)) 94 | heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2)) 95 | heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2)) 96 | maxHeight = max(int(heightA), int(heightB)) 97 | dst = np.array([ 98 | [0, 0], 99 | [maxWidth - 1, 0], 100 | [maxWidth - 1, maxHeight - 1], 101 | [0, maxHeight - 1]], dtype="float32") 102 | M = cv2.getPerspectiveTransform(rect, dst) 103 | warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight)) 104 | 105 | # return the warped image 106 | return warped 
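# (annotation, not part of the original numbered source) my_letter_box below is the standard
# YOLO-style letterbox: scale the image by r = min(640/h, 640/w), then pad symmetrically with
# gray (114, 114, 114) up to 640x640 so the aspect ratio is preserved. The returned r, left and
# top are later consumed by restore_box() to map detected boxes and landmarks back to the
# original image coordinates.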
107 | 108 | 109 | def my_letter_box(img, size=(640, 640)): 110 | h, w, c = img.shape 111 | r = min(size[0] / h, size[1] / w) 112 | new_h, new_w = int(h * r), int(w * r) 113 | top = int((size[0] - new_h) / 2) 114 | left = int((size[1] - new_w) / 2) 115 | 116 | bottom = size[0] - new_h - top 117 | right = size[1] - new_w - left 118 | img_resize = cv2.resize(img, (new_w, new_h)) 119 | img = cv2.copyMakeBorder(img_resize, top, bottom, left, right, borderType=cv2.BORDER_CONSTANT, 120 | value=(114, 114, 114)) 121 | return img, r, left, top 122 | 123 | 124 | def xywh2xyxy(boxes): 125 | xywh = copy.deepcopy(boxes) 126 | xywh[:, 0] = boxes[:, 0] - boxes[:, 2] / 2 127 | xywh[:, 1] = boxes[:, 1] - boxes[:, 3] / 2 128 | xywh[:, 2] = boxes[:, 0] + boxes[:, 2] / 2 129 | xywh[:, 3] = boxes[:, 1] + boxes[:, 3] / 2 130 | return xywh 131 | 132 | 133 | def my_nms(boxes, iou_thresh): 134 | index = np.argsort(boxes[:, 4])[::-1] 135 | keep = [] 136 | while index.size > 0: 137 | i = index[0] 138 | keep.append(i) 139 | x1 = np.maximum(boxes[i, 0], boxes[index[1:], 0]) 140 | y1 = np.maximum(boxes[i, 1], boxes[index[1:], 1]) 141 | x2 = np.minimum(boxes[i, 2], boxes[index[1:], 2]) 142 | y2 = np.minimum(boxes[i, 3], boxes[index[1:], 3]) 143 | 144 | w = np.maximum(0, x2 - x1) 145 | h = np.maximum(0, y2 - y1) 146 | 147 | inter_area = w * h 148 | union_area = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1]) + ( 149 | boxes[index[1:], 2] - boxes[index[1:], 0]) * (boxes[index[1:], 3] - boxes[index[1:], 1]) 150 | iou = inter_area / (union_area - inter_area) 151 | idx = np.where(iou <= iou_thresh)[0] 152 | index = index[idx + 1] 153 | return keep 154 | 155 | 156 | def restore_box(boxes, r, left, top): 157 | boxes[:, [0, 2, 5, 7, 9, 11]] -= left 158 | boxes[:, [1, 3, 6, 8, 10, 12]] -= top 159 | 160 | boxes[:, [0, 2, 5, 7, 9, 11]] /= r 161 | boxes[:, [1, 3, 6, 8, 10, 12]] /= r 162 | return boxes 163 | 164 | 165 | def detect_pre_precessing(img, img_size): 166 | img, r, left, top = my_letter_box(img, img_size) 167 | # cv2.imwrite("1.jpg",img) 168 | img = img[:, :, ::-1].transpose(2, 0, 1).copy().astype(np.float32) 169 | img = img / 255 170 | img = img.reshape(1, *img.shape) 171 | return img, r, left, top 172 | 173 | 174 | def post_precessing(dets, r, left, top, conf_thresh=0.3, iou_thresh=0.5): # 检测后处理 175 | choice = dets[:, :, 4] > conf_thresh 176 | dets = dets[choice] 177 | dets[:, 13:15] *= dets[:, 4:5] 178 | box = dets[:, :4] 179 | boxes = xywh2xyxy(box) 180 | score = np.max(dets[:, 13:15], axis=-1, keepdims=True) 181 | index = np.argmax(dets[:, 13:15], axis=-1).reshape(-1, 1) 182 | output = np.concatenate((boxes, score, dets[:, 5:13], index), axis=1) 183 | reserve_ = my_nms(output, iou_thresh) 184 | output = output[reserve_] 185 | output = restore_box(output, r, left, top) 186 | return output 187 | 188 | 189 | def rec_plate(outputs, img0, rec_model, rec_output): 190 | dict_list = [] 191 | for output in outputs: 192 | result_dict = {} 193 | rect = output[:4].tolist() 194 | land_marks = output[5:13].reshape(4, 2) 195 | roi_img = four_point_transform(img0, land_marks) 196 | label = int(output[-1]) 197 | if label == 1: # 代表是双层车牌 198 | roi_img = get_split_merge(roi_img) 199 | plate_no = get_plate_result(roi_img, rec_model, rec_output) # 得到车牌识别结果 200 | result_dict['rect'] = rect 201 | result_dict['landmarks'] = land_marks.tolist() 202 | result_dict['plate_no'] = plate_no 203 | result_dict['roi_height'] = roi_img.shape[0] 204 | dict_list.append(result_dict) 205 | return dict_list 206 | 207 | 208 | def cv2ImgAddText(img, 
text, left, top, textColor=(0, 255, 0), textSize=20): 209 | if isinstance(img, np.ndarray): 210 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 211 | draw = ImageDraw.Draw(img) 212 | fontText = ImageFont.truetype( 213 | "../fonts/AlibabaPuHuiTi-3-65-Medium.ttf", textSize, encoding="utf-8") 214 | draw.text((left, top), text, textColor, font=fontText) 215 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 216 | 217 | 218 | def draw_result(orgimg, dict_list): 219 | result_str = "" 220 | for result in dict_list: 221 | rect_area = result['rect'] 222 | 223 | x, y, w, h = rect_area[0], rect_area[1], rect_area[2] - rect_area[0], rect_area[3] - rect_area[1] 224 | padding_w = 0.05 * w 225 | padding_h = 0.11 * h 226 | rect_area[0] = max(0, int(x - padding_w)) 227 | rect_area[1] = min(orgimg.shape[1], int(y - padding_h)) 228 | rect_area[2] = max(0, int(rect_area[2] + padding_w)) 229 | rect_area[3] = min(orgimg.shape[0], int(rect_area[3] + padding_h)) 230 | 231 | height_area = result['roi_height'] 232 | landmarks = result['landmarks'] 233 | result = result['plate_no'] 234 | result_str += result + " " 235 | if len(result) >= 6: 236 | cv2.rectangle(orgimg, (rect_area[0], rect_area[1]), (rect_area[2], rect_area[3]), (0, 0, 255), 2) # 画框 237 | orgimg = cv2ImgAddText(orgimg, result, rect_area[0] - height_area, rect_area[1] - height_area - 10, 238 | (0, 255, 0), height_area) 239 | return orgimg 240 | 241 | 242 | def get_second(capture): 243 | if capture.isOpened(): 244 | rate = capture.get(5) 245 | FrameNumber = capture.get(7) 246 | duration = FrameNumber / rate 247 | return int(rate), int(FrameNumber), int(duration) 248 | 249 | 250 | if __name__ == "__main__": 251 | parser = argparse.ArgumentParser() 252 | parser.add_argument('--detect_model', type=str, default='../weights/plate_rec.onnx', 253 | help='model.pt path(s)') # 检测模型 254 | parser.add_argument('--rec_model', type=str, default='../weights/plate_rec_ocr.onnx', help='model.pt path(s)') # 识别模型 255 | parser.add_argument('--image_path', type=str, default='../imgs', help='source') 256 | parser.add_argument('--img_size', type=int, default=640, help='inference size (pixels)') 257 | parser.add_argument('--output', type=str, default='./result', help='source') 258 | opt = parser.parse_args() 259 | file_list = [] 260 | file_folder = opt.image_path 261 | allFilePath(file_folder, file_list) 262 | rec_onnx_path = opt.rec_model 263 | detect_onnx_path = opt.detect_model 264 | rec_model, rec_output = load_model(rec_onnx_path) 265 | detect_model, detect_output = load_model(detect_onnx_path) 266 | count = 0 267 | img_size = (opt.img_size, opt.img_size) 268 | begin = time.time() 269 | save_path = opt.output 270 | if not os.path.exists(save_path): 271 | os.mkdir(save_path) 272 | for pic_ in file_list: 273 | 274 | count += 1 275 | print(count, pic_, end=" ") 276 | img = cv2.imread(pic_) 277 | time_b = time.time() 278 | if img.shape[-1] == 4: 279 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) 280 | img0 = copy.deepcopy(img) 281 | img, r, left, top = detect_pre_precessing(img, img_size) # 检测前处理 282 | # print(img.shape) 283 | det_result = detect_model([img])[detect_output] 284 | outputs = post_precessing(det_result, r, left, top) # 检测后处理 285 | time_1 = time.time() 286 | result_list = rec_plate(outputs, img0, rec_model, rec_output) 287 | time_e = time.time() 288 | print(f'耗时 {time_e - time_b} s') 289 | ori_img = draw_result(img0, result_list) 290 | img_name = os.path.basename(pic_) 291 | save_img_path = os.path.join(save_path, img_name) 292 | 293 | 
cv2.imwrite(save_img_path, ori_img) 294 | print(f"总共耗时{time.time() - begin} s") 295 | -------------------------------------------------------------------------------- /plateLabel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from alphabets import plate_chr 4 | 5 | 6 | def allFileList(rootfile, allFile): 7 | folder = os.listdir(rootfile) 8 | for temp in folder: 9 | fileName = os.path.join(rootfile, temp) 10 | if os.path.isfile(fileName): 11 | allFile.append(fileName) 12 | else: 13 | allFileList(fileName, allFile) 14 | 15 | 16 | def is_str_right(plate_name): 17 | for str_ in plate_name: 18 | if str_ not in palteStr: 19 | return False 20 | return True 21 | 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--image_path', type=str, default="datasets/plate/val/", 26 | help='source') 27 | parser.add_argument('--label_file', type=str, default='datasets/val.txt', help='model.pt path(s)') 28 | 29 | opt = parser.parse_args() 30 | rootPath = opt.image_path 31 | labelFile = opt.label_file 32 | palteStr = plate_chr 33 | print(len(palteStr)) 34 | plateDict = {} 35 | for i in range(len(list(palteStr))): 36 | plateDict[palteStr[i]] = i 37 | fp = open(labelFile, "w", encoding="utf-8") 38 | file = [] 39 | allFileList(rootPath, file) 40 | picNum = 0 41 | for jpgFile in file: 42 | print(jpgFile) 43 | jpgName = os.path.basename(jpgFile) 44 | name = jpgName.split("_")[0] 45 | if " " in name: 46 | continue 47 | labelStr = " " 48 | if not is_str_right(name): 49 | continue 50 | strList = list(name) 51 | for i in range(len(strList)): 52 | labelStr += str(plateDict[strList[i]]) + " " 53 | picNum += 1 54 | fp.write(jpgFile + labelStr + "\n") 55 | fp.close() 56 | -------------------------------------------------------------------------------- /readme_imgs/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/1.png -------------------------------------------------------------------------------- /readme_imgs/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/2.png -------------------------------------------------------------------------------- /readme_imgs/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/3.png -------------------------------------------------------------------------------- /readme_imgs/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/4.png -------------------------------------------------------------------------------- /readme_imgs/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/5.png -------------------------------------------------------------------------------- /readme_imgs/6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/6.png -------------------------------------------------------------------------------- /readme_imgs/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/main.png -------------------------------------------------------------------------------- /readme_imgs/video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/readme_imgs/video.png -------------------------------------------------------------------------------- /torch2trt/speed.py: -------------------------------------------------------------------------------- 1 | from models.experimental import attempt_load 2 | from torch2trt.trt_model import TrtModel 3 | import argparse 4 | import torch 5 | import time 6 | from tqdm import tqdm 7 | 8 | 9 | def run(model, img, warmup_iter, iter): 10 | print('start warm up...') 11 | for _ in tqdm(range(warmup_iter)): 12 | model(img) 13 | 14 | print('start calculate...') 15 | torch.cuda.synchronize() 16 | start = time.time() 17 | for __ in tqdm(range(iter)): 18 | model(img) 19 | torch.cuda.synchronize() 20 | end = time.time() 21 | return ((end - start) * 1000) / float(iter) 22 | 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--torch_path', type=str, required=True, help='torch model path') 27 | parser.add_argument('--trt_path', type=str, required=True, help='tensorrt model path') 28 | 29 | parser.add_argument('--device', type=int, default=0, help='cuda device') 30 | parser.add_argument('--img_shape', type=list, default=[1, 3, 640, 640], help='tensorrt model path') 31 | parser.add_argument('--warmup_iter', type=int, default=100, help='warm up iter') 32 | parser.add_argument('--iter', type=int, default=300, help='average elapsed time of iterations') 33 | opt = parser.parse_args() 34 | 35 | # -----------------------torch----------------------------------------- 36 | img = torch.zeros(opt.img_shape) 37 | model = attempt_load(opt.torch_path, map_location=torch.device('cpu')) # load FP32 model 38 | model.eval() 39 | total_time = run(model.to(opt.device), img.to(opt.device), opt.warmup_iter, opt.iter) 40 | print('Pytorch is %.2f ms/img' % total_time) 41 | 42 | # -----------------------tensorrt----------------------------------------- 43 | model = TrtModel(opt.trt_path) 44 | total_time = run(model, img.numpy(), opt.warmup_iter, opt.iter) 45 | model.destroy() 46 | print('TensorRT is %.2f ms/img' % total_time) 47 | -------------------------------------------------------------------------------- /torch2trt/trt_model.py: -------------------------------------------------------------------------------- 1 | import pycuda.autoinit 2 | import pycuda.driver as cuda 3 | import tensorrt as trt 4 | import numpy as np 5 | 6 | EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 7 | TRT_LOGGER = trt.Logger(trt.Logger.WARNING) 8 | 9 | 10 | def GiB(val): 11 | return val * 1 << 30 12 | 13 | 14 | def ONNX_to_TRT(onnx_model_path=None, trt_engine_path=None, fp16_mode=False): 15 | """ 16 | 仅适用TensorRT V8版本 17 | 生成cudaEngine,并保存引擎文件(仅支持固定输入尺度) 18 | 19 | fp16_mode: True则fp16预测 20 | onnx_model_path: 将加载的onnx权重路径 21 | trt_engine_path: trt引擎文件保存路径 22 | 
""" 23 | builder = trt.Builder(TRT_LOGGER) 24 | network = builder.create_network(EXPLICIT_BATCH) 25 | parser = trt.OnnxParser(network, TRT_LOGGER) 26 | 27 | config = builder.create_builder_config() 28 | config.max_workspace_size = GiB(1) 29 | if fp16_mode: 30 | config.set_flag(trt.BuilderFlag.FP16) 31 | with open(onnx_model_path, 'rb') as model: 32 | assert parser.parse(model.read()) 33 | serialized_engine = builder.build_serialized_network(network, config) 34 | 35 | with open(trt_engine_path, 'wb') as f: 36 | f.write(serialized_engine) 37 | 38 | print('TensorRT file in ' + trt_engine_path) 39 | print('============ONNX->TensorRT SUCCESS============') 40 | 41 | 42 | class TrtModel(): 43 | """ 44 | TensorRT infer 45 | """ 46 | 47 | def __init__(self, trt_path): 48 | self.ctx = cuda.Device(0).make_context() 49 | stream = cuda.Stream() 50 | TRT_LOGGER = trt.Logger(trt.Logger.INFO) 51 | runtime = trt.Runtime(TRT_LOGGER) 52 | 53 | with open(trt_path, "rb") as f: 54 | engine = runtime.deserialize_cuda_engine(f.read()) 55 | context = engine.create_execution_context() 56 | 57 | host_inputs = [] 58 | cuda_inputs = [] 59 | host_outputs = [] 60 | cuda_outputs = [] 61 | bindings = [] 62 | 63 | for binding in engine: 64 | print('bingding:', binding, engine.get_binding_shape(binding)) 65 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 66 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 67 | host_mem = cuda.pagelocked_empty(size, dtype) 68 | cuda_mem = cuda.mem_alloc(host_mem.nbytes) 69 | bindings.append(int(cuda_mem)) 70 | if engine.binding_is_input(binding): 71 | self.input_w = engine.get_binding_shape(binding)[-1] 72 | self.input_h = engine.get_binding_shape(binding)[-2] 73 | host_inputs.append(host_mem) 74 | cuda_inputs.append(cuda_mem) 75 | else: 76 | host_outputs.append(host_mem) 77 | cuda_outputs.append(cuda_mem) 78 | 79 | # Store 80 | self.stream = stream 81 | self.context = context 82 | self.engine = engine 83 | self.host_inputs = host_inputs 84 | self.cuda_inputs = cuda_inputs 85 | self.host_outputs = host_outputs 86 | self.cuda_outputs = cuda_outputs 87 | self.bindings = bindings 88 | self.batch_size = engine.max_batch_size 89 | 90 | def __call__(self, img_np_nchw): 91 | ''' 92 | TensorRT推理 93 | :param img_np_nchw: 输入图像 94 | ''' 95 | self.ctx.push() 96 | 97 | # Restore 98 | stream = self.stream 99 | context = self.context 100 | engine = self.engine 101 | host_inputs = self.host_inputs 102 | cuda_inputs = self.cuda_inputs 103 | host_outputs = self.host_outputs 104 | cuda_outputs = self.cuda_outputs 105 | bindings = self.bindings 106 | 107 | np.copyto(host_inputs[0], img_np_nchw.ravel()) 108 | cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream) 109 | context.execute_async(batch_size=self.batch_size, bindings=bindings, stream_handle=stream.handle) 110 | cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream) 111 | stream.synchronize() 112 | self.ctx.pop() 113 | return host_outputs[0] 114 | 115 | def destroy(self): 116 | self.ctx.pop() 117 | -------------------------------------------------------------------------------- /train_color.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import cvtorchvision.cvtransforms as cvTransforms 5 | import torchvision.datasets as dset 6 | import numpy as np 7 | import os 8 | import argparse 9 | from Net.colorNet import myNet_ocr_color 10 | import cv2 11 | from tqdm import tqdm 12 | 
import matplotlib.pyplot as plt 13 | 14 | train_loss_list = [] 15 | val_loss_list = [] 16 | accuracy_list = [] 17 | 18 | 19 | def cv_imread(path): 20 | img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) 21 | return img 22 | 23 | 24 | class CrossEntropyLabelSmooth(nn.Module): 25 | def __init__(self, num_classes, epsilon=0.1, use_gpu=True): 26 | super(CrossEntropyLabelSmooth, self).__init__() 27 | self.num_classes = num_classes 28 | self.epsilon = epsilon 29 | self.use_gpu = use_gpu 30 | self.logsoftmax = nn.LogSoftmax(dim=1) 31 | 32 | def forward(self, inputs, targets): 33 | log_probs = self.logsoftmax(inputs) 34 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).cpu(), 1) 35 | if self.use_gpu: targets = targets.cuda() 36 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes 37 | loss = (- targets * log_probs).mean(0).sum() 38 | return loss 39 | 40 | 41 | def fix_bn(m): 42 | classname = m.__class__.__name__ 43 | if classname.find('BatchNorm') != -1: 44 | if m.num_features != 5 and m.num_features != 12: 45 | m.eval() 46 | 47 | 48 | def train(epoch): 49 | print('\nEpoch: %d' % epoch) 50 | print(scheduler.get_lr()) 51 | model.train() 52 | model.apply(fix_bn) 53 | 54 | epoch_loss = 0.0 55 | total_batches = len(trainloader) 56 | 57 | for batch_idx, (img, label) in enumerate(tqdm(trainloader, desc=f'Training Epoch {epoch}')): 58 | image = Variable(img.cuda()) 59 | label = Variable(label.cuda()) 60 | optimizer.zero_grad() 61 | _, out = model(image) 62 | loss = criterion(out, label) 63 | loss.backward() 64 | optimizer.step() 65 | epoch_loss += loss.item() 66 | 67 | if batch_idx % 50 == 0: 68 | print("Epoch:%d [%d|%d] loss:%f lr:%s" % ( 69 | epoch, batch_idx, total_batches, loss.mean(), scheduler.get_lr())) 70 | 71 | avg_epoch_loss = epoch_loss / total_batches 72 | train_loss_list.append(avg_epoch_loss) 73 | print(f"Avg Loss for Epoch {epoch}: {avg_epoch_loss}") 74 | 75 | scheduler.step() 76 | 77 | 78 | def val(epoch): 79 | print("\nValidation Epoch: %d" % epoch) 80 | model.eval() 81 | total = 0 82 | correct = 0 83 | with torch.no_grad(): 84 | for batch_idx, (img, label) in enumerate(valloader): 85 | image = Variable(img.cuda()) 86 | label = Variable(label.cuda()) 87 | _, out = model(image) 88 | _, predicted = torch.max(out.data, 1) 89 | total += image.size(0) 90 | correct += predicted.data.eq(label.data).cuda().sum() 91 | accuracy = 1.0 * correct.cpu().numpy() / total 92 | accuracy_list.append(accuracy) 93 | print("Acc: %f " % ((1.0 * correct.cpu().numpy()) / total)) 94 | exModelName = opt.model_path + '/' + str(format(accuracy, ".6f")) + "_" + "epoch_" + str(epoch) + "_model" + ".pth" 95 | torch.save({'cfg': cfg, 'state_dict': model.state_dict()}, exModelName) 96 | 97 | 98 | if __name__ == '__main__': 99 | parser = argparse.ArgumentParser() 100 | parser.add_argument('--weights', type=str, 101 | default='weights/plate_rec_ocr.pth') # 车牌识别模型 102 | parser.add_argument('--train_path', type=str, default='datasets/plate_color/train') # 颜色训练集 103 | parser.add_argument('--val_path', type=str, default='datasets/plate_color/val') # 颜色验证集 104 | parser.add_argument('--num_color', type=int, default=5) 105 | parser.add_argument('--num_workers', type=int, default=8) 106 | parser.add_argument('--batchSize', type=int, default=256) 107 | parser.add_argument('--epoch', type=int, default=120) 108 | parser.add_argument('--lr', type=float, default=0.0025) 109 | parser.add_argument('--device', type=str, default='cuda') 110 | parser.add_argument('--model_path', 
type=str, default='color_model', help='model_path') 111 | opt = parser.parse_args() 112 | 113 | print(opt) 114 | 115 | device = torch.device("cuda" if torch.cuda.is_available() and opt.device == 'cuda' else 'cpu') 116 | torch.backends.cudnn.benchmark = True 117 | if not os.path.exists(opt.model_path): 118 | os.mkdir(opt.model_path) 119 | 120 | mean_value = (0.588, 0.588, 0.588) 121 | std_value = (0.193, 0.193, 0.193) 122 | 123 | transform_train = cvTransforms.Compose([ 124 | cvTransforms.Resize((48, 168)), 125 | cvTransforms.RandomHorizontalFlip(), 126 | cvTransforms.ToTensorNoDiv(), 127 | cvTransforms.NormalizeCaffe(mean_value, std_value) 128 | ]) 129 | 130 | transform_val = cvTransforms.Compose([ 131 | cvTransforms.Resize((48, 168)), 132 | cvTransforms.ToTensorNoDiv(), 133 | cvTransforms.NormalizeCaffe(mean_value, std_value), 134 | ]) 135 | 136 | rec_model_Path = opt.weights # 车牌识别模型 137 | checkPoint = torch.load(rec_model_Path, map_location=torch.device('cuda' if opt.device == 'cuda' else 'cpu')) 138 | cfg = checkPoint["cfg"] 139 | print(cfg) 140 | model = myNet_ocr_color(cfg=cfg, color_num=opt.num_color) 141 | model_dict = checkPoint['state_dict'] 142 | model.load_state_dict(model_dict, strict=False) 143 | 144 | trainset = dset.ImageFolder(opt.train_path, transform=transform_train, loader=cv_imread) 145 | valset = dset.ImageFolder(opt.val_path, transform=transform_val, loader=cv_imread) 146 | print(len(valset)) 147 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.batchSize, shuffle=True, 148 | num_workers=opt.num_workers) 149 | valloader = torch.utils.data.DataLoader(valset, batch_size=opt.batchSize, shuffle=False, 150 | num_workers=opt.num_workers) 151 | model = model.to(device) 152 | for name, value in model.named_parameters(): 153 | if name not in ['color_classifier.weight', 'color_classifier.bias', 'color_bn.weight', 'color_bn.bias', 154 | 'conv1.weight', 'conv1.bias', 'bn1.weight', 'bn1.bias']: 155 | value.requires_grad = False 156 | params = filter(lambda p: p.requires_grad, model.parameters()) 157 | optimizer = torch.optim.SGD(params, lr=opt.lr, momentum=0.9, weight_decay=5e-4) 158 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(opt.epoch)) 159 | criterion = CrossEntropyLabelSmooth(opt.num_color) 160 | criterion.cuda() 161 | for epoch in range(opt.epoch): 162 | train(epoch) 163 | val(epoch) 164 | 165 | plt.figure(figsize=(12, 4)) 166 | plt.subplot(1, 2, 1) 167 | plt.plot(train_loss_list, label='Train Loss') 168 | plt.xlabel('Epoch') 169 | plt.ylabel('Loss') 170 | plt.legend() 171 | 172 | plt.subplot(1, 2, 2) 173 | plt.plot(accuracy_list, label='Validation Accuracy') 174 | plt.xlabel('Epoch') 175 | plt.ylabel('Accuracy') 176 | plt.legend() 177 | 178 | plt.savefig(os.path.join(opt.model_path, 'training_curves.png')) 179 | plt.close() 180 | -------------------------------------------------------------------------------- /train_ocr.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from easydict import EasyDict as edict 3 | import yaml 4 | import os 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | from torch.utils.data import DataLoader 8 | import lib.utils.utils as utils 9 | from lib.dataset import get_dataset 10 | from lib.core import function 11 | from lib.utils.utils import model_info 12 | from Net.plateNet import myNet_ocr 13 | from alphabets import plateName, plate_chr 14 | from tensorboardX import SummaryWriter 15 | 16 | 17 | def parse_arg(): 18 | parser = 
argparse.ArgumentParser(description="train crnn") 19 | 20 | parser.add_argument('--cfg', help='experiment configuration filename', required=True, type=str) 21 | parser.add_argument('--img_h', type=int, default=48, help='height') 22 | parser.add_argument('--img_w', type=int, default=168, help='width') 23 | args = parser.parse_args() 24 | 25 | with open(args.cfg, 'r') as f: 26 | config = yaml.load(f, Loader=yaml.FullLoader) 27 | config = edict(config) 28 | 29 | config.DATASET.ALPHABETS = plateName 30 | config.MODEL.NUM_CLASSES = len(config.DATASET.ALPHABETS) 31 | config.HEIGHT = args.img_h 32 | config.WIDTH = args.img_w 33 | return config 34 | 35 | 36 | def main(): 37 | config = parse_arg() 38 | 39 | # create output folder 40 | output_dict = utils.create_log_folder(config, phase='train') 41 | 42 | # cudnn 43 | cudnn.benchmark = config.CUDNN.BENCHMARK 44 | cudnn.deterministic = config.CUDNN.DETERMINISTIC 45 | cudnn.enabled = config.CUDNN.ENABLED 46 | 47 | # writer dict 48 | writer_dict = { 49 | 'writer': SummaryWriter(log_dir=output_dict['tb_dir']), 50 | 'train_global_steps': 0, 51 | 'valid_global_steps': 0, 52 | } 53 | 54 | cfg = [16, 16, 32, 32, 'M', 64, 64, 'M', 96, 96, 'M', 128, 256] 55 | model = myNet_ocr(num_classes=len(plate_chr), cfg=cfg) 56 | 57 | # get device 58 | if torch.cuda.is_available(): 59 | device = torch.device("cuda:{}".format(config.GPUID)) 60 | else: 61 | device = torch.device("cpu:0") 62 | 63 | model = model.to(device) 64 | 65 | # define loss function 66 | criterion = torch.nn.CTCLoss() 67 | 68 | last_epoch = config.TRAIN.BEGIN_EPOCH 69 | optimizer = utils.get_optimizer(config, model) 70 | if isinstance(config.TRAIN.LR_STEP, list): 71 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( 72 | optimizer, config.TRAIN.LR_STEP, 73 | config.TRAIN.LR_FACTOR, last_epoch - 1 74 | ) 75 | else: 76 | lr_scheduler = torch.optim.lr_scheduler.StepLR( 77 | optimizer, config.TRAIN.LR_STEP, 78 | config.TRAIN.LR_FACTOR, last_epoch - 1 79 | ) 80 | 81 | if config.TRAIN.FINETUNE.IS_FINETUNE: 82 | model_state_file = config.TRAIN.FINETUNE.FINETUNE_CHECKPOINIT 83 | if model_state_file == '': 84 | print(" => no checkpoint found") 85 | checkpoint = torch.load(model_state_file, map_location='cpu') 86 | if 'state_dict' in checkpoint.keys(): 87 | checkpoint = checkpoint['state_dict'] 88 | 89 | model.load_state_dict(checkpoint) 90 | 91 | elif config.TRAIN.RESUME.IS_RESUME: 92 | model_state_file = config.TRAIN.RESUME.FILE 93 | if model_state_file == '': 94 | print(" => no checkpoint found") 95 | checkpoint = torch.load(model_state_file, map_location='cpu') 96 | if 'state_dict' in checkpoint.keys(): 97 | model.load_state_dict(checkpoint['state_dict']) 98 | last_epoch = checkpoint['epoch'] 99 | else: 100 | model.load_state_dict(checkpoint) 101 | 102 | model_info(model) 103 | train_dataset = get_dataset(config)(config, input_w=config.WIDTH, input_h=config.HEIGHT, is_train=True) 104 | train_loader = DataLoader( 105 | dataset=train_dataset, 106 | batch_size=config.TRAIN.BATCH_SIZE_PER_GPU, 107 | shuffle=config.TRAIN.SHUFFLE, 108 | num_workers=config.WORKERS, 109 | pin_memory=config.PIN_MEMORY, 110 | ) 111 | 112 | val_dataset = get_dataset(config)(config, input_w=config.WIDTH, input_h=config.HEIGHT, is_train=False) 113 | val_loader = DataLoader( 114 | dataset=val_dataset, 115 | batch_size=config.TEST.BATCH_SIZE_PER_GPU, 116 | shuffle=config.TEST.SHUFFLE, 117 | num_workers=config.WORKERS, 118 | pin_memory=config.PIN_MEMORY, 119 | ) 120 | 121 | best_acc = 0.5 122 | converter = 
utils.strLabelConverter(config.DATASET.ALPHABETS) 123 | for epoch in range(last_epoch, config.TRAIN.END_EPOCH): 124 | function.train(config, train_loader, train_dataset, converter, model, 125 | criterion, optimizer, device, epoch, writer_dict, output_dict) 126 | lr_scheduler.step() 127 | 128 | acc = function.validate(config, val_loader, val_dataset, converter, 129 | model, criterion, device, epoch, writer_dict, output_dict) 130 | 131 | is_best = acc > best_acc 132 | best_acc = max(acc, best_acc) 133 | 134 | print("is best:", is_best) 135 | print("best acc is:", best_acc) 136 | # save checkpoint 137 | torch.save( 138 | { 139 | "cfg": cfg, 140 | "state_dict": model.state_dict(), 141 | "epoch": epoch + 1, 142 | # "optimizer": optimizer.state_dict(), 143 | # "lr_scheduler": lr_scheduler.state_dict(), 144 | "best_acc": best_acc, 145 | }, os.path.join(output_dict['chs_dir'], "checkpoint_{}_acc_{:.4f}.pth".format(epoch, acc)) 146 | ) 147 | 148 | writer_dict['writer'].close() 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | curPath = os.path.abspath(os.path.dirname(__file__)) 5 | sys.path.append(curPath) 6 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SiLU(nn.Module): 7 | @staticmethod 8 | def forward(x): 9 | return x * torch.sigmoid(x) 10 | 11 | 12 | class Hardswish(nn.Module): 13 | @staticmethod 14 | def forward(x): 15 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
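# A minimal sanity-check sketch for the modules above, assuming a PyTorch build where
# F.silu and F.hardswish exist; kept in comments so importing this module stays side-effect free:
#
#   x = torch.randn(4, 8)
#   assert torch.allclose(SiLU.forward(x), F.silu(x), atol=1e-6)
#   assert torch.allclose(Hardswish.forward(x), F.hardswish(x), atol=1e-6)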
16 | 17 | 18 | class MemoryEfficientSwish(nn.Module): 19 | class F(torch.autograd.Function): 20 | @staticmethod 21 | def forward(ctx, x): 22 | ctx.save_for_backward(x) 23 | return x * torch.sigmoid(x) 24 | 25 | @staticmethod 26 | def backward(ctx, grad_output): 27 | x = ctx.saved_tensors[0] 28 | sx = torch.sigmoid(x) 29 | return grad_output * (sx * (1 + x * (1 - sx))) 30 | 31 | def forward(self, x): 32 | return self.F.apply(x) 33 | 34 | 35 | class Mish(nn.Module): 36 | @staticmethod 37 | def forward(x): 38 | return x * F.softplus(x).tanh() 39 | 40 | 41 | class MemoryEfficientMish(nn.Module): 42 | class F(torch.autograd.Function): 43 | @staticmethod 44 | def forward(ctx, x): 45 | ctx.save_for_backward(x) 46 | return x.mul(torch.tanh(F.softplus(x))) 47 | 48 | @staticmethod 49 | def backward(ctx, grad_output): 50 | x = ctx.saved_tensors[0] 51 | sx = torch.sigmoid(x) 52 | fx = F.softplus(x).tanh() 53 | return grad_output * (fx + x * sx * (1 - fx * fx)) 54 | 55 | def forward(self, x): 56 | return self.F.apply(x) 57 | 58 | 59 | class FReLU(nn.Module): 60 | def __init__(self, c1, k=3): 61 | super().__init__() 62 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 63 | self.bn = nn.BatchNorm2d(c1) 64 | 65 | def forward(self, x): 66 | return torch.max(x, self.bn(self.conv(x))) 67 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import yaml 4 | from scipy.cluster.vq import kmeans 5 | from tqdm import tqdm 6 | 7 | from utils.general import colorstr 8 | 9 | 10 | def check_anchor_order(m): 11 | a = m.anchor_grid.prod(-1).view(-1) 12 | da = a[-1] - a[0] 13 | ds = m.stride[-1] - m.stride[0] 14 | if da.sign() != ds.sign(): 15 | print('Reversing anchor order') 16 | m.anchors[:] = m.anchors.flip(0) 17 | m.anchor_grid[:] = m.anchor_grid.flip(0) 18 | 19 | 20 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 21 | prefix = colorstr('autoanchor: ') 22 | print(f'\n{prefix}Analyzing anchors... ', end='') 23 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] 24 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 25 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) 26 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() 27 | 28 | def metric(k): 29 | r = wh[:, None] / k[None] 30 | x = torch.min(r, 1. / r).min(2)[0] 31 | best = x.max(1)[0] 32 | aat = (x > 1. / thr).float().sum(1).mean() 33 | bpr = (best > 1. / thr).float().mean() 34 | return bpr, aat 35 | 36 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 37 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 38 | if bpr < 0.98: 39 | print('. Attempting to improve anchors, please wait...') 40 | na = m.anchor_grid.numel() // 2 41 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 42 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 43 | if new_bpr > bpr: 44 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 45 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) 46 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) 47 | check_anchor_order(m) 48 | print(f'{prefix}New anchors saved to model. 
Update model *.yaml to use these anchors in the future.') 49 | else: 50 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 51 | print('') 52 | 53 | 54 | def kmean_anchors(path='data/plate_color.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 55 | thr = 1. / thr 56 | prefix = colorstr('autoanchor: ') 57 | 58 | def metric(k, wh): # compute metrics 59 | r = wh[:, None] / k[None] 60 | x = torch.min(r, 1. / r).min(2)[0] 61 | return x, x.max(1)[0] # x, best_x 62 | 63 | def anchor_fitness(k): 64 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 65 | return (best * (best > thr).float()).mean() 66 | 67 | def print_results(k): 68 | k = k[np.argsort(k.prod(1))] 69 | x, best = metric(k, wh0) 70 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n 71 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 72 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 73 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 74 | for i, x in enumerate(k): 75 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 76 | return k 77 | 78 | if isinstance(path, str): 79 | with open(path) as f: 80 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 81 | from utils.datasets import LoadImagesAndLabels 82 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 83 | else: 84 | dataset = path # dataset 85 | 86 | # Get label wh 87 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 88 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 89 | 90 | # Filter 91 | i = (wh0 < 3.0).any(1).sum() 92 | if i: 93 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 94 | wh = wh0[(wh0 >= 2.0).any(1)] 95 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 96 | s = wh.std(0) 97 | k, dist = kmeans(wh / s, n, iter=30) 98 | k *= s 99 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 100 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 101 | k = print_results(k) 102 | 103 | # Evolve 104 | npr = np.random 105 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 106 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') 107 | for _ in pbar: 108 | v = np.ones(sh) 109 | while (v == 1).all(): 110 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 111 | kg = (k.copy() * v).clip(min=2.0) 112 | fg = anchor_fitness(kg) 113 | if fg > f: 114 | f, k = fg, kg.copy() 115 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 116 | if verbose: 117 | print_results(k) 118 | 119 | return print_results(k) 120 | -------------------------------------------------------------------------------- /utils/crpd_process.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | def allFilePath(rootPath, allFIleList): 8 | fileList = os.listdir(rootPath) 9 | for temp in fileList: 10 | if os.path.isfile(os.path.join(rootPath, temp)): 11 | if temp.endswith(".jpg"): 12 | allFIleList.append(os.path.join(rootPath, temp)) 13 | else: 14 | allFilePath(os.path.join(rootPath, temp), allFIleList) 15 | 16 | 17 | def order_points(pts): 18 | pts = pts[:4, :] 19 | rect = np.zeros((5, 2), dtype="float32") 20 | 21 | s = pts.sum(axis=1) 22 | rect[0] = pts[np.argmin(s)] 23 | rect[2] = pts[np.argmax(s)] 24 | 25 | diff = np.diff(pts, axis=1) 26 | rect[1] = pts[np.argmin(diff)] 27 | rect[3] = pts[np.argmax(diff)] 28 | 29 | return rect 30 | 31 | 32 | def get_partical_ccpd(): 33 | ccpd_dir = r"" # 34 | save_Path = r"" 35 | folder_list = os.listdir(ccpd_dir) 36 | for folder_name in folder_list: 37 | count = 0 38 | folder_path = os.path.join(ccpd_dir, folder_name) 39 | if os.path.isfile(folder_path): 40 | continue 41 | if folder_name == "crpd_fn": 42 | continue 43 | name_list = os.listdir(folder_path) 44 | 45 | save_folder = save_Path 46 | if not os.path.exists(save_folder): 47 | os.mkdir(save_folder) 48 | 49 | for name in name_list: 50 | file_path = os.path.join(folder_path, name) 51 | count += 1 52 | if count > 1000: 53 | break 54 | new_file_path = os.path.join(save_folder, name) 55 | shutil.move(file_path, new_file_path) 56 | print(count, new_file_path) 57 | 58 | 59 | def get_rect_and_landmarks(img_path): 60 | file_name = img_path.split("/")[-1].split("-") 61 | landmarks_np = np.zeros((5, 2)) 62 | rect = file_name[2].split("_") 63 | landmarks = file_name[3].split("_") 64 | rect_str = "&".join(rect) 65 | landmarks_str = "&".join(landmarks) 66 | rect = rect_str.split("&") 67 | landmarks = landmarks_str.split("&") 68 | rect = [int(x) for x in rect] 69 | landmarks = [int(x) for x in landmarks] 70 | for i in range(4): 71 | landmarks_np[i][0] = landmarks[2 * i] 72 | landmarks_np[i][1] = landmarks[2 * i + 1] 73 | landmarks_np_new = order_points(landmarks_np) 74 | return rect, landmarks, landmarks_np_new 75 | 76 | 77 | def x1x2y1y2_yolo(rect, landmarks, img): 78 | h, w, c = img.shape 79 | rect[0] = max(0, rect[0]) 80 | rect[1] = max(0, rect[1]) 81 | rect[2] = min(w - 1, rect[2] - rect[0]) 82 | rect[3] = min(h - 1, rect[3] - rect[1]) 
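# Layout note for the array built below: columns 0-3 hold the YOLO box normalized by
# image size (cx, cy, w, h); columns 4-11 hold the four plate corners as normalized
# (x, y) pairs; the last two columns of the 14-wide row are left at zero here.
# Worked example (hypothetical numbers): a box from (200, 300) to (300, 350) in a
# 1000x800 image gives cx = (200 + 100/2) / 1000 = 0.25, cy = (300 + 50/2) / 800 = 0.40625,
# w = 100 / 1000 = 0.1, h = 50 / 800 = 0.0625.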
83 | annotation = np.zeros((1, 14)) 84 | annotation[0, 0] = (rect[0] + rect[2] / 2) / w # cx 85 | annotation[0, 1] = (rect[1] + rect[3] / 2) / h # cy 86 | annotation[0, 2] = rect[2] / w # w 87 | annotation[0, 3] = rect[3] / h # h 88 | 89 | annotation[0, 4] = landmarks[0] / w # l0_x 90 | annotation[0, 5] = landmarks[1] / h # l0_y 91 | annotation[0, 6] = landmarks[2] / w # l1_x 92 | annotation[0, 7] = landmarks[3] / h # l1_y 93 | annotation[0, 8] = landmarks[4] / w # l2_x 94 | annotation[0, 9] = landmarks[5] / h # l2_y 95 | annotation[0, 10] = landmarks[6] / w # l3_x 96 | annotation[0, 11] = landmarks[7] / h # l3_y 97 | return annotation 98 | 99 | 100 | def xywh2yolo(rect, landmarks_sort, img): 101 | h, w, c = img.shape 102 | rect[0] = max(0, rect[0]) 103 | rect[1] = max(0, rect[1]) 104 | rect[2] = min(w - 1, rect[2] - rect[0]) 105 | rect[3] = min(h - 1, rect[3] - rect[1]) 106 | annotation = np.zeros((1, 12)) 107 | annotation[0, 0] = (rect[0] + rect[2] / 2) / w # cx 108 | annotation[0, 1] = (rect[1] + rect[3] / 2) / h # cy 109 | annotation[0, 2] = rect[2] / w # w 110 | annotation[0, 3] = rect[3] / h # h 111 | 112 | annotation[0, 4] = landmarks_sort[0][0] / w # l0_x 113 | annotation[0, 5] = landmarks_sort[0][1] / h # l0_y 114 | annotation[0, 6] = landmarks_sort[1][0] / w # l1_x 115 | annotation[0, 7] = landmarks_sort[1][1] / h # l1_y 116 | annotation[0, 8] = landmarks_sort[2][0] / w # l2_x 117 | annotation[0, 9] = landmarks_sort[2][1] / h # l2_y 118 | annotation[0, 10] = landmarks_sort[3][0] / w # l3_x 119 | annotation[0, 11] = landmarks_sort[3][1] / h # l3_y 120 | return annotation 121 | 122 | 123 | def yolo2x1y1x2y2(annotation, img): 124 | h, w, c = img.shape 125 | rect = annotation[:, 0:4].squeeze().tolist() 126 | landmarks = annotation[:, 4:].squeeze().tolist() 127 | rect_w = w * rect[2] 128 | rect_h = h * rect[3] 129 | rect_x = int(rect[0] * w - rect_w / 2) 130 | rect_y = int(rect[1] * h - rect_h / 2) 131 | new_rect = [rect_x, rect_y, rect_x + rect_w, rect_y + rect_h] 132 | for i in range(5): 133 | landmarks[2 * i] = landmarks[2 * i] * w 134 | landmarks[2 * i + 1] = landmarks[2 * i + 1] * h 135 | return new_rect, landmarks 136 | 137 | 138 | def write_lable(file_path): 139 | pass 140 | 141 | 142 | if __name__ == '__main__': 143 | file_root = r"crpd" 144 | file_list = [] 145 | count = 0 146 | allFilePath(file_root, file_list) 147 | for img_path in file_list: 148 | count += 1 149 | text_path = img_path.replace(".jpg", ".txt") 150 | img = cv2.imread(img_path) 151 | rect, landmarks, landmarks_sort = get_rect_and_landmarks(img_path) 152 | annotation = xywh2yolo(rect, landmarks_sort, img) 153 | str_label = "0 " 154 | for i in range(len(annotation[0])): 155 | str_label = str_label + " " + str(annotation[0][i]) 156 | str_label = str_label.replace('[', '').replace(']', '') 157 | str_label = str_label.replace(',', '') + '\n' 158 | with open(text_path, "w") as f: 159 | f.write(str_label) 160 | print(count, img_path) 161 | -------------------------------------------------------------------------------- /utils/cv_puttext.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | 6 | # 用于将车牌识别结果写入图片中 7 | def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=20): 8 | if isinstance(img, np.ndarray): 9 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 10 | draw = ImageDraw.Draw(img) 11 | fontText = 
ImageFont.truetype("../fonts/AlibabaPuHuiTi-3-65-Medium.ttf", textSize, encoding="utf-8") 12 | draw.text((left, top), text, textColor, font=fontText) 13 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 14 | -------------------------------------------------------------------------------- /utils/double_plate_split_merge.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def get_split_merge(img): 6 | h, w, c = img.shape 7 | img_upper = img[0:int(5 / 12 * h), :] 8 | img_lower = img[int(1 / 3 * h):, :] 9 | img_upper = cv2.resize(img_upper, (img_lower.shape[1], img_lower.shape[0])) 10 | new_img = np.hstack((img_upper, img_lower)) 11 | return new_img 12 | 13 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import subprocess 4 | import time 5 | from pathlib import Path 6 | import requests 7 | import torch 8 | 9 | 10 | def gsutil_getsize(url=''): 11 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 12 | return eval(s.split(' ')[0]) if len(s) else 0 13 | 14 | 15 | def attempt_download(file, repo='ultralytics/yolov5'): 16 | file = Path(str(file).strip().replace("'", '').lower()) 17 | 18 | if not file.exists(): 19 | try: 20 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() 21 | assets = [x['name'] for x in response['assets']] 22 | tag = response['tag_name'] 23 | except: 24 | assets = ['yolov5.pt', 'yolov5.pt', 'yolov5l.pt', 'yolov5x.pt'] 25 | tag = subprocess.check_output('git tag', shell=True).decode('utf-8').split('\n')[-2] 26 | 27 | name = file.name 28 | if name in assets: 29 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 30 | redundant = False 31 | try: 32 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 33 | print(f'Downloading {url} to {file}...') 34 | torch.hub.download_url_to_file(url, file) 35 | assert file.exists() and file.stat().st_size > 1E6 36 | except Exception as e: 37 | print(f'Download error: {e}') 38 | assert redundant, 'No secondary mirror' 39 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 40 | print(f'Downloading {url} to {file}...') 41 | os.system(f'curl -L {url} -o {file}') 42 | finally: 43 | if not file.exists() or file.stat().st_size < 1E6: 44 | file.unlink(missing_ok=True) 45 | print(f'ERROR: Download failure: {msg}') 46 | print('') 47 | return 48 | 49 | 50 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 51 | t = time.time() 52 | file = Path(file) 53 | cookie = Path('cookie') 54 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... 
', end='') 55 | file.unlink(missing_ok=True) 56 | cookie.unlink(missing_ok=True) 57 | 58 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 59 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 60 | if os.path.exists('cookie'): 61 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 62 | else: 63 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 64 | r = os.system(s) 65 | cookie.unlink(missing_ok=True) 66 | 67 | # Error check 68 | if r != 0: 69 | file.unlink(missing_ok=True) 70 | print('Download error ') 71 | return r 72 | 73 | if file.suffix == '.zip': 74 | print('unzipping... ', end='') 75 | os.system(f'unzip -q {file}') 76 | file.unlink() 77 | 78 | print(f'Done ({time.time() - t:.1f}s)') 79 | return r 80 | 81 | 82 | def get_token(cookie="./cookie"): 83 | with open(cookie) as f: 84 | for line in f: 85 | if "download" in line: 86 | return line.split()[-1] 87 | return "" 88 | -------------------------------------------------------------------------------- /utils/log_dataset.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from wandb_utils import WandbLogger 3 | 4 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 5 | 6 | 7 | def create_dataset_artifact(opt): 8 | with open(opt.data) as f: 9 | data = yaml.load(f, Loader=yaml.SafeLoader) 10 | logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation') 11 | 12 | 13 | # if __name__ == '__main__': 14 | # parser = argparse.ArgumentParser() 15 | # parser.add_argument('--data', type=str, default='../data/plate_color.yaml', help='data.yaml path') 16 | # parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 17 | # parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 18 | # opt = parser.parse_args() 19 | # opt.resume = False 20 | # create_dataset_artifact(opt) 21 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from general import bbox_iou 5 | from torch_utils import is_parallel 6 | 7 | 8 | def smooth_BCE(eps=0.1): 9 | return 1.0 - 0.5 * eps, 0.5 * eps 10 | 11 | 12 | class BCEBlurWithLogitsLoss(nn.Module): 13 | def __init__(self, alpha=0.05): 14 | super(BCEBlurWithLogitsLoss, self).__init__() 15 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') 16 | self.alpha = alpha 17 | 18 | def forward(self, pred, true): 19 | loss = self.loss_fcn(pred, true) 20 | pred = torch.sigmoid(pred) 21 | dx = pred - true 22 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 23 | loss = loss * alpha_factor 24 | return loss.mean() 25 | 26 | 27 | class FocalLoss(nn.Module): 28 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 29 | super(FocalLoss, self).__init__() 30 | self.loss_fcn = loss_fcn 31 | self.gamma = gamma 32 | self.alpha = alpha 33 | self.reduction = loss_fcn.reduction 34 | self.loss_fcn.reduction = 'none' 35 | 36 | def forward(self, pred, true): 37 | loss = self.loss_fcn(pred, true) 38 | pred_prob = torch.sigmoid(pred) 39 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 40 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 41 | modulating_factor = (1.0 - p_t) ** self.gamma 42 | loss = loss * alpha_factor * modulating_factor 43 | 44 | if self.reduction 
== 'mean': 45 | return loss.mean() 46 | elif self.reduction == 'sum': 47 | return loss.sum() 48 | else: 49 | return loss 50 | 51 | 52 | class QFocalLoss(nn.Module): 53 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 54 | super(QFocalLoss, self).__init__() 55 | self.loss_fcn = loss_fcn 56 | self.gamma = gamma 57 | self.alpha = alpha 58 | self.reduction = loss_fcn.reduction 59 | self.loss_fcn.reduction = 'none' 60 | 61 | def forward(self, pred, true): 62 | loss = self.loss_fcn(pred, true) 63 | pred_prob = torch.sigmoid(pred) 64 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 65 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 66 | loss = loss * alpha_factor * modulating_factor 67 | 68 | if self.reduction == 'mean': 69 | return loss.mean() 70 | elif self.reduction == 'sum': 71 | return loss.sum() 72 | else: 73 | return loss 74 | 75 | 76 | class WingLoss(nn.Module): 77 | def __init__(self, w=10, e=2): 78 | super(WingLoss, self).__init__() 79 | self.w = w 80 | self.e = e 81 | self.C = self.w - self.w * np.log(1 + self.w / self.e) 82 | 83 | def forward(self, x, t, sigma=1): 84 | weight = torch.ones_like(t) 85 | weight[torch.where(t == -1)] = 0 86 | diff = weight * (x - t) 87 | abs_diff = diff.abs() 88 | flag = (abs_diff.data < self.w).float() 89 | y = flag * self.w * torch.log(1 + abs_diff / self.e) + (1 - flag) * (abs_diff - self.C) 90 | return y.sum() 91 | 92 | 93 | class LandmarksLoss(nn.Module): 94 | def __init__(self, alpha=1.0): 95 | super(LandmarksLoss, self).__init__() 96 | self.loss_fcn = WingLoss() 97 | self.alpha = alpha 98 | 99 | def forward(self, pred, truel, mask): 100 | loss = self.loss_fcn(pred * mask, truel * mask) 101 | return loss / (torch.sum(mask) + 10e-14) 102 | 103 | 104 | def compute_loss(p, targets, model): 105 | device = targets.device 106 | lcls, lbox, lobj, lmark = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, 107 | device=device), torch.zeros( 108 | 1, device=device) 109 | tcls, tbox, indices, anchors, tlandmarks, lmks_mask = build_targets(p, targets, model) 110 | h = model.hyp 111 | 112 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 113 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 114 | 115 | landmarks_loss = LandmarksLoss(1.0) 116 | 117 | cp, cn = smooth_BCE(eps=0.0) 118 | 119 | # Focal loss 120 | g = h['fl_gamma'] 121 | if g > 0: 122 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 123 | 124 | # Losses 125 | nt = 0 126 | no = len(p) 127 | balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] 128 | for i, pi in enumerate(p): 129 | b, a, gj, gi = indices[i] 130 | tobj = torch.zeros_like(pi[..., 0], device=device) 131 | 132 | n = b.shape[0] 133 | if n: 134 | nt = nt + n 135 | ps = pi[b, a, gj, gi] 136 | 137 | # Regression 138 | pxy = ps[:, :2].sigmoid() * 2. 
- 0.5 139 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 140 | pbox = torch.cat((pxy, pwh), 1) # predicted box 141 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) 142 | lbox = lbox + (1.0 - iou).mean() # iou loss 143 | 144 | # Objectness 145 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) 146 | 147 | # Classification 148 | if model.nc > 1: 149 | t = torch.full_like(ps[:, 13:], cn, device=device) 150 | t[range(n), tcls[i]] = cp 151 | lcls = lcls + BCEcls(ps[:, 13:], t) # BCE 152 | 153 | plandmarks = ps[:, 5:13] 154 | plandmarks_02 = plandmarks[:, 0:2] * anchors[i] 155 | plandmarks_24 = plandmarks[:, 2:4] * anchors[i] 156 | plandmarks_46 = plandmarks[:, 4:6] * anchors[i] 157 | plandmarks_68 = plandmarks[:, 6:8] * anchors[i] 158 | plandmarks_8 = plandmarks[:, 8:] 159 | plandmarks = torch.cat((plandmarks_02, plandmarks_24, plandmarks_46, plandmarks_68, plandmarks_8), dim=-1) 160 | 161 | lmark = lmark + landmarks_loss(plandmarks, tlandmarks[i], lmks_mask[i]) 162 | 163 | lobj = lobj + BCEobj(pi[..., 4], tobj) * balance[i] 164 | 165 | s = 3 / no 166 | lbox = lbox * h['box'] * s 167 | lobj = lobj * h['obj'] * s * (1.4 if no == 4 else 1.) 168 | lcls = lcls * h['cls'] * s 169 | lmark = lmark * h['landmark'] * s 170 | 171 | bs = tobj.shape[0] 172 | 173 | loss = lbox + lobj + lcls + lmark 174 | return loss * bs, torch.cat((lbox, lobj, lcls, lmark, loss)).detach() 175 | 176 | 177 | def build_targets(p, targets, model): 178 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] 179 | na, nt = det.na, targets.shape[0] 180 | tcls, tbox, indices, anch, landmarks, lmks_mask = [], [], [], [], [], [] 181 | gain = torch.ones(15, device=targets.device) 182 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) 183 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) 184 | 185 | g = 0.5 # bias 186 | off = torch.tensor([[0, 0], 187 | [1, 0], [0, 1], [-1, 0], [0, -1], 188 | ], device=targets.device).float() * g 189 | 190 | for i in range(det.nl): 191 | anchors, shape = det.anchors[i], p[i].shape 192 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] 193 | gain[6:14] = torch.tensor(p[i].shape)[[3, 2, 3, 2, 3, 2, 3, 2]] 194 | 195 | t = targets * gain 196 | if nt: 197 | r = t[:, :, 4:6] / anchors[:, None] 198 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] 199 | t = t[j] 200 | 201 | # Offsets 202 | gxy = t[:, 2:4] 203 | gxi = gain[[2, 3]] - gxy 204 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 205 | l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T 206 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 207 | t = t.repeat((5, 1, 1))[j] 208 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 209 | else: 210 | t = targets[0] 211 | offsets = 0 212 | 213 | # Define 214 | b, c = t[:, :2].long().T 215 | gxy = t[:, 2:4] 216 | gwh = t[:, 4:6] 217 | gij = (gxy - offsets).long() 218 | gi, gj = gij.T 219 | 220 | # Append 221 | a = t[:, 14].long() 222 | indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) 223 | tbox.append(torch.cat((gxy - gij, gwh), 1)) 224 | anch.append(anchors[a]) 225 | tcls.append(c) 226 | 227 | # landmarks 228 | lks = t[:, 6:14] 229 | lks_mask = torch.where(lks < 0, torch.full_like(lks, 0.), torch.full_like(lks, 1.0)) 230 | 231 | lks[:, [0, 1]] = (lks[:, [0, 1]] - gij) 232 | lks[:, [2, 3]] = (lks[:, [2, 3]] - gij) 233 | lks[:, [4, 5]] = (lks[:, [4, 5]] - gij) 234 | lks[:, [6, 7]] = (lks[:, [6, 7]] - gij) 235 | 236 | lks_mask_new = lks_mask 237 | lmks_mask.append(lks_mask_new) 238 | landmarks.append(lks) 239 | 240 | return tcls, tbox, indices, anch, landmarks, lmks_mask 241 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import torch 5 | import general 6 | 7 | 8 | def fitness(x): 9 | w = [0.0, 0.0, 0.1, 0.9] # model for [P, R, mAP@0.5, mAP@0.5:0.95] 10 | return (x[:, :4] * w).sum(1) 11 | 12 | 13 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]): 14 | """ Compute the average precision, given the recall and precision curves. 15 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 16 | # Arguments 17 | tp: True positives (nparray, nx1 or nx10). 18 | conf: Objectness value from 0-1 (nparray). 19 | pred_cls: Predicted object classes (nparray). 20 | target_cls: True object classes (nparray). 21 | plot: Plot precision-recall curve at mAP@0.5 22 | save_dir: Plot save directory 23 | # Returns 24 | The average precision as computed in py-faster-rcnn. 
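        Note: the precision and recall reported here are sampled at a fixed confidence of 0.1
        (pr_score below), while AP itself is integrated over a 101-point interpolated
        precision-recall envelope (see compute_ap).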
25 | """ 26 | 27 | # Sort 28 | i = np.argsort(-conf) 29 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 30 | 31 | unique_classes = np.unique(target_cls) 32 | 33 | # Create Precision-Recall curve and compute AP for each class 34 | px, py = np.linspace(0, 1, 1000), [] 35 | pr_score = 0.1 36 | s = [unique_classes.shape[0], tp.shape[1]] 37 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 38 | for ci, c in enumerate(unique_classes): 39 | i = pred_cls == c 40 | n_l = (target_cls == c).sum() 41 | n_p = i.sum() 42 | 43 | if n_p == 0 or n_l == 0: 44 | continue 45 | else: 46 | fpc = (1 - tp[i]).cumsum(0) 47 | tpc = tp[i].cumsum(0) 48 | 49 | # Recall 50 | recall = tpc / (n_l + 1e-16) 51 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) 52 | 53 | # Precision 54 | precision = tpc / (tpc + fpc) 55 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) 56 | 57 | # AP from recall-precision curve 58 | for j in range(tp.shape[1]): 59 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 60 | if plot and (j == 0): 61 | py.append(np.interp(px, mrec, mpre)) 62 | 63 | f1 = 2 * p * r / (p + r + 1e-16) 64 | 65 | if plot: 66 | plot_pr_curve(px, py, ap, save_dir, names) 67 | 68 | return p, r, ap, f1, unique_classes.astype('int32') 69 | 70 | 71 | def compute_ap(recall, precision): 72 | """ Compute the average precision, given the recall and precision curves 73 | # Arguments 74 | recall: The recall curve (list) 75 | precision: The precision curve (list) 76 | # Returns 77 | Average precision, precision curve, recall curve 78 | """ 79 | 80 | # Append sentinel values to beginning and end 81 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 82 | mpre = np.concatenate(([1.], precision, [0.])) 83 | 84 | # Compute the precision envelope 85 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 86 | 87 | # Integrate area under curve 88 | method = 'interp' 89 | if method == 'interp': 90 | x = np.linspace(0, 1, 101) 91 | ap = np.trapz(np.interp(x, mrec, mpre), x) 92 | else: 93 | i = np.where(mrec[1:] != mrec[:-1])[0] 94 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 95 | 96 | return ap, mpre, mrec 97 | 98 | 99 | class ConfusionMatrix: 100 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 101 | self.matrix = np.zeros((nc + 1, nc + 1)) 102 | self.nc = nc 103 | self.conf = conf 104 | self.iou_thres = iou_thres 105 | 106 | def process_batch(self, detections, labels): 107 | """ 108 | Return intersection-over-union (Jaccard index) of boxes. 109 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
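        Detections below self.conf are discarded first; the remainder are matched to labels
        at IoU > self.iou_thres. Unmatched labels are counted as background FN and unmatched
        detections as background FP, matching the axis labels used in plot().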
110 | Arguments: 111 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 112 | labels (Array[M, 5]), class, x1, y1, x2, y2 113 | Returns: 114 | None, updates confusion matrix accordingly 115 | """ 116 | detections = detections[detections[:, 4] > self.conf] 117 | gt_classes = labels[:, 0].int() 118 | detection_classes = detections[:, 5].int() 119 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 120 | 121 | x = torch.where(iou > self.iou_thres) 122 | if x[0].shape[0]: 123 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 124 | if x[0].shape[0] > 1: 125 | matches = matches[matches[:, 2].argsort()[::-1]] 126 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 127 | matches = matches[matches[:, 2].argsort()[::-1]] 128 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 129 | else: 130 | matches = np.zeros((0, 3)) 131 | 132 | n = matches.shape[0] > 0 133 | m0, m1, _ = matches.transpose().astype(np.int16) 134 | for i, gc in enumerate(gt_classes): 135 | j = m0 == i 136 | if n and sum(j) == 1: 137 | self.matrix[gc, detection_classes[m1[j]]] += 1 138 | else: 139 | self.matrix[gc, self.nc] += 1 140 | 141 | if n: 142 | for i, dc in enumerate(detection_classes): 143 | if not any(m1 == i): 144 | self.matrix[self.nc, dc] += 1 145 | 146 | def matrix(self): 147 | return self.matrix 148 | 149 | def plot(self, save_dir='', names=()): 150 | try: 151 | import seaborn as sn 152 | 153 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) 154 | array[array < 0.005] = np.nan 155 | 156 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 157 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) 158 | labels = (0 < len(names) < 99) and len(names) == self.nc 159 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 160 | xticklabels=names + ['background FN'] if labels else "auto", 161 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 162 | fig.axes[0].set_xlabel('True') 163 | fig.axes[0].set_ylabel('Predicted') 164 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 165 | except Exception as e: 166 | pass 167 | 168 | def print(self): 169 | for i in range(self.nc + 1): 170 | print(' '.join(map(str, self.matrix[i]))) 171 | 172 | 173 | # Plots 174 | 175 | def plot_pr_curve(px, py, ap, save_dir='.', names=()): 176 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 177 | py = np.stack(py, axis=1) 178 | 179 | if 0 < len(names) < 21: 180 | for i, y in enumerate(py.T): 181 | ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) 182 | else: 183 | ax.plot(px, py, linewidth=1, color='grey') 184 | 185 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 186 | ax.set_xlabel('Recall') 187 | ax.set_ylabel('Precision') 188 | ax.set_xlim(0, 1) 189 | ax.set_ylim(0, 1) 190 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 191 | fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250) 192 | -------------------------------------------------------------------------------- /utils/plate_rec.py: -------------------------------------------------------------------------------- 1 | from Net.colorNet import myNet_ocr_color 2 | import torch 3 | import cv2 4 | import numpy as np 5 | import os 6 | import time 7 | 8 | 9 | def cv_imread(path): 10 | img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1) 11 | return img 12 | 13 | 14 | def allFilePath(rootPath, 
allFIleList): 15 | fileList = os.listdir(rootPath) 16 | for temp in fileList: 17 | if os.path.isfile(os.path.join(rootPath, temp)): 18 | if temp.endswith('.jpg') or temp.endswith('.png') or temp.endswith('.JPG'): 19 | allFIleList.append(os.path.join(rootPath, temp)) 20 | else: 21 | allFilePath(os.path.join(rootPath, temp), allFIleList) 22 | 23 | 24 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu") 25 | color = ['黑色', '蓝色', '绿色', '白色', '黄色'] 26 | plateName = r"#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品" 27 | mean_value, std_value = (0.588, 0.193) 28 | 29 | 30 | def decodePlate(preds): 31 | pre = 0 32 | newPreds = [] 33 | index = [] 34 | for i in range(len(preds)): 35 | if preds[i] != 0 and preds[i] != pre: 36 | newPreds.append(preds[i]) 37 | index.append(i) 38 | pre = preds[i] 39 | return newPreds, index 40 | 41 | 42 | def image_processing(img, device): 43 | img = cv2.resize(img, (168, 48)) 44 | img = np.reshape(img, (48, 168, 3)) 45 | 46 | # normalize 47 | img = img.astype(np.float32) 48 | img = (img / 255. - mean_value) / std_value 49 | img = img.transpose([2, 0, 1]) 50 | img = torch.from_numpy(img) 51 | 52 | img = img.to(device) 53 | img = img.view(1, *img.size()) 54 | return img 55 | 56 | 57 | def get_plate_result(img, device, model, is_color=False): 58 | input = image_processing(img, device) 59 | if is_color: 60 | preds, color_preds = model(input) 61 | color_preds = torch.softmax(color_preds, dim=-1) 62 | color_conf, color_index = torch.max(color_preds, dim=-1) 63 | color_conf = color_conf.item() 64 | else: 65 | preds = model(input) 66 | preds = torch.softmax(preds, dim=-1) 67 | prob, index = preds.max(dim=-1) 68 | index = index.view(-1).detach().cpu().numpy() 69 | prob = prob.view(-1).detach().cpu().numpy() 70 | newPreds, new_index = decodePlate(index) 71 | prob = prob[new_index] 72 | plate = "" 73 | for i in newPreds: 74 | plate += plateName[i] 75 | if is_color: 76 | return plate, prob, color[color_index], color_conf # 返回车牌号以及每个字符的概率,以及颜色,和颜色的概率 77 | else: 78 | return plate, prob 79 | 80 | 81 | def init_model(device, model_path, is_color=False): 82 | check_point = torch.load(model_path, map_location=device) 83 | model_state = check_point['state_dict'] 84 | cfg = check_point['cfg'] 85 | color_classes = 0 86 | if is_color: 87 | color_classes = 5 # 颜色类别数 88 | model = myNet_ocr_color(num_classes=len(plateName), export=True, cfg=cfg, color_num=color_classes) 89 | 90 | model.load_state_dict(model_state, strict=False) 91 | model.to(device) 92 | model.eval() 93 | return model 94 | 95 | 96 | if __name__ == '__main__': 97 | model_path = r"model/plate_rec_color.pth" 98 | image_path = "imgs/1.jpg" 99 | testPath = r"Chinese_license_plate_detection_recognition-main/imgs" 100 | fileList = [] 101 | allFilePath(testPath, fileList) 102 | is_color = False 103 | model = init_model(device, model_path, is_color=is_color) 104 | right = 0 105 | begin = time.time() 106 | 107 | for imge_path in fileList: 108 | img = cv2.imread(imge_path) 109 | if is_color: 110 | plate, _, plate_color, _ = get_plate_result(img, device, model, is_color=is_color) 111 | print(plate) 112 | else: 113 | plate, _ = get_plate_result(img, device, model, is_color=is_color) 114 | print(plate, imge_path) 115 | -------------------------------------------------------------------------------- /utils/plots.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import math 3 | import os 4 | import random 5 | from copy 
import copy 6 | from pathlib import Path 7 | 8 | import cv2 9 | import matplotlib 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import seaborn as sns 14 | import torch 15 | import yaml 16 | from PIL import Image, ImageDraw 17 | from scipy.signal import butter, filtfilt 18 | 19 | from general import xywh2xyxy, xyxy2xywh 20 | from metrics import fitness 21 | 22 | # Settings 23 | matplotlib.rc('font', **{'size': 11}) 24 | matplotlib.use('Agg') 25 | 26 | 27 | def color_list(): 28 | def hex2rgb(h): 29 | return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) 30 | 31 | return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']] 32 | 33 | 34 | def hist2d(x, y, n=100): 35 | xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) 36 | hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) 37 | xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) 38 | yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) 39 | return np.log(hist[xidx, yidx]) 40 | 41 | 42 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 43 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 44 | color = color or [random.randint(0, 255) for _ in range(3)] 45 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 46 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 47 | if label: 48 | tf = max(tl - 1, 1) 49 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 50 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 51 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 52 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 53 | 54 | 55 | def output_to_target(output): 56 | targets = [] 57 | for i, o in enumerate(output): 58 | for *box, conf, cls in o.cpu().numpy(): 59 | targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) 60 | return np.array(targets) 61 | 62 | 63 | def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): 64 | if isinstance(images, torch.Tensor): 65 | images = images.cpu().float().numpy() 66 | if isinstance(targets, torch.Tensor): 67 | targets = targets.cpu().numpy() 68 | 69 | if np.max(images[0]) <= 1: 70 | images *= 255 71 | 72 | tl = 3 73 | tf = max(tl - 1, 1) 74 | bs, _, h, w = images.shape 75 | bs = min(bs, max_subplots) 76 | ns = np.ceil(bs ** 0.5) 77 | 78 | scale_factor = max_size / max(h, w) 79 | if scale_factor < 1: 80 | h = math.ceil(scale_factor * h) 81 | w = math.ceil(scale_factor * w) 82 | 83 | mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) 84 | for i, img in enumerate(images): 85 | if i == max_subplots: 86 | break 87 | 88 | block_x = int(w * (i // ns)) 89 | block_y = int(h * (i % ns)) 90 | 91 | img = img.transpose(1, 2, 0) 92 | if scale_factor < 1: 93 | img = cv2.resize(img, (w, h)) 94 | 95 | mosaic[block_y:block_y + h, block_x:block_x + w, :] = img 96 | if len(targets) > 0: 97 | image_targets = targets[targets[:, 0] == i] 98 | boxes = xywh2xyxy(image_targets[:, 2:6]).T 99 | classes = image_targets[:, 1].astype('int') 100 | labels = image_targets.shape[1] == 6 101 | conf = None if labels else image_targets[:, 6] 102 | 103 | if boxes.shape[1]: 104 | if boxes.max() <= 1.01: 105 | boxes[[0, 2]] *= w 106 | boxes[[1, 3]] *= h 107 | elif scale_factor < 1: 108 | boxes *= scale_factor 109 | boxes[[0, 2]] += block_x 110 | boxes[[1, 3]] += block_y 
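# At this point the boxes have been rescaled to pixels (if they were normalized) and
# shifted into this tile's slot in the mosaic; the loop below draws ground-truth
# targets (6 columns) unconditionally and predictions (7 columns) only when conf > 0.25.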
111 | for j, box in enumerate(boxes.T): 112 | cls = int(classes[j]) 113 | cls = names[cls] if names else cls 114 | if labels or conf[j] > 0.25: 115 | label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) 116 | plot_one_box(box, mosaic, label=label, color=None, line_thickness=tl) 117 | 118 | # Draw image filename labels 119 | if paths: 120 | label = Path(paths[i]).name[:40] # trim to 40 char 121 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 122 | cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, 123 | lineType=cv2.LINE_AA) 124 | 125 | # Image border 126 | cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) 127 | 128 | if fname: 129 | r = min(1280. / max(h, w) / ns, 1.0) 130 | mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) 131 | Image.fromarray(mosaic).save(fname) 132 | return mosaic 133 | 134 | 135 | def plot_study_txt(path='study/', x=None): 136 | fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True) 137 | ax = ax.ravel() 138 | 139 | fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) 140 | for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']]: 141 | y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T 142 | x = np.arange(y.shape[1]) if x is None else np.array(x) 143 | s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)'] 144 | for i in range(7): 145 | ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) 146 | ax[i].set_title(s[i]) 147 | 148 | j = y[3].argmax() + 1 149 | ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8, 150 | label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO')) 151 | 152 | ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], 153 | 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') 154 | 155 | ax2.grid() 156 | ax2.set_yticks(np.arange(30, 60, 5)) 157 | ax2.set_xlim(0, 30) 158 | ax2.set_ylim(29, 51) 159 | ax2.set_xlabel('GPU Speed (ms/img)') 160 | ax2.set_ylabel('COCO AP val') 161 | ax2.legend(loc='lower right') 162 | plt.savefig('test_study.png', dpi=300) 163 | 164 | 165 | def plot_labels(labels, save_dir=Path(''), loggers=None): 166 | print('Plotting labels... 
') 167 | c, b = labels[:, 0], labels[:, 1:5].transpose() # classes, boxes 168 | nc = int(c.max() + 1) # number of classes 169 | colors = color_list() 170 | x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) 171 | 172 | # seaborn correlogram 173 | sns.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9)) 174 | plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200) 175 | plt.close() 176 | 177 | # matplotlib labels 178 | matplotlib.use('svg') 179 | ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() 180 | ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) 181 | ax[0].set_xlabel('classes') 182 | sns.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9) 183 | sns.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9) 184 | 185 | # rectangles 186 | labels[:, 1:3] = 0.5 187 | labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 188 | img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) 189 | ax[1].imshow(img) 190 | ax[1].axis('off') 191 | 192 | for a in [0, 1, 2, 3]: 193 | for s in ['top', 'right', 'left', 'bottom']: 194 | ax[a].spines[s].set_visible(False) 195 | 196 | plt.savefig(save_dir / 'labels.jpg', dpi=200) 197 | matplotlib.use('Agg') 198 | plt.close() 199 | 200 | # loggers 201 | for k, v in loggers.items() or {}: 202 | if k == 'wandb' and v: 203 | v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]}) 204 | 205 | 206 | def plot_evolution(yaml_file='data/hyp.finetune.yaml'): 207 | with open(yaml_file) as f: 208 | hyp = yaml.load(f, Loader=yaml.SafeLoader) 209 | x = np.loadtxt('evolve.txt', ndmin=2) 210 | f = fitness(x) 211 | plt.figure(figsize=(10, 12), tight_layout=True) 212 | matplotlib.rc('font', **{'size': 8}) 213 | for i, (k, v) in enumerate(hyp.items()): 214 | y = x[:, i + 7] 215 | mu = y[f.argmax()] 216 | plt.subplot(6, 5, i + 1) 217 | plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') 218 | plt.plot(mu, f.max(), 'k+', markersize=15) 219 | plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) 220 | if i % 5 != 0: 221 | plt.yticks([]) 222 | print('%15s: %.3g' % (k, mu)) 223 | plt.savefig('evolve.png', dpi=200) 224 | print('\nPlot saved as evolve.png') 225 | 226 | 227 | def profile_idetection(start=0, stop=0, labels=(), save_dir=''): 228 | ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() 229 | s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS'] 230 | files = list(Path(save_dir).glob('frames*.txt')) 231 | for fi, f in enumerate(files): 232 | try: 233 | results = np.loadtxt(f, ndmin=2).T[:, 90:-30] 234 | n = results.shape[1] 235 | x = np.arange(start, min(stop, n) if stop else n) 236 | results = results[:, x] 237 | t = (results[0] - results[0].min()) 238 | results[0] = x 239 | for i, a in enumerate(ax): 240 | if i < len(results): 241 | label = labels[fi] if len(labels) else f.stem.replace('frames_', '') 242 | a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5) 243 | a.set_title(s[i]) 244 | a.set_xlabel('time (s)') 245 | for side in ['top', 'right']: 246 | a.spines[side].set_visible(False) 247 | else: 248 | a.remove() 249 | except Exception as e: 250 | print('Warning: Plotting error for %s; %s' % (f, e)) 251 | 252 | ax[1].legend() 253 | plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200) 254 | 255 | 256 | def plot_results(start=0, stop=0, bucket='', id=(), labels=(), 
save_dir=''): 257 | fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) 258 | ax = ax.ravel() 259 | s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', 260 | 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] 261 | if bucket: 262 | files = ['results%g.txt' % x for x in id] 263 | c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id) 264 | os.system(c) 265 | else: 266 | files = list(Path(save_dir).glob('results*.txt')) 267 | assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) 268 | for fi, f in enumerate(files): 269 | try: 270 | results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T 271 | n = results.shape[1] 272 | x = range(start, min(stop, n) if stop else n) 273 | for i in range(10): 274 | y = results[i, x] 275 | if i in [0, 1, 2, 5, 6, 7]: 276 | y[y == 0] = np.nan 277 | label = labels[fi] if len(labels) else f.stem 278 | ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8) 279 | ax[i].set_title(s[i]) 280 | except Exception as e: 281 | print('Warning: Plotting error for %s; %s' % (f, e)) 282 | 283 | ax[1].legend() 284 | fig.savefig(Path(save_dir) / 'results.png', dpi=200) 285 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import os 4 | import subprocess 5 | import time 6 | from contextlib import contextmanager 7 | from copy import deepcopy 8 | from pathlib import Path 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torchvision 14 | 15 | torch.cuda.is_available() 16 | 17 | try: 18 | import thop 19 | except ImportError: 20 | thop = None 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | @contextmanager 25 | def torch_distributed_zero_first(local_rank: int): 26 | """ 27 | Decorator to make all processes in distributed training wait for each local_master to do something. 
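Typical usage: `with torch_distributed_zero_first(local_rank): ...`; ranks other than -1 and 0 wait at the first barrier until the master process has executed the body, then rank 0 joins the barrier to release them.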
28 | """ 29 | if local_rank not in [-1, 0]: 30 | torch.distributed.barrier() 31 | yield 32 | if local_rank == 0: 33 | torch.distributed.barrier() 34 | 35 | 36 | def init_torch_seeds(seed=0): 37 | torch.manual_seed(seed) 38 | if seed == 0: 39 | cudnn.benchmark, cudnn.deterministic = False, True 40 | else: 41 | cudnn.benchmark, cudnn.deterministic = True, False 42 | 43 | 44 | def git_describe(): 45 | if Path('.git').exists(): 46 | return subprocess.check_output('git describe --tags --long --always', shell=True).decode('utf-8')[:-1] 47 | else: 48 | return '' 49 | 50 | 51 | def select_device(device='', batch_size=None): 52 | s = f'YOLOv5 {git_describe()} torch {torch.__version__} ' 53 | cpu = device.lower() == 'cpu' 54 | if cpu: 55 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 56 | elif device: 57 | os.environ['CUDA_VISIBLE_DEVICES'] = device 58 | print(torch.cuda.is_available()) 59 | print(torch.cuda.device_count()) 60 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' 61 | 62 | cuda = not cpu and torch.cuda.is_available() 63 | if cuda: 64 | n = torch.cuda.device_count() 65 | if n > 1 and batch_size: 66 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 67 | space = ' ' * len(s) 68 | for i, d in enumerate(device.split(',') if device else range(n)): 69 | p = torch.cuda.get_device_properties(i) 70 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" 71 | else: 72 | s += 'CPU\n' 73 | 74 | logger.info(s) 75 | return torch.device('cuda:0' if cuda else 'cpu') 76 | 77 | 78 | def time_synchronized(): 79 | if torch.cuda.is_available(): 80 | torch.cuda.synchronize() 81 | return time.time() 82 | 83 | 84 | def profile(x, ops, n=100, device=None): 85 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 86 | x = x.to(device) 87 | x.requires_grad = True 88 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 89 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 90 | for m in ops if isinstance(ops, list) else [ops]: 91 | m = m.to(device) if hasattr(m, 'to') else m # device 92 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 93 | dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward 94 | try: 95 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 96 | except: 97 | flops = 0 98 | 99 | for _ in range(n): 100 | t[0] = time_synchronized() 101 | y = m(x) 102 | t[1] = time_synchronized() 103 | try: 104 | _ = y.sum().backward() 105 | t[2] = time_synchronized() 106 | except: 107 | t[2] = float('nan') 108 | dtf += (t[1] - t[0]) * 1000 / n 109 | dtb += (t[2] - t[1]) * 1000 / n 110 | 111 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 112 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 113 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 114 | print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 115 | 116 | 117 | def is_parallel(model): 118 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 119 | 120 | 121 | def intersect_dicts(da, db, exclude=()): 122 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 123 | 124 | 125 | def initialize_weights(model): 126 | for m in model.modules(): 127 | t = type(m) 128 | if t is nn.Conv2d: 129 | pass 130 | elif t is nn.BatchNorm2d: 131 | m.eps = 1e-3 132 | m.momentum = 0.03 133 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 134 | m.inplace = True 135 | 136 | 137 | def find_modules(model, mclass=nn.Conv2d): 138 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 139 | 140 | 141 | def sparsity(model): 142 | a, b = 0., 0. 143 | for p in model.parameters(): 144 | a += p.numel() 145 | b += (p == 0).sum() 146 | return b / a 147 | 148 | 149 | def prune(model, amount=0.3): 150 | import torch.nn.utils.prune as prune 151 | print('Pruning model... 
', end='') 152 | for name, m in model.named_modules(): 153 | if isinstance(m, nn.Conv2d): 154 | prune.l1_unstructured(m, name='weight', amount=amount) 155 | prune.remove(m, 'weight') 156 | print(' %.3g global sparsity' % sparsity(model)) 157 | 158 | 159 | def fuse_conv_and_bn(conv, bn): 160 | fusedconv = nn.Conv2d(conv.in_channels, 161 | conv.out_channels, 162 | kernel_size=conv.kernel_size, 163 | stride=conv.stride, 164 | padding=conv.padding, 165 | groups=conv.groups, 166 | bias=True).requires_grad_(False).to(conv.weight.device) 167 | 168 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 169 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 170 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 171 | 172 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 173 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 174 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 175 | 176 | return fusedconv 177 | 178 | 179 | def model_info(model, verbose=False, img_size=640): 180 | n_p = sum(x.numel() for x in model.parameters()) 181 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) 182 | if verbose: 183 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 184 | for i, (name, p) in enumerate(model.named_parameters()): 185 | name = name.replace('module_list.', '') 186 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 187 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 188 | 189 | try: # FLOPS 190 | from thop import profile 191 | stride = int(model.stride.max()) if hasattr(model, 'stride') else 32 192 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 193 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 194 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] 195 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) 196 | except (ImportError, Exception): 197 | fs = '' 198 | 199 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 200 | 201 | 202 | def load_classifier(name='resnet101', n=2): 203 | model = torchvision.models.__dict__[name](pretrained=True) 204 | 205 | # Reshape output to n classes 206 | filters = model.fc.weight.shape[1] 207 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 208 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 209 | model.fc.out_features = n 210 | return model 211 | 212 | 213 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 214 | if ratio == 1.0: 215 | return img 216 | else: 217 | h, w = img.shape[2:] 218 | s = (int(h * ratio), int(w * ratio)) 219 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) 220 | if not same_shape: # pad/crop img 221 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 222 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) 223 | 224 | 225 | def copy_attr(a, b, include=(), exclude=()): 226 | for k, v in b.__dict__.items(): 227 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 228 | continue 229 | else: 230 | setattr(a, k, v) 231 | 232 | 233 | class ModelEMA: 234 | """ Model Exponential Moving Average from 
https://github.com/rwightman/pytorch-image-models 235 | Keep a moving average of everything in the model state_dict (parameters and buffers). 236 | This is intended to allow functionality like 237 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 238 | A smoothed version of the model is necessary for some training schemes to perform well. 239 | This class is sensitive where it is initialized in the sequence of model init, 240 | GPU assignment and distributed training wrappers. 241 | """ 242 | 243 | def __init__(self, model, decay=0.9999, updates=0): 244 | # Create EMA 245 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() 246 | self.updates = updates 247 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) 248 | for p in self.ema.parameters(): 249 | p.requires_grad_(False) 250 | 251 | def update(self, model): 252 | with torch.no_grad(): 253 | self.updates += 1 254 | d = self.decay(self.updates) 255 | 256 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() 257 | for k, v in self.ema.state_dict().items(): 258 | if v.dtype.is_floating_point: 259 | v *= d 260 | v += (1. - d) * msd[k].detach() 261 | 262 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 263 | copy_attr(self.ema, model, include, exclude) 264 | -------------------------------------------------------------------------------- /weights/plate_rec.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/weights/plate_rec.pt -------------------------------------------------------------------------------- /weights/plate_rec_color.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/weights/plate_rec_color.pth -------------------------------------------------------------------------------- /weights/plate_rec_ocr.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S2mple1/License_Plate_Detection/60e0cef2f545f799497810c2bf0ae47a4c4a9b1e/weights/plate_rec_ocr.pth --------------------------------------------------------------------------------